Merge pull request #33 from KJNeko/28-hydrus-database-metadata-importing-and-cluster-scan-timestamps
Implements mtime from files
This commit is contained in:
7
IDHANMigration/src/120-file_info.sql
Normal file
7
IDHANMigration/src/120-file_info.sql
Normal file
@@ -0,0 +1,7 @@
|
||||
-- Migration 120: make cluster_store_time self-populating and mandatory, and
-- add a nullable modified_time column to file_info.

-- New rows that omit cluster_store_time get the insertion time.
ALTER TABLE file_info
    ALTER COLUMN cluster_store_time SET DEFAULT now();

-- Backfill any pre-existing NULLs first; SET NOT NULL below would otherwise
-- abort the whole migration on the first offending row.
UPDATE file_info
SET cluster_store_time = now()
WHERE cluster_store_time IS NULL;

ALTER TABLE file_info
    ALTER COLUMN cluster_store_time SET NOT NULL;

-- Left nullable on purpose: existing rows have no modified_time yet.
ALTER TABLE file_info
    ADD COLUMN modified_time TIMESTAMP;
|
||||
@@ -82,7 +82,7 @@ class ScanContext
|
||||
std::filesystem::path m_path;
|
||||
std::size_t m_size;
|
||||
|
||||
ScanParams m_params;
|
||||
ScanParams m_params {};
|
||||
std::string m_mime_name {};
|
||||
|
||||
static constexpr auto INVALID_RECORD { std::numeric_limits< RecordID >::max() };
|
||||
@@ -95,8 +95,11 @@ class ScanContext
|
||||
ExpectedTask< RecordID > checkRecord( SHA256 sha256, DbClientPtr db );
|
||||
|
||||
ExpectedTask< void > cleanupDoubleClusters( ClusterID found_cluster_id, DbClientPtr db );
|
||||
drogon::Task<> updateFileModifiedTime( drogon::orm::DbClientPtr db );
|
||||
ExpectedTask< void > insertFileInfo( drogon::orm::DbClientPtr db );
|
||||
|
||||
ExpectedTask< void > checkCluster( DbClientPtr db );
|
||||
ExpectedTask< bool > hasMime( DbClientPtr db );
|
||||
|
||||
ExpectedTask<> scanMime( DbClientPtr db );
|
||||
|
||||
@@ -224,21 +227,22 @@ ExpectedTask< SHA256 > ScanContext::checkSHA256( FileIOUring uring, const std::f
|
||||
sha256_hex ) );
|
||||
}
|
||||
|
||||
// try to fix the mistake
|
||||
auto new_path { bad_dir / m_path.filename() };
|
||||
|
||||
try
|
||||
{
|
||||
if ( !m_params.read_only ) std::filesystem::rename( m_path, new_path );
|
||||
}
|
||||
catch ( std::exception& e )
|
||||
{
|
||||
co_return std::unexpected( createInternalError(
|
||||
"When scanning file at {} it was detected that the filename does not match the sha256 "
|
||||
"{}. There was an error attempting to fix this: What: {}",
|
||||
m_path.string(),
|
||||
sha256_hex,
|
||||
e.what() ) );
|
||||
if ( !m_params.read_only )
|
||||
{
|
||||
const auto new_path { bad_dir / m_path.filename() };
|
||||
|
||||
// try to fix the mistake
|
||||
std::filesystem::rename( m_path, new_path );
|
||||
|
||||
co_return std::unexpected( createInternalError(
|
||||
"When scanning file at {} it was detected that the filename does "
|
||||
"not match the sha256 {}. The file has been moved to {}",
|
||||
m_path.string(),
|
||||
sha256_hex,
|
||||
new_path.string() ) );
|
||||
}
|
||||
}
|
||||
catch ( ... )
|
||||
{
|
||||
@@ -248,13 +252,6 @@ ExpectedTask< SHA256 > ScanContext::checkSHA256( FileIOUring uring, const std::f
|
||||
m_path.string(),
|
||||
sha256_hex ) );
|
||||
}
|
||||
|
||||
co_return std::unexpected( createInternalError(
|
||||
"When scanning file at {} it was detected that the filename does "
|
||||
"not match the sha256 {}. The file has been moved to {}",
|
||||
m_path.string(),
|
||||
sha256_hex,
|
||||
new_path.string() ) );
|
||||
}
|
||||
|
||||
co_return *sha256_e;
|
||||
@@ -310,6 +307,8 @@ ExpectedTask< void > ScanContext::cleanupDoubleClusters( const ClusterID found_c
|
||||
|
||||
std::filesystem::remove( m_path );
|
||||
}
|
||||
|
||||
co_return {};
|
||||
}
|
||||
|
||||
log::warn(
|
||||
@@ -321,22 +320,43 @@ ExpectedTask< void > ScanContext::cleanupDoubleClusters( const ClusterID found_c
|
||||
co_return {};
|
||||
}
|
||||
|
||||
/**
 * @brief Writes the scanned file's last-write time into the modified_time column
 *        of the file_info row belonging to m_record_id.
 * @param db Database client used to run the UPDATE.
 */
drogon::Task<> ScanContext::updateFileModifiedTime( drogon::orm::DbClientPtr db )
{
	// getLastWriteTime yields a microsecond count, which trantor::Date is constructed from
	const auto mtime_us { filesystem::getLastWriteTime( m_path ) };
	const trantor::Date modified_at { mtime_us };

	log::debug( "mtime is {}", modified_at.toFormattedString( true ) );

	co_await db->execSqlCoro(
		"UPDATE file_info SET modified_time = $1 WHERE record_id = $2",
		modified_at,
		m_record_id );
}
|
||||
|
||||
/**
 * @brief Inserts a file_info row for the current record.
 *
 * The row carries the record id, cluster id, file size and the file's
 * last-write time; every other column is left for the database to fill in.
 *
 * @param db Database client used to run the INSERT.
 * @return An empty (successful) expected once the statement has been awaited.
 */
ExpectedTask<> ScanContext::insertFileInfo( drogon::orm::DbClientPtr db )
{
	const auto mtime_us { filesystem::getLastWriteTime( m_path ) };
	const trantor::Date modified_at { mtime_us };

	co_await db->execSqlCoro(
		"INSERT INTO file_info (record_id, cluster_id, size, modified_time) VALUES ($1, $2, $3, $4)",
		m_record_id,
		m_cluster_id,
		m_size,
		modified_at );

	co_return {};
}
|
||||
|
||||
ExpectedTask<> ScanContext::checkCluster( drogon::orm::DbClientPtr db )
|
||||
{
|
||||
FGL_ASSERT( m_record_id != INVALID_RECORD, "Invalid record" );
|
||||
const auto file_info {
|
||||
co_await db->execSqlCoro( "SELECT cluster_id FROM file_info WHERE record_id = $1", m_record_id )
|
||||
co_await db->execSqlCoro( "SELECT cluster_id, modified_time FROM file_info WHERE record_id = $1", m_record_id )
|
||||
};
|
||||
|
||||
// create the file info if it doesn't already exist
|
||||
if ( file_info.empty() )
|
||||
{
|
||||
co_await db->execSqlCoro(
|
||||
"INSERT INTO file_info (record_id, cluster_id, size, cluster_store_time) VALUES ($1, $2, $3, now())",
|
||||
m_record_id,
|
||||
m_cluster_id,
|
||||
m_size );
|
||||
co_return co_await insertFileInfo( db );
|
||||
}
|
||||
|
||||
co_return {};
|
||||
if ( file_info[ 0 ][ "modified_time" ].isNull() )
|
||||
{
|
||||
co_await updateFileModifiedTime( db );
|
||||
}
|
||||
|
||||
// we found a cluster, check if it's the one we are about to add too
|
||||
@@ -348,35 +368,42 @@ ExpectedTask<> ScanContext::checkCluster( drogon::orm::DbClientPtr db )
|
||||
// handle the double count, which will check if the found cluster contains the file and delete it from this one
|
||||
// if found. Otherwise the record's cluster is set to the current cluster
|
||||
auto result { co_await cleanupDoubleClusters( found_cluster_id, db ) };
|
||||
if ( !result ) co_return std::unexpected( result.error() );
|
||||
return_unexpected_error( result );
|
||||
}
|
||||
|
||||
co_return {};
|
||||
}
|
||||
|
||||
/**
 * @brief Checks whether the current record already has a mime assigned.
 *
 * When a mime is found its name is cached in m_mime_name as a side effect.
 *
 * @param db Database client used to run the lookup.
 * @return true if a file_info row with a non-NULL mime_id exists for m_record_id, false otherwise.
 *
 * NOTE(review): the awaited result appears to be an expected-like wrapper around
 * the bool; callers should test the contained value rather than the wrapper's
 * own truthiness. Confirm against the definition of ExpectedTask.
 */
ExpectedTask< bool > ScanContext::hasMime( DbClientPtr db )
{
	const auto rows { co_await db->execSqlCoro(
		"SELECT mime_id, name FROM file_info JOIN mime USING (mime_id) WHERE record_id = $1 AND mime_id IS NOT NULL",
		m_record_id ) };

	// no row means no mime has been recorded yet
	if ( rows.empty() ) co_return false;

	// column 1 of the SELECT is the mime name
	m_mime_name = rows[ 0 ][ 1 ].as< std::string >();

	co_return true;
}
|
||||
|
||||
ExpectedTask<> ScanContext::scanMime( DbClientPtr db )
|
||||
{
|
||||
FGL_ASSERT( m_record_id != INVALID_RECORD, "Invalid record" );
|
||||
FileIOUring file_io { m_path };
|
||||
|
||||
if ( !m_params.rescan_mime ) // skip checking if we have a mime if we are going to rescan it
|
||||
// skip checking if we have a mime if we are going to rescan it
|
||||
if ( !m_params.rescan_mime && co_await hasMime( db ) )
|
||||
{
|
||||
auto current_mime { co_await db->execSqlCoro(
|
||||
"SELECT mime_id, name FROM file_info JOIN mime USING (mime_id) WHERE record_id = $1 AND mime_id IS NOT NULL",
|
||||
m_record_id ) };
|
||||
|
||||
if ( !current_mime.empty() )
|
||||
{
|
||||
m_mime_name = current_mime[ 0 ][ 1 ].as< std::string >();
|
||||
log::debug(
|
||||
"Skipping mime scan for {} because it's already been scanned and the rescan_mime flag was false",
|
||||
m_record_id );
|
||||
co_return {};
|
||||
}
|
||||
co_return {};
|
||||
}
|
||||
|
||||
const auto mime_string_e { co_await mime::getMimeDatabase()->scan( file_io ) };
|
||||
|
||||
const auto mtime { filesystem::getLastWriteTime( m_path ) };
|
||||
|
||||
if ( !mime_string_e )
|
||||
{
|
||||
std::string extension_str { m_path.extension().string() };
|
||||
@@ -389,10 +416,11 @@ ExpectedTask<> ScanContext::scanMime( DbClientPtr db )
|
||||
extension_str );
|
||||
|
||||
co_await db->execSqlCoro(
|
||||
"INSERT INTO file_info (record_id, size, extension, cluster_store_time) VALUES ($1, $2, $3, now()) ON CONFLICT (record_id) DO UPDATE SET extension = $3, mime_id = NULL",
|
||||
"INSERT INTO file_info (record_id, size, extension, modified_time) VALUES ($1, $2, $3, $4) ON CONFLICT (record_id) DO UPDATE SET extension = $3, mime_id = NULL",
|
||||
m_record_id,
|
||||
m_size,
|
||||
extension_str );
|
||||
extension_str,
|
||||
mtime );
|
||||
|
||||
co_return {};
|
||||
}
|
||||
@@ -404,10 +432,11 @@ ExpectedTask<> ScanContext::scanMime( DbClientPtr db )
|
||||
return_unexpected_error( mime_id_e );
|
||||
|
||||
co_await db->execSqlCoro(
|
||||
"INSERT INTO file_info (record_id, size, mime_id, cluster_store_time) VALUES ($1, $2, $3, now()) ON CONFLICT (record_id) DO UPDATE SET mime_id = $3",
|
||||
"INSERT INTO file_info (record_id, size, mime_id, modified_time) VALUES ($1, $2, $3, $4) ON CONFLICT (record_id) DO UPDATE SET mime_id = $3",
|
||||
m_record_id,
|
||||
m_size,
|
||||
*mime_id_e );
|
||||
*mime_id_e,
|
||||
mtime );
|
||||
|
||||
const auto mime_info {
|
||||
co_await db->execSqlCoro( "SELECT best_extension FROM mime WHERE mime_id = $1", *mime_id_e )
|
||||
|
||||
@@ -95,7 +95,7 @@ drogon::Task< drogon::HttpResponsePtr > ImportAPI::importFile( const drogon::Htt
|
||||
|
||||
// try to insert info if it's missing
|
||||
co_await db->execSqlCoro(
|
||||
"INSERT INTO file_info (record_id, mime_id, size, cluster_store_time) VALUES ($1, $2, $3, now()) ON CONFLICT DO NOTHING",
|
||||
"INSERT INTO file_info (record_id, mime_id, size, cluster_store_time, modified_time) VALUES ($1, $2, $3, now(), now()) ON CONFLICT DO NOTHING",
|
||||
record_id,
|
||||
*mime_id,
|
||||
data_length );
|
||||
|
||||
20
IDHANServer/src/filesystem/getModifiedTime.cpp
Normal file
20
IDHANServer/src/filesystem/getModifiedTime.cpp
Normal file
@@ -0,0 +1,20 @@
|
||||
//
|
||||
// Created by kj16609 on 11/4/25.
|
||||
//
|
||||
|
||||
#include <chrono>
|
||||
#include <cstddef>
|
||||
#include <filesystem>
|
||||
|
||||
namespace idhan::filesystem
|
||||
{
|
||||
|
||||
std::int64_t getLastWriteTime( const std::filesystem::path& path )
|
||||
{
|
||||
const auto file_mtime_local { std::filesystem::last_write_time( path ) };
|
||||
const auto file_mtime_unix { std::chrono::clock_cast< std::chrono::system_clock >( file_mtime_local ) };
|
||||
|
||||
return std::chrono::duration_cast< std::chrono::microseconds >( file_mtime_unix.time_since_epoch() ).count();
|
||||
}
|
||||
|
||||
} // namespace idhan::filesystem
|
||||
@@ -49,4 +49,12 @@ enum class FileState
|
||||
* @return
|
||||
*/
|
||||
ExpectedTask< FileState > validateFile( RecordID record_id );
|
||||
|
||||
/**
|
||||
* @brief Returns the mtime of the file in microseconds
|
||||
* @param path
|
||||
* @return
|
||||
*/
|
||||
std::int64_t getLastWriteTime( const std::filesystem::path& path );
|
||||
|
||||
} // namespace idhan::filesystem
|
||||
|
||||
@@ -14,31 +14,31 @@ namespace idhan
|
||||
|
||||
/**
 * @brief Upserts the file_info row for a record.
 *
 * Inserts size, mime, store time, modified time and extension. If a row for
 * the record already exists, only mime_id and extension are refreshed.
 *
 * @param record_id Record whose file_info row is written.
 * @param info Size, mime, extension and timestamps to store.
 * @param db Database client used to run the statement.
 */
drogon::Task<> setFileInfo( const RecordID record_id, const FileInfo info, const DbClientPtr db )
{
	const trantor::Date store_date {
		std::chrono::duration_cast< std::chrono::microseconds >( info.store_time.time_since_epoch() ).count()
	};

	const trantor::Date file_modified_date {
		std::chrono::duration_cast< std::chrono::microseconds >( info.modified_time.time_since_epoch() ).count()
	};

	const bool mime_known { info.mime_id != constants::INVALID_MIME_ID };

	// an unknown mime is stored as NULL
	std::optional< MimeID > mime_opt { mime_known ? std::optional< MimeID >( info.mime_id ) : std::nullopt };

	// the extension is used so we can still find the file even with an invalid mime,
	// so it is stored only when the mime is unknown and NULL otherwise
	std::optional< std::string > extension_opt {
		mime_known ? std::nullopt : std::optional< std::string >( info.extension )
	};

	// $6 is the extension; $5 is modified_time and must not be written into the extension column.
	// NOTE(review): on conflict only mime_id and extension are updated; size and the
	// timestamps of an existing row are left untouched. Confirm this is intended.
	co_await db->execSqlCoro(
		"INSERT INTO file_info (record_id, size, mime_id, cluster_store_time, modified_time, extension) VALUES ($1, $2, $3, $4, $5, $6) "
		"ON CONFLICT (record_id) DO UPDATE SET mime_id = $3, extension = $6",
		record_id,
		info.size,
		mime_opt,
		store_date,
		file_modified_date,
		extension_opt );
}
|
||||
|
||||
drogon::Task< std::expected< FileInfo, drogon::HttpResponsePtr > > gatherFileInfo(
|
||||
|
||||
@@ -27,6 +27,7 @@ struct FileInfo
|
||||
MimeID mime_id;
|
||||
std::string extension;
|
||||
std::chrono::time_point< std::chrono::system_clock > store_time;
|
||||
std::chrono::time_point< std::chrono::system_clock > modified_time;
|
||||
};
|
||||
|
||||
//! Populates a FileInfo struct with information from the data
|
||||
|
||||
Reference in New Issue
Block a user