From dbdb03954bb53d0a05704f33e2b7e62117f9cb2a Mon Sep 17 00:00:00 2001 From: kj16609 Date: Tue, 4 Nov 2025 14:36:37 -0500 Subject: [PATCH] Separates mtime and cluster store time into their own fields --- IDHANMigration/src/120-file_info.sql | 13 +- IDHANServer/src/api/cluster/scan.cpp | 139 +++++++++--------- IDHANServer/src/api/file/importFile.cpp | 2 +- .../src/filesystem/getModifiedTime.cpp | 20 +++ IDHANServer/src/filesystem/utility.hpp | 8 + IDHANServer/src/mime/FileInfo.cpp | 44 +++--- IDHANServer/src/mime/FileInfo.hpp | 1 + 7 files changed, 134 insertions(+), 93 deletions(-) create mode 100644 IDHANServer/src/filesystem/getModifiedTime.cpp diff --git a/IDHANMigration/src/120-file_info.sql b/IDHANMigration/src/120-file_info.sql index 28886cc..cab221e 100644 --- a/IDHANMigration/src/120-file_info.sql +++ b/IDHANMigration/src/120-file_info.sql @@ -1 +1,12 @@ -UPDATE file_info SET cluster_store_time = NULL; \ No newline at end of file +ALTER TABLE file_info + ALTER COLUMN cluster_store_time SET DEFAULT now(); + +ALTER TABLE file_info + ADD COLUMN modified_time TIMESTAMP; + +UPDATE file_info +SET cluster_store_time = now() +WHERE cluster_store_time IS NULL; + +ALTER TABLE file_info + ALTER COLUMN cluster_store_time SET NOT NULL; \ No newline at end of file diff --git a/IDHANServer/src/api/cluster/scan.cpp b/IDHANServer/src/api/cluster/scan.cpp index 3d07b09..ed3ed5b 100644 --- a/IDHANServer/src/api/cluster/scan.cpp +++ b/IDHANServer/src/api/cluster/scan.cpp @@ -82,7 +82,7 @@ class ScanContext std::filesystem::path m_path; std::size_t m_size; - ScanParams m_params; + ScanParams m_params {}; std::string m_mime_name {}; static constexpr auto INVALID_RECORD { std::numeric_limits< RecordID >::max() }; @@ -95,8 +95,11 @@ class ScanContext ExpectedTask< RecordID > checkRecord( SHA256 sha256, DbClientPtr db ); ExpectedTask< void > cleanupDoubleClusters( ClusterID found_cluster_id, DbClientPtr db ); + drogon::Task<> updateFileModifiedTime( drogon::orm::DbClientPtr db ); + 
ExpectedTask< void > insertFileInfo( drogon::orm::DbClientPtr db ); ExpectedTask< void > checkCluster( DbClientPtr db ); + ExpectedTask< bool > hasMime( DbClientPtr db ); ExpectedTask<> scanMime( DbClientPtr db ); @@ -224,21 +227,22 @@ ExpectedTask< SHA256 > ScanContext::checkSHA256( FileIOUring uring, const std::f sha256_hex ) ); } - // try to fix the mistake - auto new_path { bad_dir / m_path.filename() }; - try { - if ( !m_params.read_only ) std::filesystem::rename( m_path, new_path ); - } - catch ( std::exception& e ) - { - co_return std::unexpected( createInternalError( - "When scanning file at {} it was detected that the filename does not match the sha256 " - "{}. There was an error attempting to fix this: What: {}", - m_path.string(), - sha256_hex, - e.what() ) ); + if ( !m_params.read_only ) + { + const auto new_path { bad_dir / m_path.filename() }; + + // try to fix the mistake + std::filesystem::rename( m_path, new_path ); + + co_return std::unexpected( createInternalError( + "When scanning file at {} it was detected that the filename does " + "not match the sha256 {}. The file has been moved to {}", + m_path.string(), + sha256_hex, + new_path.string() ) ); + } } catch ( ... ) { @@ -248,13 +252,6 @@ ExpectedTask< SHA256 > ScanContext::checkSHA256( FileIOUring uring, const std::f m_path.string(), sha256_hex ) ); } - - co_return std::unexpected( createInternalError( - "When scanning file at {} it was detected that the filename does " - "not match the sha256 {}. 
The file has been moved to {}", - m_path.string(), - sha256_hex, - new_path.string() ) ); } co_return *sha256_e; @@ -290,18 +287,6 @@ ExpectedTask< RecordID > ScanContext::checkRecord( const SHA256 sha256, drogon:: co_return search_result[ 0 ][ 0 ].as< RecordID >(); } -trantor::Date getLastWriteTime( const std::filesystem::path& path ) -{ - const auto file_mtime_local { std::filesystem::last_write_time( path ) }; - const auto file_mtime_unix { std::chrono::clock_cast< std::chrono::system_clock >( file_mtime_local ) }; - - const trantor::Date date { - std::chrono::duration_cast< std::chrono::microseconds >( file_mtime_unix.time_since_epoch() ).count() - }; - - return date; -} - ExpectedTask< void > ScanContext::cleanupDoubleClusters( const ClusterID found_cluster_id, drogon::orm::DbClientPtr db ) { if ( co_await filesystem::checkFileExists( m_record_id, db ) ) @@ -322,6 +307,8 @@ ExpectedTask< void > ScanContext::cleanupDoubleClusters( const ClusterID found_c std::filesystem::remove( m_path ); } + + co_return {}; } log::warn( @@ -333,34 +320,43 @@ ExpectedTask< void > ScanContext::cleanupDoubleClusters( const ClusterID found_c co_return {}; } +drogon::Task<> ScanContext::updateFileModifiedTime( drogon::orm::DbClientPtr db ) +{ + const trantor::Date date { filesystem::getLastWriteTime( m_path ) }; + + log::debug( "mtime is {}", date.toFormattedString( true ) ); + + co_await db->execSqlCoro( "UPDATE file_info SET modified_time = $1 WHERE record_id = $2", date, m_record_id ); +} + +ExpectedTask<> ScanContext::insertFileInfo( drogon::orm::DbClientPtr db ) +{ + const trantor::Date date { filesystem::getLastWriteTime( m_path ) }; + co_await db->execSqlCoro( + "INSERT INTO file_info (record_id, cluster_id, size, modified_time) VALUES ($1, $2, $3, $4)", + m_record_id, + m_cluster_id, + m_size, + date ); + co_return {}; +} + ExpectedTask<> ScanContext::checkCluster( drogon::orm::DbClientPtr db ) { FGL_ASSERT( m_record_id != INVALID_RECORD, "Invalid record" ); - const auto 
file_info { co_await db->execSqlCoro( - "SELECT cluster_id, cluster_store_time FROM file_info WHERE record_id = $1", m_record_id ) }; + const auto file_info { + co_await db->execSqlCoro( "SELECT cluster_id, modified_time FROM file_info WHERE record_id = $1", m_record_id ) + }; + // create the file info if it doesn't already exist if ( file_info.empty() ) { - const trantor::Date date { getLastWriteTime( m_path ) }; - - co_await db->execSqlCoro( - "INSERT INTO file_info (record_id, cluster_id, size, cluster_store_time) VALUES ($1, $2, $3, $4)", - m_record_id, - m_cluster_id, - m_size, - date ); - - co_return {}; + co_return co_await insertFileInfo( db ); } - if ( file_info[ 0 ][ "cluster_store_time" ].isNull() ) + if ( file_info[ 0 ][ "modified_time" ].isNull() ) { - const trantor::Date date { getLastWriteTime( m_path ) }; - - log::debug( "mtime is {}", date.toFormattedString( true ) ); - - co_await db->execSqlCoro( - "UPDATE file_info SET cluster_store_time = $1 WHERE record_id = $2", date, m_record_id ); + co_await updateFileModifiedTime( db ); } // we found a cluster, check if it's the one we are about to add too @@ -372,36 +368,41 @@ ExpectedTask<> ScanContext::checkCluster( drogon::orm::DbClientPtr db ) // handle the double count, which will check if the found cluster contains the file and delete it from this one // if found. 
Otherwise the record's cluster is set to the current cluster auto result { co_await cleanupDoubleClusters( found_cluster_id, db ) }; - if ( !result ) co_return std::unexpected( result.error() ); + return_unexpected_error( result ); } co_return {}; } +ExpectedTask< bool > ScanContext::hasMime( DbClientPtr db ) +{ + auto current_mime { co_await db->execSqlCoro( + "SELECT mime_id, name FROM file_info JOIN mime USING (mime_id) WHERE record_id = $1 AND mime_id IS NOT NULL", + m_record_id ) }; + + if ( !current_mime.empty() ) + { + m_mime_name = current_mime[ 0 ][ 1 ].as< std::string >(); + co_return true; + } + + co_return false; +} + ExpectedTask<> ScanContext::scanMime( DbClientPtr db ) { FGL_ASSERT( m_record_id != INVALID_RECORD, "Invalid record" ); FileIOUring file_io { m_path }; - if ( !m_params.rescan_mime ) // skip checking if we have a mime if we are going to rescan it + // skip checking if we have a mime if we are going to rescan it + if ( !m_params.rescan_mime && co_await hasMime( db ) ) { - auto current_mime { co_await db->execSqlCoro( - "SELECT mime_id, name FROM file_info JOIN mime USING (mime_id) WHERE record_id = $1 AND mime_id IS NOT NULL", - m_record_id ) }; - - if ( !current_mime.empty() ) - { - m_mime_name = current_mime[ 0 ][ 1 ].as< std::string >(); - log::debug( - "Skipping mime scan for {} because it's already been scanned and the rescan_mime flag was false", - m_record_id ); - co_return {}; - } + co_return {}; } const auto mime_string_e { co_await mime::getMimeDatabase()->scan( file_io ) }; - const auto mtime { getLastWriteTime( m_path ) }; + const auto mtime { filesystem::getLastWriteTime( m_path ) }; if ( !mime_string_e ) { @@ -415,7 +416,7 @@ ExpectedTask<> ScanContext::scanMime( DbClientPtr db ) extension_str ); co_await db->execSqlCoro( - "INSERT INTO file_info (record_id, size, extension, cluster_store_time) VALUES ($1, $2, $3, $4) ON CONFLICT (record_id) DO UPDATE SET extension = $3, mime_id = NULL", + "INSERT INTO file_info (record_id, 
size, extension, modified_time) VALUES ($1, $2, $3, $4) ON CONFLICT (record_id) DO UPDATE SET extension = $3, mime_id = NULL", m_record_id, m_size, extension_str, @@ -431,7 +432,7 @@ ExpectedTask<> ScanContext::scanMime( DbClientPtr db ) return_unexpected_error( mime_id_e ); co_await db->execSqlCoro( - "INSERT INTO file_info (record_id, size, mime_id, cluster_store_time) VALUES ($1, $2, $3, $4) ON CONFLICT (record_id) DO UPDATE SET mime_id = $3", + "INSERT INTO file_info (record_id, size, mime_id, modified_time) VALUES ($1, $2, $3, $4) ON CONFLICT (record_id) DO UPDATE SET mime_id = $3", m_record_id, m_size, *mime_id_e, diff --git a/IDHANServer/src/api/file/importFile.cpp b/IDHANServer/src/api/file/importFile.cpp index 6838231..5f405b2 100644 --- a/IDHANServer/src/api/file/importFile.cpp +++ b/IDHANServer/src/api/file/importFile.cpp @@ -95,7 +95,7 @@ drogon::Task< drogon::HttpResponsePtr > ImportAPI::importFile( const drogon::Htt // try to insert info if it's missing co_await db->execSqlCoro( - "INSERT INTO file_info (record_id, mime_id, size, cluster_store_time) VALUES ($1, $2, $3, now()) ON CONFLICT DO NOTHING", + "INSERT INTO file_info (record_id, mime_id, size, cluster_store_time, modified_time) VALUES ($1, $2, $3, now(), now()) ON CONFLICT DO NOTHING", record_id, *mime_id, data_length ); diff --git a/IDHANServer/src/filesystem/getModifiedTime.cpp b/IDHANServer/src/filesystem/getModifiedTime.cpp new file mode 100644 index 0000000..74b2046 --- /dev/null +++ b/IDHANServer/src/filesystem/getModifiedTime.cpp @@ -0,0 +1,20 @@ +// +// Created by kj16609 on 11/4/25. 
+// + +#include +#include +#include + +namespace idhan::filesystem +{ + +std::int64_t getLastWriteTime( const std::filesystem::path& path ) +{ + const auto file_mtime_local { std::filesystem::last_write_time( path ) }; + const auto file_mtime_unix { std::chrono::clock_cast< std::chrono::system_clock >( file_mtime_local ) }; + + return std::chrono::duration_cast< std::chrono::microseconds >( file_mtime_unix.time_since_epoch() ).count(); +} + +} // namespace idhan::filesystem \ No newline at end of file diff --git a/IDHANServer/src/filesystem/utility.hpp b/IDHANServer/src/filesystem/utility.hpp index f8ff255..b59a50b 100644 --- a/IDHANServer/src/filesystem/utility.hpp +++ b/IDHANServer/src/filesystem/utility.hpp @@ -49,4 +49,12 @@ enum class FileState * @return */ ExpectedTask< FileState > validateFile( RecordID record_id ); + +/** + * @brief Returns the mtime of the file in microseconds + * @param path + * @return + */ +std::int64_t getLastWriteTime( const std::filesystem::path& path ); + } // namespace idhan::filesystem diff --git a/IDHANServer/src/mime/FileInfo.cpp b/IDHANServer/src/mime/FileInfo.cpp index b6dce37..c7c0668 100644 --- a/IDHANServer/src/mime/FileInfo.cpp +++ b/IDHANServer/src/mime/FileInfo.cpp @@ -14,31 +14,31 @@ namespace idhan drogon::Task<> setFileInfo( const RecordID record_id, const FileInfo info, const DbClientPtr db ) { - const trantor::Date date { + const trantor::Date store_date { std::chrono::duration_cast< std::chrono::microseconds >( info.store_time.time_since_epoch() ).count() }; - if ( info.mime_id == constants::INVALID_MIME_ID ) // if the mime is invalid (unknown) - { - // the extension is used so we can still find the file even with an invalid mime - co_await db->execSqlCoro( - "INSERT INTO file_info (record_id, size, mime_id, cluster_store_time, extension) VALUES ($1, $2, NULL, $3, $4) " - "ON CONFLICT (record_id) DO UPDATE SET size = $2, mime_id = NULL, cluster_store_time = $3, extension = $4", - record_id, - info.size, - date, - 
info.extension ); - } - else - { - co_await db->execSqlCoro( - "INSERT INTO file_info (record_id, size, mime_id, cluster_store_time, extension) VALUES ($1, $2, $3, $4, NULL) " - "ON CONFLICT (record_id) DO UPDATE SET size = $2, mime_id = $3, cluster_store_time = $4, extension = NULL", - record_id, - info.size, - info.mime_id, - date ); - } + const trantor::Date file_modified_date { + std::chrono::duration_cast< std::chrono::microseconds >( info.modified_time.time_since_epoch() ).count() + }; + + std::optional< MimeID > mime_opt { + info.mime_id != constants::INVALID_MIME_ID ? std::optional< MimeID >( info.mime_id ) : std::nullopt + }; + std::optional< std::string > extension_opt { + info.mime_id != constants::INVALID_MIME_ID ? std::optional< std::string >( info.extension ) : std::nullopt + }; + + // the extension is used so we can still find the file even with an invalid mime + co_await db->execSqlCoro( + "INSERT INTO file_info (record_id, size, mime_id, cluster_store_time, modified_time, extension) VALUES ($1, $2, $3, $4, $5, $6)" + "ON CONFLICT (record_id) DO UPDATE SET mime_id = $3, extension = $5", + record_id, + info.size, + mime_opt, + store_date, + file_modified_date, + extension_opt ); } drogon::Task< std::expected< FileInfo, drogon::HttpResponsePtr > > gatherFileInfo( diff --git a/IDHANServer/src/mime/FileInfo.hpp b/IDHANServer/src/mime/FileInfo.hpp index 332bf50..00f745e 100644 --- a/IDHANServer/src/mime/FileInfo.hpp +++ b/IDHANServer/src/mime/FileInfo.hpp @@ -27,6 +27,7 @@ struct FileInfo MimeID mime_id; std::string extension; std::chrono::time_point< std::chrono::system_clock > store_time; + std::chrono::time_point< std::chrono::system_clock > modified_time; }; //! Populates a FileInfo struct with information from the data