diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
index e5580fb39..ffc38ae47 100644
--- a/.github/FUNDING.yml
+++ b/.github/FUNDING.yml
@@ -3,4 +3,7 @@
 github: TheLastGimbus
 ko_fi: thelastgimbus
 liberapay: TheLastGimbus
-custom: [ "https://www.paypal.me/TheLastGimbus" ]
+custom:
+  - "https://www.paypal.me/TheLastGimbus"
+  - "https://ko-fi.com/thelastgimbus"
+  - "https://liberapay.com/TheLastGimbus"
diff --git a/.github/workflows/build-nightly.yml b/.github/workflows/build-nightly.yml
index 328456ab0..0f1fb0082 100644
--- a/.github/workflows/build-nightly.yml
+++ b/.github/workflows/build-nightly.yml
@@ -26,14 +26,13 @@ jobs:
           else
             echo "Unknown OS: $RUNNER_OS"
             exit 69
           fi
       - uses: dart-lang/setup-dart@v1
       - run: dart pub get
       - name: Build exe
         run: dart compile exe bin/gpth.dart -o ./${{ steps.exe_name.outputs.name }}
-      - name: Upload apk as artifact
+      - name: Upload exe as artifact
         uses: actions/upload-artifact@v3
         with:
-          name: gpth-nightly-${{ runner.os }}
+          name: gpth-nightly-${{ matrix.os }}
           path: ./${{ steps.exe_name.outputs.name }}
           if-no-files-found: error
\ No newline at end of file
diff --git a/.github/workflows/dart-test.yaml b/.github/workflows/dart-test.yaml
index 18c4422e3..2946bc325 100644
--- a/.github/workflows/dart-test.yaml
+++ b/.github/workflows/dart-test.yaml
@@ -23,4 +23,5 @@ jobs:
       - run: dart pub get
       - name: Verify formatting
         run: dart format --output=none --set-exit-if-changed .
-      - run: dart analyze --fatal-infos
+      - name: Run analyzer
+        run: dart analyze --fatal-infos --fatal-warnings
diff --git a/.github/workflows/new-release.yml b/.github/workflows/new-release.yml
index 590ddd924..cddce2d35 100644
--- a/.github/workflows/new-release.yml
+++ b/.github/workflows/new-release.yml
@@ -14,10 +14,10 @@ jobs:
         run: echo "tag=$(echo ${{ github.ref }} | sed 's/refs\/tags\///')" >> $GITHUB_OUTPUT
       - name: Get changelog
         run: python scripts/get_changelog.py --version ${{ steps.clean_tag.outputs.tag }} > ./body-file.txt
-        # Just in case changelogs won't work out
-        # - name: Get tag message
-        #   id: tag_message
-        #   run: echo "name=message=$(git tag -l --format='%(contents)' ${{ github.ref }})" >> $GITHUB_OUTPUT
+      # Just in case changelogs won't work out
+      # - name: Get tag message
+      #   id: tag_message
+      #   run: echo "name=message=$(git tag -l --format='%(contents)' ${{ github.ref }})" >> $GITHUB_OUTPUT
       - name: Create GH-Release
         uses: softprops/action-gh-release@v1
         with:
diff --git a/.gitignore b/.gitignore
index 870e8757c..b1330b533 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+# Ignore IDE directories
 .idea/
 .vscode/
 
@@ -8,8 +9,33 @@
 # Conventional directory for build output.
 build/
 
+# Ignore photos and output directories
 photos/
-
 ALL_PHOTOS/
 output/
+
+# Ignore log files
 *.log
+
+# Ignore analysis options
+analysis_options.yaml
+
+# Ignore generated files
+*.g.dart
+*.freezed.dart
+
+# Ignore coverage output
+coverage/
+
+# Ignore test result files
+test-results/
+
+# Ignore temporary files
+*.tmp
+*.temp
+
+# Ignore macOS specific files
+.DS_Store
+
+# Ignore Windows specific files
+Thumbs.db
diff --git a/analysis_options.yaml b/analysis_options.yaml
index dee8927aa..296c1fb49 100644
--- a/analysis_options.yaml
+++ b/analysis_options.yaml
@@ -13,15 +13,141 @@
 include: package:lints/recommended.yaml
 
-# Uncomment the following section to specify additional rules.
+linter:
+  rules:
+    - camel_case_types
+    - prefer_const_constructors
+    - prefer_const_literals_to_create_immutables
+    - avoid_print
+    - prefer_final_fields
+    - unnecessary_this
+    - prefer_single_quotes
+    - avoid_redundant_argument_values
+    - prefer_typing_uninitialized_variables
+    - avoid_empty_else
+    - avoid_init_to_null
+    - avoid_returning_null_for_future
+    - avoid_unnecessary_containers
+    - avoid_void_async
+    - always_declare_return_types
+    - always_specify_types
+    - annotate_overrides
+    - avoid_annotating_with_dynamic
+    - avoid_as
+    - avoid_catches_without_on_clauses
+    - avoid_returning_this
+    - avoid_types_as_parameter_names
+    - avoid_unused_constructor_parameters
+    - avoid_web_libraries_in_flutter
+    - await_only_futures
+    - camel_case_extensions
+    - cancel_subscriptions
+    - close_sinks
+    - comment_references
+    - control_flow_in_finally
+    - curly_braces_in_flow_control_structures
+    - diagnostic_describe_all_properties
+    - directives_ordering
+    - empty_catches
+    - empty_constructor_bodies
+    - empty_statements
+    - file_names
+    - hash_and_equals
+    - iterable_contains_unrelated_type
+    - join_return_with_assignment
+    - library_names
+    - library_prefixes
+    - list_remove_unrelated_type
+    - literal_only_boolean_expressions
+    - no_adjacent_strings_in_list
+    - no_duplicate_case_values
+    - no_logic_in_create_state
+    - non_constant_identifier_names
+    - null_closures
+    - omit_local_variable_types
+    - one_member_abstracts
+    - only_throw_errors
+    - overridden_fields
+    - package_api_docs
+    - package_names
+    - parameter_assignments
+    - prefer_adjacent_string_concatenation
+    - prefer_asserts_in_initializer_lists
+    - prefer_asserts_with_message
+    - prefer_bool_in_asserts
+    - prefer_collection_literals
+    - prefer_conditional_assignment
+    - prefer_const_constructors_in_immutables
+    - prefer_const_declarations
+    - prefer_const_literals_to_create_immutables
+    - prefer_contains
+    - prefer_double_quotes
+    - prefer_equal_for_default_values
+    - prefer_expression_function_bodies
+    - prefer_final_locals
+    - prefer_final_parameters
+    - prefer_foreach
+    - prefer_function_declarations_over_variables
+    - prefer_generic_function_type_aliases
+    - prefer_if_elements_to_conditional_expressions
+    - prefer_initializing_formals
+    - prefer_interpolation_to_compose_strings
+    - prefer_is_empty
+    - prefer_is_not_empty
+    - prefer_iterable_whereType
+    - prefer_mixin
+    - prefer_null_aware_operators
+    - prefer_relative_imports
+    - prefer_single_quotes
+    - prefer_spread_collections
+    - prefer_typing_uninitialized_variables
+    - provide_deprecation_message
+    - public_member_api_docs
+    - recursive_getters
+    - secure_pubspec_urls
+    - slash_for_doc_comments
+    - sort_child_properties_last
+    - sort_constructors_first
+    - sort_unnamed_constructors_first
+    - test_types_in_equals
+    - throw_in_finally
+    - type_annotate_public_apis
+    - type_init_formals
+    - unawaited_futures
+    - unnecessary_await_in_return
+    - unnecessary_brace_in_string_interps
+    - unnecessary_const
+    - unnecessary_lambdas
+    - unnecessary_new
+    - unnecessary_null_aware_assignments
+    - unnecessary_null_in_if_null_operators
+    - unnecessary_overrides
+    - unnecessary_parenthesis
+    - unnecessary_statements
+    - unnecessary_string_escapes
+    - unnecessary_string_interpolations
+    - unnecessary_this
+    - unsafe_html
+    - unrelated_type_equality_checks
+    - use_function_type_syntax_for_parameters
+    - use_key_in_widget_constructors
+    - use_late_for_private_fields_and_variables
+    - use_rethrow_when_possible
+    - use_setters_to_change_properties
+    - use_string_buffers
+    - use_super_parameters
+    - use_test_throws_matchers
+    - use_to_and_as_if_applicable
+    - valid_regexps
 
-# linter:
-#   rules:
-#     - camel_case_types
-
-# analyzer:
-#   exclude:
-#     - path/to/excluded/files/**
+analyzer:
+  exclude:
+    - '**/*.g.dart'
+    - '**/*.freezed.dart'
+    - '**/generated/**'
+    - '**/build/**'
+    - '**/coverage/**'
+    - '**/test-results/**'
 
 # For more information about the core and recommended set of lints, see
 # https://dart.dev/go/core-lints
diff --git a/bin/gpth.dart b/bin/gpth.dart
index 0e2f2e0fd..f3b0a8fca 100644
--- a/bin/gpth.dart
+++ b/bin/gpth.dart
@@ -11,6 +11,9 @@ import 'package:gpth/media.dart';
 import 'package:gpth/moving.dart';
 import 'package:gpth/utils.dart';
 import 'package:path/path.dart' as p;
+import 'package:logging/logging.dart';
+import 'package:gpth/logging_setup.dart';
+
+final Logger _logger = Logger('GooglePhotosHelper');
 
 const helpText = """GooglePhotosTakeoutHelper v$version - The Dart successor
@@ -26,6 +29,13 @@ Then, run: gpth --input "folder/with/all/takeouts" --output "your/output/folder"
 
 const barWidth = 40;
 
 void main(List<String> arguments) async {
+  Logger.root.level = Level.ALL;
+  Logger.root.onRecord.listen((record) {
+    print('${record.level.name}: ${record.message}');
+  });
+
+  setupLogging(); // From lib/logging_setup.dart
+
   final parser = ArgParser()
     ..addFlag('help', abbr: 'h', negatable: false)
     ..addOption(
@@ -157,8 +167,12 @@
       for (final extractor in dateExtractors) {
         date = await extractor(file);
         if (date != null) {
-          await file.setLastModified(date);
-          set++;
+          try {
+            await file.setLastModified(date);
+            set++;
+          } catch (e) {
+            _logger.warning("Failed to set last modified date for ${file.path}: $e");
+          }
           break;
         }
       }
@@ -207,6 +221,27 @@
   }
   await output.create(recursive: true);
 
+  final parser = ArgParser()
+    ..addOption('input', abbr: 'i', defaultsTo: '.')
+    ..addOption('output', abbr: 'o')
+    ..addFlag('dry-run', abbr: 'd')
+    ..addFlag('deduplicate', help: 'Remove duplicates via content hashing')
+    ..addOption('date-format', defaultsTo: 'yyyy/yyyy-MM');
+
+  try {
+    final results = parser.parse(args);
+    await processDirectory(
+      Directory(results['input']),
+      outputDir: Directory(results['output']),
+      dryRun: results['dry-run'],
+      deduplicate: results['deduplicate'],
+      dateFormat: results['date-format'],
+    );
+  } catch (e, stackTrace) {
+    _logger.severe('Fatal error: $e', stackTrace);
+    exit(1);
+  }
+
   /// ##################################################
 
   // Okay, time to explain the structure of things here
@@ -258,7 +293,7 @@
   // recursive=true makes it find everything nicely even if user id dumb 😋
   await for (final d in input.list(recursive: true).whereType<Directory>()) {
-    if (isYearFolder(d)) {
+    if (await isYearFolder(d)) {
       yearFolders.add(d);
     } else if (await isAlbumFolder(d)) {
       albumFolders.add(d);
diff --git a/lib/date_extractor.dart b/lib/date_extractor.dart
index c75368c9f..af1102a4b 100644
--- a/lib/date_extractor.dart
+++ b/lib/date_extractor.dart
@@ -4,5 +4,5 @@ export 'date_extractors/exif_extractor.dart';
 export 'date_extractors/guess_extractor.dart';
 export 'date_extractors/json_extractor.dart';
 
-/// Function that can take a file and potentially extract DateTime of it
+/// A function type that takes a [File] and potentially extracts a [DateTime] from it.
 typedef DateTimeExtractor = Future<DateTime?> Function(File);
diff --git a/lib/date_extractors/exif_extractor.dart b/lib/date_extractors/exif_extractor.dart
index 44ccda8b5..b93e9e752 100644
--- a/lib/date_extractors/exif_extractor.dart
+++ b/lib/date_extractors/exif_extractor.dart
@@ -4,6 +4,23 @@ import 'dart:math';
 
 import 'package:exif/exif.dart';
 import 'package:gpth/utils.dart';
 import 'package:mime/mime.dart';
+import 'package:image/image.dart' as img;
+import 'package:logging/logging.dart';
+
+final _logger = Logger('ExifExtractor');
+
+DateTime? extractDateFromExif(File file) {
+  try {
+    final bytes = file.readAsBytesSync();
+    final image = img.decodeImage(bytes);
+    if (image == null) {
+      _logger.warning('Unsupported image format: ${file.path}');
+      return null;
+    }
+    final exifDate = image.exif?.dateTimeOriginal;
+    return exifDate ?? file.lastModifiedSync();
+  } catch (e) {
+    _logger.warning('EXIF extraction failed for ${file.path}: $e');
+    return null;
+  }
+}
 
 /// DateTime from exif data *potentially* hidden within a [file]
 ///
@@ -19,7 +36,13 @@ Future<DateTime?> exifExtractor(File file) async {
   // i have nvme + btrfs, but still, will leave as is
   final bytes = await file.readAsBytes();
   // this returns empty {} if file doesn't have exif so don't worry
-  final tags = await readExifFromBytes(bytes);
+  Map tags;
+  try {
+    tags = await readExifFromBytes(bytes);
+  } catch (e) {
+    _logger.warning('Failed to read EXIF data from ${file.path}: $e');
+    return null;
+  }
   String? datetime;
   // try if any of these exists
   datetime ??= tags['Image DateTime']?.printable;
@@ -32,10 +55,15 @@
       .replaceAll('/', ':')
       .replaceAll('.', ':')
      .replaceAll('\\', ':')
-      .replaceAll(': ', ':0')
+      .replaceAll(': ', ':0');
+  if (datetime.length < 19) {
+    _logger.warning('Invalid EXIF datetime format in ${file.path}: $datetime');
+    return null;
+  }
+  datetime = datetime
       .substring(0, min(datetime.length, 19))
       .replaceFirst(':', '-') // replace two : year/month to comply with iso
       .replaceFirst(':', '-');
   // now date is like: "1999-06-23 23:55"
   return DateTime.tryParse(datetime);
-}
+}
\ No newline at end of file
diff --git a/lib/date_extractors/guess_extractor.dart b/lib/date_extractors/guess_extractor.dart
index 5522d9391..82374edd0 100644
--- a/lib/date_extractors/guess_extractor.dart
+++ b/lib/date_extractors/guess_extractor.dart
@@ -2,6 +2,9 @@ import 'dart:io';
 
 import 'package:convert/convert.dart';
 import 'package:path/path.dart' as p;
+import 'package:logging/logging.dart';
+
+final _logger = Logger('GuessExtractor');
 
 // These are thanks to @hheimbuerger <3
 final _commonDatetimePatterns = [
@@ -9,25 +12,25 @@
   [
     RegExp(
        r'(?<date>(20|19|18)\d{2}(01|02|03|04|05|06|07|08|09|10|11|12)[0-3]\d-\d{6})'),
-    'YYYYMMDD-hhmmss'
+    'yyyyMMdd-HHmmss'
   ],
   // example: IMG_20190509_154733-edited.jpg, MVIMG_20190215_193501.MP4, IMG_20190221_112112042_BURST000_COVER_TOP.MP4
   [
     RegExp(
        r'(?<date>(20|19|18)\d{2}(01|02|03|04|05|06|07|08|09|10|11|12)[0-3]\d_\d{6})'),
-    'YYYYMMDD_hhmmss',
+    'yyyyMMdd_HHmmss',
   ],
   // example: Screenshot_2019-04-16-11-19-37-232_com.google.a.jpg
   [
     RegExp(
        r'(?<date>(20|19|18)\d{2}-(01|02|03|04|05|06|07|08|09|10|11|12)-[0-3]\d-\d{2}-\d{2}-\d{2})'),
-    'YYYY-MM-DD-hh-mm-ss',
+    'yyyy-MM-dd-HH-mm-ss',
   ],
   // example: signal-2020-10-26-163832.jpg
   [
     RegExp(
        r'(?<date>(20|19|18)\d{2}-(01|02|03|04|05|06|07|08|09|10|11|12)-[0-3]\d-\d{6})'),
-    'YYYY-MM-DD-hhmmss',
+    'yyyy-MM-dd-HHmmss',
   ],
   // Those two are thanks to @matt-boris <3
   // https://github.com/TheLastGimbus/GooglePhotosTakeoutHelper/commit/e0d9ee3e71def69d74eba7cf5ec204672924726d
@@ -35,19 +38,45 @@ final _commonDatetimePatterns = [
   [
     RegExp(
        r'(?<date>(20|19|18)\d{2}(01|02|03|04|05|06|07|08|09|10|11|12)[0-3]\d{7})'),
-    'YYYYMMDDhhmmss',
+    'yyyyMMddHHmmss',
   ],
   // example: 2016_01_30_11_49_15.mp4
   [
     RegExp(
        r'(?<date>(20|19|18)\d{2}_(01|02|03|04|05|06|07|08|09|10|11|12)_[0-3]\d_\d{2}_\d{2}_\d{2})'),
-    'YYYY_MM_DD_hh_mm_ss',
+    'yyyy_MM_dd_HH_mm_ss',
   ],
 ];
+
+final RegExp _dateRegex = RegExp(
+  r'(20\d{2})([0-1]\d)([0-3]\d)[_\-]?([0-2]\d)([0-5]\d)([0-5]\d)'
+);
+
+DateTime? guessDateFromFilename(String filename) {
+  final match = _dateRegex.firstMatch(filename);
+  if (match == null) return null;
+
+  try {
+    return DateTime(
+      int.parse(match.group(1)!),
+      int.parse(match.group(2)!),
+      int.parse(match.group(3)!),
+      int.parse(match.group(4)!),
+      int.parse(match.group(5)!),
+      int.parse(match.group(6)!),
+    );
+  } catch (e) {
+    _logger.warning('Failed to parse date from filename: $filename');
+    return null;
+  }
+}
+
 /// Guesses DateTime from [file]s name
 /// - for example Screenshot_20190919-053857.jpg - we can guess this 😎
 Future<DateTime?> guessExtractor(File file) async {
+  final guessedDate = guessDateFromFilename(p.basename(file.path));
+  if (guessedDate != null) return guessedDate;
+
   for (final pat in _commonDatetimePatterns) {
     // extract date str with regex
     final match = (pat.first as RegExp).firstMatch(p.basename(file.path));
diff --git a/lib/date_extractors/json_extractor.dart b/lib/date_extractors/json_extractor.dart
index b488dff91..737b07fe4 100644
--- a/lib/date_extractors/json_extractor.dart
+++ b/lib/date_extractors/json_extractor.dart
@@ -6,25 +6,28 @@ import 'package:gpth/extras.dart' as extras;
 import 'package:gpth/utils.dart';
 import 'package:path/path.dart' as p;
 import 'package:unorm_dart/unorm_dart.dart' as unorm;
+import 'package:logging/logging.dart';
+
+final _logger = Logger('JsonExtractor');
 
 /// Finds corresponding json file with info and gets 'photoTakenTime' from it
 Future<DateTime?> jsonExtractor(File file, {bool tryhard = false}) async {
   final jsonFile = await _jsonForFile(file, tryhard: tryhard);
   if (jsonFile == null) return null;
+  return extractDateFromJson(jsonFile);
+}
+
+DateTime? extractDateFromJson(File jsonFile) {
   try {
-    final data = jsonDecode(await jsonFile.readAsString());
-    final epoch = int.parse(data['photoTakenTime']['timestamp'].toString());
-    return DateTime.fromMillisecondsSinceEpoch(epoch * 1000);
-  } on FormatException catch (_) {
-    // this is when json is bad
-    return null;
-  } on FileSystemException catch (_) {
-    // this happens for issue #143
-    // "Failed to decode data using encoding 'utf-8'"
-    // maybe this will self-fix when dart itself support more encodings
-    return null;
-  } on NoSuchMethodError catch (_) {
-    // this is when tags like photoTakenTime aren't there
+    final data = json.decode(jsonFile.readAsStringSync());
+    final timestamp = data['photoTakenTime']?['timestamp'] ??
+        data['creationTime']?['timestamp'] ??
+        data['modificationTime']?['timestamp'];
+    return timestamp != null
+        ? DateTime.fromMillisecondsSinceEpoch(int.parse(timestamp.toString()) * 1000)
+        : null;
+  } catch (e) {
+    _logger.warning('Invalid JSON in ${jsonFile.path}: $e');
     return null;
   }
 }
@@ -55,7 +58,7 @@ Future<File?> _jsonForFile(File file, {required bool tryhard}) async {
   return null;
 }
 
-// if the originally file was uploaded without an extension, 
+// if the originally file was uploaded without an extension,
 // (for example, "20030616" (jpg but without ext))
 // it's json won't have the extension ("20030616.json"), but the image
 // itself (after google proccessed it) - will ("20030616.jpg" tadam)
diff --git a/lib/extras.dart b/lib/extras.dart
index edb477020..90d239cc6 100644
--- a/lib/extras.dart
+++ b/lib/extras.dart
@@ -3,6 +3,8 @@ import 'package:unorm_dart/unorm_dart.dart' as unorm;
 
 import 'media.dart';
 
+/// List of extra formats indicating edited or modified files in various languages.
+/// These need to be lowercase.
 const extraFormats = [
   // EN/US - thanks @DalenW
   '-edited',
@@ -23,14 +25,13 @@ const extraFormats = [
   '-modifié',
   // ES - @Sappstal report
   '-ha editado',
-  // CA - @Sappstal report 
+  // CA - @Sappstal report
   '-editat',
   // Add more "edited" flags in more languages if you want.
-  // They need to be lowercase.
 ];
 
-/// Removes any media that match any of "extra" formats
-/// Returns count of removed
+/// Removes any media that match any of the "extra" formats.
+/// Returns the count of removed media.
 int removeExtras(List<Media> media) {
   final copy = media.toList();
   var count = 0;
diff --git a/lib/folder_classify.dart b/lib/folder_classify.dart
index 66cb5d0ab..0d73217e3 100644
--- a/lib/folder_classify.dart
+++ b/lib/folder_classify.dart
@@ -1,12 +1,27 @@
-/// This file contains utils for determining type of a folder
-/// Whether it's a legendary "year folder", album, trash, etc
+/// This file contains utilities for determining the type of a folder.
+/// Whether it's a legendary "year folder", album, trash, etc.
 
 import 'dart:io';
 
 import 'package:gpth/utils.dart';
 import 'package:path/path.dart' as p;
 
-bool isYearFolder(Directory dir) =>
-    RegExp(r'^Photos from (20|19|18)\d{2}$').hasMatch(p.basename(dir.path));
+/// Checks if the given directory is a "year folder".
+/// A "year folder" is named in the format "Photos from YYYY".
+bool isYearFolder(Directory dir) {
+  final yearFolderPattern = RegExp(r'^Photos from (20|19|18)\d{2}$');
+  return yearFolderPattern.hasMatch(p.basename(dir.path));
+}
 
-Future<bool> isAlbumFolder(Directory dir) =>
-    dir.parent.list().whereType<Directory>().any((e) => isYearFolder(e));
+/// Checks if the given directory is an album folder.
+/// An album folder is a directory that is a sibling of a "year folder".
+Future<bool> isAlbumFolder(Directory dir) async {
+  try {
+    return await dir.parent
+        .list()
+        .whereType<Directory>()
+        .any((e) => isYearFolder(e));
+  } catch (e) {
+    print('Error checking if directory is an album folder: $e');
+    return false;
+  }
+}
diff --git a/lib/grouping.dart b/lib/grouping.dart
index 3610d40ca..63520d3e9 100644
--- a/lib/grouping.dart
+++ b/lib/grouping.dart
@@ -1,6 +1,5 @@
-/// This files contains functions for removing duplicates and detecting albums
-///
-/// That's because their logic looks very similar and they share code
+/// This file contains functions for removing duplicates and detecting albums.
+/// The logic for these functions is very similar and they share code.
 
 import 'dart:io';
 
@@ -9,24 +8,23 @@ import 'package:gpth/media.dart';
 import 'package:path/path.dart' as p;
 
 extension Group on Iterable<Media> {
-  /// This groups your media into map where key is something that they share
-  /// and value is the List of those media are the same
+  /// Groups your media into a map where the key is something that they share
+  /// and the value is the list of those media that are the same.
   ///
-  /// Key may be "245820998bytes", where there was no other file same size
-  /// (no need to calculate hash), or hash.toSting'ed where hash was calculated
+  /// The key may be "245820998bytes", where there was no other file of the same size
+  /// (no need to calculate hash), or hash.toString'ed where the hash was calculated.
   ///
-  /// Groups may be 1-lenght, where element was unique, or n-lenght where there
-  /// were duplicates
+  /// Groups may be 1-length, where the element was unique, or n-length where there
+  /// were duplicates.
   Map<String, List<Media>> groupIdentical() {
     final output = <String, List<Media>>{};
-    // group files by size - can't have same hash with diff size
-    // ignore: unnecessary_this
-    for (final sameSize in this.groupListsBy((e) => e.size).entries) {
-      // just add with "...bytes" key if just one
+    // Group files by size - can't have the same hash with different sizes
+    for (final sameSize in groupListsBy((e) => e.size).entries) {
+      // Just add with "...bytes" key if there's only one
       if (sameSize.value.length <= 1) {
         output['${sameSize.key}bytes'] = sameSize.value;
       } else {
-        // ...calculate their full hashes and group by them
+        // Calculate their full hashes and group by them
         output.addAll(sameSize.value.groupListsBy((e) => e.hash.toString()));
       }
     }
@@ -34,40 +32,38 @@ extension Group on Iterable<Media> {
   }
 }
 
-/// Removes duplicate media from list of media
-///
+/// Removes duplicate media from the list of media.
 /// This is meant to be used *early*, and it's aware of un-merged albums.
 /// Meaning, it will leave duplicated files if they have different
-/// [Media.albums] value
-///
-/// Uses file size, then sha256 hash to distinct
+/// [Media.albums] values.
 ///
-/// Returns count of removed
+/// Uses file size, then sha256 hash to distinguish duplicates.
+/// Returns the count of removed duplicates.
 int removeDuplicates(List<Media> media) {
   var count = 0;
   final byAlbum = media
-      // group by albums as we will merge those later
+      // Group by albums as we will merge those later
       // (to *not* compare hashes between albums)
       .groupListsBy((e) => e.files.keys.first)
       .values
-      // group by hash
+      // Group by hash
       .map((albumGroup) => albumGroup.groupIdentical().values);
-  // we don't care about album organization now - flatten
+  // We don't care about album organization now - flatten
   final Iterable<List<Media>> hashGroups = byAlbum.flattened;
   for (final group in hashGroups) {
-    // sort by best date extraction, then file name length
-    // using strings to sort by two values is a sneaky trick i learned at
+    // Sort by best date extraction, then file name length
+    // Using strings to sort by two values is a sneaky trick I learned at
     // https://stackoverflow.com/questions/55920677/how-to-sort-a-list-based-on-two-values
-    // note: we are comparing accuracy here tho we do know that *all*
-    // of them have it null - i'm leaving this just for sake
+    // Note: we are comparing accuracy here though we do know that *all*
+    // of them have it null - I'm leaving this just for the sake of completeness
     group.sort((a, b) =>
         '${a.dateTakenAccuracy ?? 999}${p.basename(a.firstFile.path).length}'
            .compareTo(
                '${b.dateTakenAccuracy ?? 999}${p.basename(b.firstFile.path).length}'));
-    // get list of all except first
+    // Get list of all except the first
     for (final e in group.sublist(1)) {
-      // remove them from media
+      // Remove them from media
       media.remove(e);
       count++;
     }
   }
@@ -76,29 +72,30 @@
   return count;
 }
 
+/// Returns the name of the album from the directory.
 String albumName(Directory albumDir) => p.basename(albumDir.path);
 
 /// This will analyze [allMedia], find which files are hash-same, and merge
-/// all of them into single [Media] object with all album names they had
+/// all of them into a single [Media] object with all album names they had.
 void findAlbums(List<Media> allMedia) {
   for (final group in allMedia.groupIdentical().values) {
-    if (group.length <= 1) continue; // then this isn't a group
-    // now, we have [group] list that contains actual sauce:
+    if (group.length <= 1) continue; // Then this isn't a group
+    // Now, we have [group] list that contains actual duplicates:
     final allFiles = group.fold(
       <String?, File>{},
      (allFiles, e) => allFiles..addAll(e.files),
    );
-    // sort by best date extraction
+    // Sort by best date extraction
    group.sort((a, b) =>
        (a.dateTakenAccuracy ?? 999).compareTo((b.dateTakenAccuracy ?? 999)));
-    // remove original dirty ones
+    // Remove original duplicates
    for (final e in group) {
      allMedia.remove(e);
    }
-    // set the first (best) one complete album list
+    // Set the first (best) one complete album list
    group.first.files = allFiles;
-    // add our one, precious ✨perfect✨ one
+    // Add our one, precious ✨perfect✨ one
    allMedia.add(group.first);
   }
 }
diff --git a/lib/interactive.dart b/lib/interactive.dart
index 97198790f..788a4f2d1 100644
--- a/lib/interactive.dart
+++ b/lib/interactive.dart
@@ -97,7 +97,7 @@ Future<Directory> getInputDir() async {
     return getOutput();
   }
   print('Cool!');
-  sleep(1);
+  await sleep(1);
   return Directory(dir);
 }
 
@@ -161,7 +161,7 @@ Future<Directory> getOutput() async {
     return getOutput();
   }
   print('Cool!');
-  sleep(1);
+  await sleep(1);
   return Directory(dir);
 }
 
diff --git a/lib/logging_setup.dart b/lib/logging_setup.dart
new file mode 100644
index 000000000..7f5ed6e43
--- /dev/null
+++ b/lib/logging_setup.dart
@@ -0,0 +1,10 @@
+import 'package:logging/logging.dart';
+
+void setupLogging() {
+  Logger.root.level = Level.ALL;
+  Logger.root.onRecord.listen((record) {
+    final errorDetails = record.error != null ? ' Error: ${record.error}' : '';
+    final stackTraceDetails = record.stackTrace != null ? ' StackTrace: ${record.stackTrace}' : '';
+    print('${record.time} [${record.level.name}] ${record.loggerName}: ${record.message}$errorDetails$stackTraceDetails');
+  });
+}
\ No newline at end of file
diff --git a/lib/media.dart b/lib/media.dart
index 8b6abb6b4..b2ad7d4d9 100644
--- a/lib/media.dart
+++ b/lib/media.dart
@@ -3,25 +3,25 @@ import 'dart:io';
 
 import 'package:crypto/crypto.dart';
 import 'package:gpth/utils.dart';
 
-/// Abstract of a *media* - a photo or video
-/// Main thing is the [file] - this should not change
+/// Abstract of a *media* - a photo or video.
+/// Main thing is the [file] - this should not change.
 ///
-/// [size] and [hash] getter are here because we can easily cache
+/// [size] and [hash] getter are here because we can easily cache.
 ///
 /// [dateTakenAccuracy] is a number used to compare with other [Media]. When
 /// you find a duplicate, use one that has lower [dateTakenAccuracy] number.
-/// this and [dateTaken] should either both be null or both filled
+/// This and [dateTaken] should either both be null or both filled.
 class Media {
-  /// First file with media, used in early stage when albums are not merged
+  /// First file with media, used in early stage when albums are not merged.
   ///
-  /// BE AWARE OF HOW YOU USE IT
+  /// BE AWARE OF HOW YOU USE IT.
   File get firstFile => files.values.first;
 
-  /// Map between albums and files of same given media
+  /// Map between albums and files of same given media.
   ///
   /// This is heavily mutated - at first, media from year folders have this
   /// with single null key, and those from albums have one name.
-  /// Then, they are merged into one by algos etc.
+  /// Then, they are merged into one by algorithms etc.
   ///
   /// At the end of the script, this will have *all* locations of given media,
   /// so that we can safely:
@@ -32,23 +32,23 @@
   /// ```
   Map<String?, File> files;
 
-  // cache
+  // Cache for size
   int? _size;
 
-  /// will be used for finding duplicates/albums
+  /// Will be used for finding duplicates/albums.
   int get size => _size ??= firstFile.lengthSync();
 
-  /// DateTaken from any source
+  /// DateTaken from any source.
   DateTime? dateTaken;
 
-  /// higher the worse
+  /// Higher the worse.
   int? dateTakenAccuracy;
 
-  //cache
+  // Cache for hash
   Digest? _hash;
 
-  /// will be used for finding duplicates/albums
-  /// WARNING: Returns same value for files > [maxFileSize]
+  /// Will be used for finding duplicates/albums.
+  /// WARNING: Returns same value for files > [maxFileSize].
   Digest get hash => _hash ??= firstFile.lengthSync() > maxFileSize
       ? Digest([0])
       : sha256.convert(firstFile.readAsBytesSync());
@@ -66,3 +66,15 @@ class Media {
       '${files.keys.length > 1 ? ', albums: ${files.keys}' : ''}'
       ')';
 }
+
+/// Utility function to create a Media object from a single file.
+Future<Media> createMediaFromFile(File file) async {
+  final files = {null: file};
+  return Media(files);
+}
+
+/// Utility function to create a Media object from a list of files.
+Future<Media> createMediaFromFiles(List<File> files) async {
+  final fileMap = {for (var file in files) p.basename(file.path): file};
+  return Media(fileMap);
+}
diff --git a/lib/moving.dart b/lib/moving.dart
index e995a0e80..e513e8cd7 100644
--- a/lib/moving.dart
+++ b/lib/moving.dart
@@ -21,6 +21,12 @@ File findNotExistingName(File initialFile) {
   return file;
 }
 
+Future organizeMedia(Media media, {required bool deduplicate}) async {
+  if (deduplicate) {
+    await _handleDuplicates(media);
+  }
+}
+
 /// This will create symlink on unix and shortcut on windoza
 ///
 /// Uses [findNotExistingName] for safety
@@ -128,7 +134,7 @@ Stream<int> moveFiles(
   /// moves/copies file with safe name
   // it's here because we do this for two cases
-  moveFile() async {
+  Future moveFile() async {
     final freeFile = findNotExistingName(
         File(p.join(folder.path, p.basename(file.value.path))));
     try {
@@ -168,32 +174,35 @@
     }
 
     // Done! Now, set the date:
-
-    var time = m.dateTaken ?? DateTime.now();
-    if (Platform.isWindows && time.isBefore(DateTime(1970))) {
-      print(
-          'WARNING: ${m.firstFile.path} has date $time, which is before 1970 '
-          '(not supported on Windows) - will be set to 1970-01-01');
-      time = DateTime(1970);
-    }
-    try {
-      await result.setLastModified(time);
-    } on OSError catch (e) {
-      // Sometimes windoza throws error but successes anyway 🙃:
-      // https://github.com/TheLastGimbus/GooglePhotosTakeoutHelper/issues/229#issuecomment-1685085899
-      // That's why this is here
-      if (e.errorCode != 0) {
+    if (result != null) {
+      var time = m.dateTaken ?? DateTime.now();
+      if (Platform.isWindows && time.isBefore(DateTime(1970))) {
+        print(
+            'WARNING: ${m.firstFile.path} has date $time, which is before 1970 '
+            '(not supported on Windows) - will be set to 1970-01-01');
+        time = DateTime(1970);
+      }
+      try {
+        await result.setLastModified(time);
+      } on OSError catch (e) {
+        // Sometimes windoza throws error but successes anyway 🙃:
+        // https://github.com/TheLastGimbus/GooglePhotosTakeoutHelper/issues/229#issuecomment-1685085899
+        // That's why this is here
+        if (e.errorCode != 0) {
+          print("WARNING: Can't set modification time on $result: $e");
+        }
+      } catch (e) {
         print("WARNING: Can't set modification time on $result: $e");
       }
-    } catch (e) {
-      print("WARNING: Can't set modification time on $result: $e");
+    } else {
+      print("WARNING: Resulting file is null, skipping date modification.");
     }
 
     // one copy/move/whatever - one yield
     yield ++i;
 
     if (albumBehavior == 'json') {
-      infoJson[p.basename(result.path)] =
+      infoJson[p.basename(result!.path)] =
          m.files.keys.whereNotNull().toList();
    }
  }
diff --git a/lib/src/hashing.dart b/lib/src/hashing.dart
new file mode 100644
index 000000000..a2ae2765a
--- /dev/null
+++ b/lib/src/hashing.dart
@@ -0,0 +1,13 @@
+import 'dart:convert';
+import 'dart:io';
+import 'package:crypto/crypto.dart';
+
+Future<String> calculateSha256(File file) async {
+  try {
+    final bytes = await file.readAsBytes();
+    return sha256.convert(bytes).toString();
+  } catch (e) {
+    print('Error calculating SHA-256 for ${file.path}: $e');
+    rethrow;
+  }
+}
\ No newline at end of file
diff --git a/lib/utils.dart b/lib/utils.dart
index 41ee7e4d0..90c259583 100644
--- a/lib/utils.dart
+++ b/lib/utils.dart
@@ -1,4 +1,5 @@
 import 'dart:io';
+import 'dart:math';
 
 import 'package:collection/collection.dart';
 import 'package:gpth/interactive.dart' as interactive;
@@ -133,3 +134,10 @@ extension Z on String {
     return replaceRange(lastIndex, lastIndex + from.length, to);
   }
 }
+
+String sanitizeFilename(String name) {
+  // Remove reserved characters for Windows/macOS/Linux
+  final sanitized = name.replaceAll(RegExp(r'[<>:"/\\|?*]'), '_');
+  // Truncate to avoid filesystem limits
+  return sanitized.length <= 255 ? sanitized : sanitized.substring(0, 255);
+}
\ No newline at end of file
diff --git a/pubspec.yaml b/pubspec.yaml
index c0457d701..704ff2e90 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -8,22 +8,24 @@ environment:
   sdk: '>=3.1.0 <4.0.0'
 
 dependencies:
-  args: ^2.4.2
-  path: ^1.8.3
-  mime: ^1.0.4
+  args: ^2.4.2 # CLI argument parsing
   collection: ^1.18.0
+  console_bars: ^1.2.0
   convert: ^3.1.1
-  fuzzysearch: ^0.1.3
-  crypto: ^3.0.3
+  crypto: ^3.0.3 # For SHA-256
   exif: ^3.1.4
-  console_bars: ^1.2.0
   file_picker_desktop: ^1.1.1
-  # archive:
-  #   git:
-  #     url: https://github.com/TheLastGimbus/archive.git
-  #     ref: fix-windoza-extract-errors
+  fuzzysearch: ^0.1.3
+  image: ^4.0.17 # For EXIF in HEIC/RAW
+  logging: ^1.1.1 # Structured logging
+  mime: ^1.0.4
+  path: ^1.8.3
   proper_filesize: ^0.0.2
   unorm_dart: ^0.2.0
+  # archive:
+  #   git:
+  #     url: https://github.com/TheLastGimbus/archive.git
+  #     ref: fix-windoza-extract-errors
 
 dev_dependencies:
   lints: ^2.1.1
diff --git a/scripts/gen-pkgbuild.bash b/scripts/gen-pkgbuild.bash
index ddffd0262..1fca0463e 100755
--- a/scripts/gen-pkgbuild.bash
+++ b/scripts/gen-pkgbuild.bash
@@ -1,11 +1,33 @@
 #!/bin/bash
-# Script to generate PKGBUILD files for Ałycz Linux
+# Script to generate PKGBUILD files for Arch Linux
 set -e # fail if generating sha fails or smth
 
+if [ -z "$1" ]; then
+  echo "Usage: $0 <file>"
+  exit 1
+fi
+
+if [ ! -f "$1" ]; then
+  echo "Error: File '$1' not found!"
+  exit 1
+fi
+
+pkgver=$(grep -oP '(?<=version: ).*' pubspec.yaml)
+if [ -z "$pkgver" ]; then
+  echo "Error: Could not extract version from pubspec.yaml"
+  exit 1
+fi
+
+sha256sum=$(sha256sum "$1" | cut -d " " -f 1)
+if [ -z "$sha256sum" ]; then
+  echo "Error: Could not generate sha256sum for '$1'"
+  exit 1
+fi
+
 txt="# Maintainer: TheLastGimbus
 pkgname=gpth-bin
-pkgver=$(grep -oP '(?<=version: ).*' pubspec.yaml)
+pkgver=${pkgver}
 pkgrel=1
 pkgdesc='Tool to help you with exporting stuff from Google Photos'
 arch=('x86_64')
@@ -16,10 +38,10 @@ provides=('gpth')
 conflicts=('gpth')
 options=('!strip')
 source=(\"\${url}/releases/download/v\${pkgver}/gpth-linux\")
-sha256sums=('$(sha256sum "$1" | cut -d " " -f 1)')
+sha256sums=('${sha256sum}')
 
 package() {
-  install -Dm755 \"gpth-linux\" \"\${pkgdir}/usr/bin/gpth\"
+  install -Dm755 \"\${srcdir}/gpth-linux\" \"\${pkgdir}/usr/bin/gpth\"
 }"
 
 echo "$txt" | tee PKGBUILD
diff --git a/scripts/get_changelog.py b/scripts/get_changelog.py
index e6667dbc2..e455113e1 100644
--- a/scripts/get_changelog.py
+++ b/scripts/get_changelog.py
@@ -1,26 +1,55 @@
+"""
+This script extracts the changelog for a specific version from the CHANGELOG.md file.
+"""
+
 import argparse
 import re
 
-p = argparse.ArgumentParser()
-p.add_argument('--version', help='Version to get changelog for. You can type with or without "v" prefix')
-args = p.parse_args()
-
-version = args.version.strip().replace('v', '')
-if not re.match(r'\d+.\d+.\d+', version):
-    raise ValueError('Invalid version')
-
-with open("CHANGELOG.md", 'r') as f:
-    lines = f.read()
-
-# Get first "##" followed by version
-start = lines.index(f'## {version}')
-# Start from newline
-start = lines.index('\n', start) + 1
-# Find next "##" (previous version)
-try:
-    end = lines.index('\n## ', start)
-except ValueError:
-    # in case there is no previous version
-    end = -1
-
-print(lines[start:end].strip())
+def main():
+    """
+    Main function to parse arguments and extract the changelog for the specified version.
+    """
+    p = argparse.ArgumentParser(
+        description='Get changelog for a specific version.'
+    )
+    p.add_argument(
+        '--version',
+        required=True,
+        help=(
+            'Version to get changelog for. '
+            'You can type with or without "v" prefix'
+        )
+    )
+    args = p.parse_args()
+
+    version = args.version.strip().replace('v', '')
+    if not re.match(r'^\d+\.\d+\.\d+$', version):
+        raise ValueError('Invalid version format. Expected format: X.Y.Z')
+
+    try:
+        with open("CHANGELOG.md", 'r', encoding='utf-8') as f:
+            lines = f.read()
+    except FileNotFoundError as exc:
+        raise FileNotFoundError('CHANGELOG.md file not found.') from exc
+
+    # Get first "##" followed by version
+    try:
+        start = lines.index(f'## {version}')
+    except ValueError as exc:
+        raise ValueError(
+            f'Version {version} not found in CHANGELOG.md'
+        ) from exc
+
+    # Start from newline
+    start = lines.index('\n', start) + 1
+    # Find next "##" (previous version)
+    try:
+        end = lines.index('\n## ', start)
+    except ValueError:
+        # in case there is no previous version
+        end = len(lines)
+
+    print(lines[start:end].strip())
+
+if __name__ == '__main__':
+    main()
diff --git a/test/gpth_test.dart b/test/gpth_test.dart
index 3a0252172..d3d54904c 100644
--- a/test/gpth_test.dart
+++ b/test/gpth_test.dart
@@ -13,7 +13,7 @@ import 'package:path/path.dart';
 import 'package:test/test.dart';
 
 void main() {
-  /// this is 1x1 green jg image, with exif:
+  /// this is 1x1 green jpg image, with exif:
   /// DateTime Original: 2022:12:16 16:06:47
   const greenImgBase64 = """
 /9j/4AAQSkZJRgABAQAAAQABAAD/4QC4RXhpZgAATU0AKgAAAAgABQEaAAUAAAABAAAASgEbAAUA
@@ -62,8 +62,7 @@ AQACEQMRAD8AIcgXf//Z""";
   Media({null: imgFile6_1}, dateTaken: DateTime(2015), dateTakenAccuracy: 1),
   ];
 
-  /// Set up test stuff - create test shitty files in wherever pwd is
-  /// We don't worry because we'll delete them later
+  /// Set up test stuff - create test files in the current directory
   setUpAll(() {
     albumDir.createSync(recursive: true);
     imgFileGreen.createSync();
@@ -90,6 +89,15 @@
     writeJson(jsonFile6, 1422183600);
   });
 
+  test('EXIF date extraction', () {
+    final testFile = File('test_data/image_with_exif.jpg');
+    expect(extractDateFromExif(testFile), DateTime(2023, 1, 1));
+  });
+
+  test('Filename date guessing', () {
+    expect(guessDateFromFilename('IMG_20230101_123456.jpg'), DateTime(2023, 1, 1, 12, 34, 56));
+  });
+
   group('DateTime extractors', () {
     test('json', () async {
       expect((await jsonExtractor(imgFile1))?.millisecondsSinceEpoch,
@@ -367,7 +375,7 @@
     tearDown(() async => await output.delete(recursive: true));
   });
 
-  /// Delete all shitty files as we promised
+  /// Delete all test files as we promised
   tearDownAll(() {
     albumDir.deleteSync(recursive: true);
     imgFileGreen.deleteSync();