diff --git a/import_media.py b/import_media.py
index ed7df9b..ce135f5 100644
--- a/import_media.py
+++ b/import_media.py
@@ -4,22 +4,11 @@
 Import photos from SD card into folder with today's date + nickname
 Use: importphotos (--jpg|--raw|--both)
 Add script to path
-'''
-'''
+
 TODO:
-1. Import configuration from config file
-2. Set raw file extension based on camera specified in configuration
-3. Create destination folders based on concatination of configuration,
-   metadata, and event name passed from ARG
-4. Create destination sub-folder based on filetype
-5. Copy files to appropriate folder
-6. Compare files from source
-7. Create 'originals' with copy of files from destination after
-   checksum for photos only
 8. Optionally allow specification of a backup location on another disk
    or NAS to ship a 3rd copy to
-9. Optionally cleanup SD only after checksum matching
 10. Every config option has an arg override
 11. Optionally rename file if event name was passed in
 -- STRETCH --
@@ -28,25 +17,27 @@ TODO:
 import os
 import sys
-import yaml
+from pprint import pprint
 import argparse
 import shutil
 import hashlib
 from datetime import datetime
+from tqdm import tqdm
+import yaml
 import exifread
 import ffmpeg
 
-config_file = 'config.yaml'
+CONFIG_FILE = 'config.yaml'
+files = {}
 
 # Read configuration from file
 try:
-    with open(config_file, 'r') as f:
+    with open(CONFIG_FILE, 'r') as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
 except FileNotFoundError:
-    print("Configuration file not found: ", config_file)
-    print("Copy config.yaml.EXAMPLE to ", config_file, " and update accordingly.")
+    print("Configuration file not found: ", CONFIG_FILE)
+    print("Copy config.yaml.EXAMPLE to ", CONFIG_FILE, " and update accordingly.")
+    sys.exit(1)
 
-''' Parse Arguments '''
 parser = argparse.ArgumentParser()
 parser.add_argument("-e", "--event", help = "Event Name")
@@ -56,29 +47,24 @@ if args.event:
     event = args.event
 
 def md5_hash(f):
-    print("calculating md5 for ", f)
+    """ Calculate and return the md5 hash of a file """
+    #print("calculating md5 for ", f)
     md5 = hashlib.md5(open(f, 'rb').read()).hexdigest()
     return md5
 
 def cmp_files(f1,f2):
-    print('comparing md5 hashes...')
+    """ Compare two files by md5 hash """
     return md5_hash(f1) == md5_hash(f2)
 
-def file_classification(f):
-    print('Classifying media for: ', f)
-    for classification in config['file_types']:
-        for ext in config['file_types'][classification]:
-            if f.lower().endswith(ext):
-                c = classification
-    return classification
-
 def get_capture_date(p, t):
+    """ Get the capture date from file metadata """
     if t == 'image':
         with open(p, 'rb') as f:
             tags = exifread.process_file(f)
         stamp = datetime.strptime(str(tags['EXIF DateTimeOriginal']), '%Y:%m:%d %H:%M:%S')
     elif t == 'video':
-        stamp = datetime.strptime(ffmpeg.probe(p)['format']['tags']['creation_time'], '%Y-%m-%dT%H:%M:%S.%f%z')
+        stamp = datetime.strptime(ffmpeg.probe(p)['format']['tags']['creation_time'],
+                                  '%Y-%m-%dT%H:%M:%S.%f%z')
     elif t == 'audio':
         stamp = datetime.strptime(ffmpeg.probe(p)['format']['tags']['date'], '%Y-%m-%d')
     else:
@@ -90,113 +76,166 @@ def get_capture_date(p, t):
     return year, month, day
 
 def create_folder(f):
+    """ Create a folder, ignoring it if it already exists """
     try:
         os.makedirs(f)
-    except FileExistsError as exists:
-        print()
+    except FileExistsError:
+        pass
 
-def copy_from_source(p, dest_folder, dest_orig_folder, file):
-    if os.path.exists(os.path.join(dest_folder, file)):
-        check_match = cmp_files(p, os.path.join(dest_folder, file))
-        if check_match == False:
+def copy_from_source(p, d, f):
+    """ Copy a file from source to destination, renaming any non-identical file already there """
+    if os.path.exists(os.path.join(d, f)):
+        check_match = cmp_files(p, os.path.join(d, f))
+        if check_match is False:
             print(f'Found duplicate for {p}, renaming destination with md5 appended.')
-            base, extension = os.path.splitext(file)
-            file_name_hash = base + '_' + md5_hash(os.path.join(dest_folder, file)) + extension
-            os.rename(os.path.join(dest_folder, file), os.path.join(dest_folder, file_name_hash))
+            base, extension = os.path.splitext(f)
+            file_name_hash = base + '_' + md5_hash(os.path.join(d, f)) + extension
+            os.rename(os.path.join(d, f), os.path.join(d, file_name_hash))
+        else:
+            return
-        shutil.copy(p, dest_folder)
-        check_match = cmp_files(p, dest_folder + '/' + file)
-        if check_match == False:
-            print(f'CRITICAL: md5 hash does not match for {file}')
-            print(p, ': ', md5_hash(p))
-            print(dest_folder + '/' + file, ': ', md5_hash(dest_folder + '/' + file))
-            exit
+    shutil.copy(p, d)
-        if dest_orig_folder != False:
-            shutil.copy(dest_folder + '/' + file, dest_orig_folder)
-            check_match = cmp_files(dest_folder + '/' + file, dest_orig_folder + '/' + file)
-            if check_match == False:
-                print(f'CRITICAL: md5 hash does not match for {file}')
-                print(dest_folder + '/' + file, ': ', md5_hash(dest_folder + '/' + file))
-                print(dest_orig_folder + '/' + file, ': ', md5_hash(dest_orig_folder + '/' + file))
-                exit
-    else:
-        shutil.copy(p, dest_folder)
-        check_match = cmp_files(p, dest_folder + '/' + file)
-        if check_match == False:
-            print(f'CRITICAL: md5 hash does not match for {file}')
-            print(p, ': ', md5_hash(p))
-            print(dest_folder + '/' + file, ': ', md5_hash(dest_folder + '/' + file))
-            exit
+def process_file(path, f_type, f_name, ext):
+    """ Gather file information and add it to the files dictionary """
-        if dest_orig_folder != False:
-            shutil.copy(dest_folder + '/' + file, dest_orig_folder)
-            check_match = cmp_files(dest_folder + '/' + file, dest_orig_folder + '/' + file)
-            if check_match == False:
-                print(f'CRITICAL: md5 hash does not match for {file}')
-                print(dest_folder + '/' + file, ': ', md5_hash(dest_folder + '/' + file))
-                print(dest_orig_folder + '/' + file, ': ', md5_hash(dest_orig_folder + '/' + file))
-                exit
+    i = os.path.join(path, f_name)
-    # Blindly assume md5 check has passed...
-    if config['cleanup_sd'] == True:
-        os.remove(p)
+    files[i] = { 'folders': {}, 'date': {} }
 
-def process_file(p, t, file, ext):
-    capture_date = get_capture_date(p, t)
-    y = capture_date[0]
-    m = capture_date[1]
-    d = capture_date[2]
+    files[i]['folders']['source_path'] = path
+    files[i]['type'] = f_type
+    files[i]['name'] = f_name
+    files[i]['extension'] = ext
+
+    files[i]['date']['capture_date'] = get_capture_date(
+        os.path.join(files[i]['folders']['source_path'], files[i]['name']),
+        files[i]['type'])
+    files[i]['date']['y'] = files[i]['date']['capture_date'][0]
+    files[i]['date']['m'] = files[i]['date']['capture_date'][1]
+    files[i]['date']['d'] = files[i]['date']['capture_date'][2]
 
     if event:
-        dest_folder = config['folders']['destination']['base'] + '/' + y + '/' + y + '-' + m + '/' + y + '-' + m + '-' + d + '-' + event
+        files[i]['folders']['destination'] = config['folders']['destination']['base'] + \
+            '/' + files[i]['date']['y'] + '/' + \
+            files[i]['date']['y'] + '-' + files[i]['date']['m'] + '/' + \
+            files[i]['date']['y'] + '-' + files[i]['date']['m'] + '-' + \
+            files[i]['date']['d'] + '-' + event
     else:
-        dest_folder = config['folders']['destination']['base'] + '/' + y + '/' + y + '-' + m + '/' + y + '-' + m + '-' + d
+        files[i]['folders']['destination'] = config['folders']['destination']['base'] + \
+            '/' + files[i]['date']['y'] + '/' + \
+            files[i]['date']['y'] + '-' + files[i]['date']['m'] + '/' + \
+            files[i]['date']['y'] + '-' + files[i]['date']['m'] + '-' + \
+            files[i]['date']['d']
 
-    if t == 'image':
-        dest_folder = dest_folder + '/photos'
+    if files[i]['type'] == 'image':
+        files[i]['folders']['destination'] = files[i]['folders']['destination'] + '/PHOTO'
 
-        if config['store_originals'] == True:
-            dest_orig_folder = dest_folder + '/ORIGINALS'
+        if config['store_originals'] is True:
+            files[i]['folders']['destination_original'] = files[i]['folders']['destination'] + \
+                '/ORIGINALS'
 
-        if ext in ('jpg', 'jpeg'):
-            dest_folder = dest_folder + '/JPG'
-            if dest_orig_folder:
-                dest_orig_folder = dest_orig_folder + '/JPG'
+        if files[i]['extension'] in ('jpg', 'jpeg'):
+            files[i]['folders']['destination'] = files[i]['folders']['destination'] + '/JPG'
+            if files[i]['folders'].get('destination_original'):
+                files[i]['folders']['destination_original'] = \
+                    files[i]['folders']['destination_original'] + '/JPG'
         else:
-            dest_folder = dest_folder + '/RAW'
-            if dest_orig_folder:
-                dest_orig_folder = dest_orig_folder + '/RAW'
+            files[i]['folders']['destination'] = files[i]['folders']['destination'] + '/RAW'
+            if files[i]['folders'].get('destination_original'):
+                files[i]['folders']['destination_original'] = \
+                    files[i]['folders']['destination_original'] + '/RAW'
 
-    elif t == 'video':
-        dest_folder = dest_folder + '/VIDEO'
+    elif files[i]['type'] == 'video':
+        files[i]['folders']['destination'] = files[i]['folders']['destination'] + '/VIDEO'
 
-    elif t == 'audio':
-        dest_folder = dest_folder + '/AUDIO'
+    elif files[i]['type'] == 'audio':
+        files[i]['folders']['destination'] = files[i]['folders']['destination'] + '/AUDIO'
 
     else:
-        print(f'WARN: {t} is not a known type and you never should have landed here.')
-
-    create_folder(dest_folder)
+        print(f'WARN: {files[i]["type"]} is not a known type and you never should have landed here.')
 
     try:
-        dest_orig_folder
-    except NameError:
-        dest_orig_folder = False
-    else:
-        create_folder(dest_orig_folder)
+        files[i]['folders']['destination_original']
+    except (NameError, KeyError):
+        files[i]['folders']['destination_original'] = False
-    copy_from_source(p, d, o, file)
+#    copy_from_source(p, d, o, file)
 
-def file_list(directory):
-    for folder, subfolders, filename in os.walk(directory):
-        for t in config['file_types']:
-            for ext in config['file_types'][t]:
-                for file in filename:
+def find_files(directory):
+    """ Walk the source directory and build the files dictionary """
+    for folder, subfolders, filenames in tqdm(os.walk(directory), desc = 'Finding Files', ncols = 100):
+        for f_type in config['file_types']:
+            for ext in tqdm(config['file_types'][f_type], desc = 'Finding ' + f_type + ' Files', ncols = 100):
+                for file in tqdm(filenames, desc = 'Finding ' + ext + ' Files', ncols = 100):
                     if file.lower().endswith(ext):
-                        p = folder + '/' + file
-                        process_file(p, t, file, ext)
+                        process_file(folder, f_type, file, ext)
 
-file_list(config['folders']['source']['base'])
+find_files(config['folders']['source']['base'])
+
+#pprint(files)
+
+#for file in files:
+#    pprint(files[file]['folders'])
+
+# Copy each file to its destination, and on to ORIGINALS when configured
+for file in tqdm(files, desc = "Copying Files:", ncols = 100):
+    create_folder(files[file]['folders']['destination'])
+
+    copy_from_source(os.path.join(files[file]['folders']['source_path'], files[file]['name']),
+                     files[file]['folders']['destination'],
+                     files[file]['name'])
+
+    if files[file]['folders']['destination_original'] is not False:
+        create_folder(files[file]['folders']['destination_original'])
+
+        copy_from_source(os.path.join(files[file]['folders']['destination'], files[file]['name']),
+                         files[file]['folders']['destination_original'],
+                         files[file]['name'])
+
+# Generate md5 checksums for the source copy and every destination copy
+for file in tqdm(files, desc = "Generating MD5 Hashes:", ncols = 100):
+    files[file]['md5_checksums'] = {}
+    for folder in files[file]['folders']:
+        if files[file]['folders'][folder] is not False:
+            k = os.path.join(files[file]['folders'][folder], files[file]['name'])
+            files[file]['md5_checksums'][k] = md5_hash(k)
+
+# Verify checksums; a single mismatch marks the source as not cleanable
+for file in tqdm(files, desc = "Verifying Checksums:", ncols = 100):
+    i = 0
+    c = {}
+    files[file]['source_cleanable'] = True
+    for checksum in files[file]['md5_checksums']:
+        c[i] = files[file]['md5_checksums'][checksum]
+        if i > 0:
+            prev = i - 1
+            if c[i] != c[prev]:
+                files[file]['source_cleanable'] = False
+                print(f'FATAL: Checksum validation failed for: {files[file]["name"]}\n'
+                      f'{c[i]}\n is not equal to \n{c[prev]}\n')
+                print('\n File Meta:\n')
+                pprint(files[file])
+        i += 1
+
+# Cleanup SD, only removing files whose checksums all matched
+if config['cleanup_sd'] is True:
+    for file in tqdm(files, desc = "Cleaning Up SD:", ncols = 100):
+        if files[file]['source_cleanable'] is True:
+            os.remove(os.path.join(files[file]['folders']['source_path'], files[file]['name']))
+
+#pprint(files)
 print('done.')
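
The script reads every setting from config.yaml (folders.source.base, folders.destination.base, file_types, store_originals, cleanup_sd). For reference, a minimal sketch of the structure those lookups imply, written as the dict yaml.load() would return; the paths and extension lists below are illustrative examples, not values taken from config.yaml.EXAMPLE:

# Hypothetical sketch of the expected config structure; all values are examples.
config = {
    'folders': {
        'source': {'base': '/media/sdcard/DCIM'},        # where find_files() walks
        'destination': {'base': '/home/user/Pictures'},  # root of the dated library
    },
    'file_types': {                       # classification -> list of extensions
        'image': ['jpg', 'jpeg', 'cr2'],
        'video': ['mp4', 'mov'],
        'audio': ['wav', 'mp3'],
    },
    'store_originals': True,  # keep a second ORIGINALS copy of photos
    'cleanup_sd': False,      # delete source files once checksums verify
}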
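
process_file() builds each destination as base/YYYY/YYYY-MM/YYYY-MM-DD[-event] before appending the media-type subfolder. A small sketch of the same scheme using os.path.join instead of '/' concatenation; the helper name and sample values are mine, not the script's:

import os

def build_destination(base, y, m, d, event=None):
    """ Hypothetical helper mirroring the base/YYYY/YYYY-MM/YYYY-MM-DD[-event] layout. """
    day = f'{y}-{m}-{d}' + (f'-{event}' if event else '')
    return os.path.join(base, y, f'{y}-{m}', day)

print(build_destination('/home/user/Pictures', '2021', '07', '04', 'parade'))
# prints: /home/user/Pictures/2021/2021-07/2021-07-04-parade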
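
md5_hash() reads each file into memory in a single call, which gets expensive for large video files. A chunked variant, sketched here as a possible follow-up rather than part of this patch, keeps memory usage flat:

import hashlib

def md5_hash_chunked(path, chunk_size=1024 * 1024):
    """ Hash a file in 1 MiB chunks so large videos never load fully into RAM. """
    md5 = hashlib.md5()
    with open(path, 'rb') as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b''):
            md5.update(chunk)
    return md5.hexdigest()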