refactoring and add tqdm

Kameron Kenny 2023-08-10 11:21:13 -04:00
parent c884753f09
commit c23164d428
1 changed file with 151 additions and 112 deletions
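The headline change is progress reporting: the rewritten loops wrap their iterables in tqdm. A minimal sketch of that pattern, with a hypothetical file list standing in for the script's global files dict:

    from tqdm import tqdm
    import time

    # Hypothetical stand-in for the script's files dict.
    files = {'IMG_0001.jpg': {}, 'IMG_0002.raw': {}}

    # tqdm wraps any iterable and renders a progress bar; desc labels the
    # bar and ncols fixes its width, matching the usage in this commit.
    for f in tqdm(files, desc='Copying Files:', ncols=100):
        time.sleep(0.1)  # placeholder for the real copy/hash work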


@@ -4,22 +4,11 @@
 Import photos from SD card into folder with today's date + nickname
 Use: importphotos (--jpg|--raw|--both) <nickname of folder (optional)>
 Add script to path
 '''
 '''
 TODO:
-1. Import configuration from config file
-2. Set raw file extension based on camera specified in configuration
-3. Create destination folders based on concatenation of configuration,
-   metadata, and event name passed from ARG
-4. Create destination sub-folder based on filetype
-5. Copy files to appropriate folder
-6. Compare files from source
-7. Create 'originals' with copy of files from destination after
-   checksum for photos only
 8. Optionally allow specification of a backup location on another disk
    or NAS to ship a 3rd copy to
-9. Optionally clean up SD only after checksum matching
 10. Every config option has an arg override
 11. Optionally rename file if event name was passed in
 -- STRETCH --
@@ -28,25 +17,27 @@ TODO:
 import os
 import sys
-import yaml
+from pprint import pprint
 import argparse
 import shutil
 import hashlib
 from datetime import datetime
+from tqdm import tqdm
+import yaml
 import exifread
 import ffmpeg
 
-config_file = 'config.yaml'
+CONFIG_FILE = 'config.yaml'
+
+files = {}
 
 # Read configuration from file
 try:
-    with open(config_file, 'r') as f:
+    with open(CONFIG_FILE, 'r') as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
 except FileNotFoundError:
-    print("Configuration file not found: ", config_file)
-    print("Copy config.yaml.EXAMPLE to ", config_file, " and update accordingly.")
+    print("Configuration file not found: ", CONFIG_FILE)
+    print("Copy config.yaml.EXAMPLE to ", CONFIG_FILE, " and update accordingly.")
 
 ''' Parse Arguments '''
 parser = argparse.ArgumentParser()
 parser.add_argument("-e", "--event", help = "Event Name")
@@ -56,29 +47,24 @@ if args.event:
     event = args.event
 
 def md5_hash(f):
-    print("calculating md5 for ", f)
+    """ calculates and returns md5 hash """
+    #print("calculating md5 for ", f)
     md5 = hashlib.md5(open(f, 'rb').read()).hexdigest()
     return md5
 
 def cmp_files(f1,f2):
-    print('comparing md5 hashes...')
+    """ Use file hashes to compare files """
     return md5_hash(f1) == md5_hash(f2)
 
-def file_classification(f):
-    print('Classifying media for: ', f)
-    for classification in config['file_types']:
-        for ext in config['file_types'][classification]:
-            if f.lower().endswith(ext):
-                c = classification
-    return classification
-
 def get_capture_date(p, t):
+    """ get capture date from meta """
     if t == 'image':
         with open(p, 'rb') as f:
             tags = exifread.process_file(f)
         stamp = datetime.strptime(str(tags['EXIF DateTimeOriginal']), '%Y:%m:%d %H:%M:%S')
     elif t == 'video':
-        stamp = datetime.strptime(ffmpeg.probe(p)['format']['tags']['creation_time'], '%Y-%m-%dT%H:%M:%S.%f%z')
+        stamp = datetime.strptime(ffmpeg.probe(p)['format']['tags']['creation_time'],
+                                  '%Y-%m-%dT%H:%M:%S.%f%z')
     elif t == 'audio':
         stamp = datetime.strptime(ffmpeg.probe(p)['format']['tags']['date'], '%Y-%m-%d')
     else:
@@ -90,113 +76,166 @@ def get_capture_date(p, t):
     return year, month, day
 
 def create_folder(f):
+    """ Function to create folder """
     try:
         os.makedirs(f)
-    except FileExistsError as exists:
-        print()
+    except FileExistsError:
+        pass
 
-def copy_from_source(p, dest_folder, dest_orig_folder, file):
-    if os.path.exists(os.path.join(dest_folder, file)):
-        check_match = cmp_files(p, os.path.join(dest_folder, file))
-        if check_match == False:
-            print(f'Found duplicate for {p}, renaming destination with md5 appended.')
-            base, extension = os.path.splitext(file)
-            file_name_hash = base + '_' + md5_hash(os.path.join(dest_folder, file)) + extension
-            os.rename(os.path.join(dest_folder, file), os.path.join(dest_folder, file_name_hash))
-        shutil.copy(p, dest_folder)
-        check_match = cmp_files(p, dest_folder + '/' + file)
-        if check_match == False:
-            print(f'CRITICAL: md5 hash does not match for {file}')
-            print(p, ': ', md5_hash(p))
-            print(dest_folder + '/' + file, ': ', md5_hash(dest_folder + '/' + file))
-            exit
-        if dest_orig_folder != False:
-            shutil.copy(dest_folder + '/' + file, dest_orig_folder)
-            check_match = cmp_files(dest_folder + '/' + file, dest_orig_folder + '/' + file)
-            if check_match == False:
-                print(f'CRITICAL: md5 hash does not match for {file}')
-                print(dest_folder + '/' + file, ': ', md5_hash(dest_folder + '/' + file))
-                print(dest_orig_folder + '/' + file, ': ', md5_hash(dest_orig_folder + '/' + file))
-                exit
-    else:
-        shutil.copy(p, dest_folder)
-        check_match = cmp_files(p, dest_folder + '/' + file)
-        if check_match == False:
-            print(f'CRITICAL: md5 hash does not match for {file}')
-            print(p, ': ', md5_hash(p))
-            print(dest_folder + '/' + file, ': ', md5_hash(dest_folder + '/' + file))
-            exit
-        if dest_orig_folder != False:
-            shutil.copy(dest_folder + '/' + file, dest_orig_folder)
-            check_match = cmp_files(dest_folder + '/' + file, dest_orig_folder + '/' + file)
-            if check_match == False:
-                print(f'CRITICAL: md5 hash does not match for {file}')
-                print(dest_folder + '/' + file, ': ', md5_hash(dest_folder + '/' + file))
-                print(dest_orig_folder + '/' + file, ': ', md5_hash(dest_orig_folder + '/' + file))
-                exit
-    # Blindly assume md5 check has passed...
-    if config['cleanup_sd'] == True:
-        os.remove(p)
+def copy_from_source(p,d,f):
+    """ Copy file from source to destination """
+    if os.path.exists(os.path.join(d, f)):
+        check_match = cmp_files(p, os.path.join(d, f))
+        if check_match is False:
+            print(f'Found duplicate for {p}, renaming destination with md5 appended.')
+            base, extension = os.path.splitext(f)
+            file_name_hash = base + '_' + md5_hash(os.path.join(d, f)) + extension
+            os.rename(os.path.join(d, f), os.path.join(d, file_name_hash))
+        else:
+            return
+    shutil.copy(p, d)
 
-def process_file(p, t, file, ext):
-    capture_date = get_capture_date(p, t)
-    y = capture_date[0]
-    m = capture_date[1]
-    d = capture_date[2]
-    if event:
-        dest_folder = config['folders']['destination']['base'] + '/' + y + '/' + y + '-' + m + '/' + y + '-' + m + '-' + d + '-' + event
-    else:
-        dest_folder = config['folders']['destination']['base'] + '/' + y + '/' + y + '-' + m + '/' + y + '-' + m + '-' + d
-    if t == 'image':
-        dest_folder = dest_folder + '/photos'
-        if config['store_originals'] == True:
-            dest_orig_folder = dest_folder + '/ORIGINALS'
-        if ext in ('jpg', 'jpeg'):
-            dest_folder = dest_folder + '/JPG'
-            if dest_orig_folder:
-                dest_orig_folder = dest_orig_folder + '/JPG'
-        else:
-            dest_folder = dest_folder + '/RAW'
-            if dest_orig_folder:
-                dest_orig_folder = dest_orig_folder + '/RAW'
-    elif t == 'video':
-        dest_folder = dest_folder + '/VIDEO'
-    elif t == 'audio':
-        dest_folder = dest_folder + '/AUDIO'
-    else:
-        print(f'WARN: {t} is not a known type and you never should have landed here.')
-    create_folder(dest_folder)
-    try:
-        dest_orig_folder
-    except NameError:
-        dest_orig_folder = False
-    else:
-        create_folder(dest_orig_folder)
-    copy_from_source(p, dest_folder, dest_orig_folder, file)
+def process_file(path, f_type, f_name, ext):
+    """ gather information and add to dictionary """
+    i = os.path.join(path,f_name)
+    files[i] = { 'folders': {}, 'date': {} }
+    files[i]['folders']['source_path'] = path
+    files[i]['type'] = f_type
+    files[i]['name'] = f_name
+    files[i]['extension'] = ext
+    files[i]['date']['capture_date'] = get_capture_date(
+        os.path.join(files[i]['folders']['source_path'],
+        files[i]['name']),files[i]['type'])
+    files[i]['date']['y'] = files[i]['date']['capture_date'][0]
+    files[i]['date']['m'] = files[i]['date']['capture_date'][1]
+    files[i]['date']['d'] = files[i]['date']['capture_date'][2]
+    if event:
+        files[i]['folders']['destination'] = config['folders']['destination']['base'] + \
+            '/' + files[i]['date']['y'] + '/' + \
+            files[i]['date']['y'] + '-' + \
+            files[i]['date']['m'] + '/' + \
+            files[i]['date']['y'] + '-' + \
+            files[i]['date']['m'] + '-' + \
+            files[i]['date']['d'] + '-' + \
+            event
+    else:
+        files[i]['folders']['destination'] = config['folders']['destination']['base'] + \
+            '/' + files[i]['date']['y'] + '/' + \
+            files[i]['date']['y'] + '-' + \
+            files[i]['date']['m'] + '/' + \
+            files[i]['date']['y'] + '-' + \
+            files[i]['date']['m'] + '-' + \
+            files[i]['date']['d']
+    if files[i]['type'] == 'image':
+        files[i]['folders']['destination'] = files[i]['folders']['destination'] + '/PHOTO'
+        if config['store_originals'] is True:
+            files[i]['folders']['destination_original'] = files[i]['folders']['destination'] + \
+                '/ORIGINALS'
+        if files[i]['extension'] in ('jpg', 'jpeg'):
+            files[i]['folders']['destination'] = files[i]['folders']['destination'] + \
+                '/JPG'
+            if files[i]['folders']['destination_original']:
+                files[i]['folders']['destination_original'] = \
+                    files[i]['folders']['destination_original'] + '/JPG'
+        else:
+            files[i]['folders']['destination'] = files[i]['folders']['destination'] + '/RAW'
+            if files[i]['folders']['destination_original']:
+                files[i]['folders']['destination_original'] = \
+                    files[i]['folders']['destination_original'] + '/RAW'
+    elif files[i]['type'] == 'video':
+        files[i]['folders']['destination'] = files[i]['folders']['destination'] + '/VIDEO'
+    elif files[i]['type'] == 'audio':
+        files[i]['folders']['destination'] = files[i]['folders']['destination'] + '/AUDIO'
+    else:
+        print('WARN: ', files[i]['type'], ' is not a known type and you never should have landed here.')
+    try:
+        files[i]['folders']['destination_original']
+    except (NameError, KeyError):
+        files[i]['folders']['destination_original'] = False
+    # copy_from_source(p, d, o, file)
 
-def file_list(directory):
-    for folder, subfolders, filename in os.walk(directory):
-        for t in config['file_types']:
-            for ext in config['file_types'][t]:
-                for file in filename:
-                    if file.lower().endswith(ext):
-                        p = folder + '/' + file
-                        process_file(p, t, file, ext)
+def find_files(directory):
+    """ find files to build a dictionary out of """
+    for folder, subfolders, filename in tqdm(os.walk(directory), desc = 'Finding Files', ncols = 100):
+        for f_type in config['file_types']:
+            for ext in tqdm(config['file_types'][f_type], desc = 'Finding ' + f_type + ' Files', ncols = 100):
+                for file in tqdm(filename, desc = 'Finding ' + ext + ' Files', ncols = 100):
+                    if file.lower().endswith(ext):
+                        process_file(folder, f_type, file, ext)
 
-file_list(config['folders']['source']['base'])
+find_files(config['folders']['source']['base'])
+
+#pprint(files)
+#for file in files:
+#    pprint(files[file]['folders'])
+
+# Copy File
+for file in tqdm(files, desc = "Copying Files:", ncols = 100):
+    create_folder(files[file]['folders']['destination'])
+    copy_from_source(os.path.join(files[file]['folders']['source_path'],files[file]['name']),
+                     files[file]['folders']['destination'],
+                     files[file]['name'])
+    if files[file]['folders']['destination_original'] is not False:
+        create_folder(files[file]['folders']['destination_original'])
+        copy_from_source(os.path.join(files[file]['folders']['destination'],files[file]['name']),
+                         files[file]['folders']['destination_original'],
+                         files[file]['name'])
+
+# validate checksum
+for file in tqdm(files, desc = "Generating MD5 Hashes:", ncols = 100):
+    #print(files[file])
+    files[file]['md5_checksums'] = {}
+    for folder in files[file]['folders']:
+        #print(files[file]['folders'][folder])
+        if files[file]['folders'][folder] is not False:
+            #print('folder is not false.')
+            k = os.path.join(files[file]['folders'][folder], files[file]['name'])
+            #print(k)
+            files[file]['md5_checksums'][k] = md5_hash(k)
+            #print(files[file]['md5_checksums'][k])
+
+for file in tqdm(files, desc = "Verifying Checksums:", ncols = 100):
+    i = 0
+    c = {}
+    for checksum in files[file]['md5_checksums']:
+        c[i] = files[file]['md5_checksums'][checksum]
+        if i > 0:
+            P = i - 1
+            if c[i] == c[P]:
+                files[file]['source_cleanable'] = True
+            else:
+                files[file]['source_cleanable'] = False
+                print(f'FATAL: Checksum validation failed for: \
+                    {files[file]["name"]} \n{c[i]}\n is not equal to \n{c[P]}\n')
+                print('\n File Meta:\n')
+                pprint(files[file])
+        i = i + 1
+
+# cleanup sd
+if config['cleanup_sd'] is True:
+    for file in tqdm(files, desc = "Cleaning Up SD:", ncols = 100):
+        if files[file]['source_cleanable'] is True:
+            os.remove(os.path.join(files[file]['folders']['source_path'],files[file]['name']))
+
+#pprint(files)
 print('done.')
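The copy/verify steps above reduce to hashing each copy and treating the source as cleanable only when the digests agree. A self-contained sketch of that check, with hypothetical paths (the script's md5_hash and cmp_files do the equivalent):

    import hashlib
    import shutil

    def md5_hash(path):
        # Read in binary mode and hash the whole file, as the script does.
        with open(path, 'rb') as fh:
            return hashlib.md5(fh.read()).hexdigest()

    src = 'DCIM/100MSDCF/IMG_0001.JPG'          # hypothetical SD card source
    dst = 'imports/2023/2023-08/IMG_0001.JPG'   # hypothetical destination

    shutil.copy(src, dst)
    if md5_hash(src) == md5_hash(dst):
        print('verified; source is cleanable')
    else:
        print(f'FATAL: checksum mismatch for {src}; keeping the source')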