refactoring and add tqdm

parent c884753f09
commit c23164d428

import_media.py (265 changed lines)
@@ -4,22 +4,11 @@
 Import photos from SD card into folder with todays date + nickname
 Use: importphotos (--jpg|--raw|--both) <nickname of folder (optional)>
 Add script to path
-'''

-'''
 TODO:
-1. Import configuration from config file
-2. Set raw file extension based on camera specified in configuration
-3. Create destination folders based on concatination of configuration,
-   metadata, and event name passed from ARG
-4. Create destination sub-folder based on filetype
-5. Copy files to appropriate folder
-6. Compare files from source
-7. Create 'originals' with copy of files from destination after
-   checksum for photos only
 8. Optinally allow specification of a backup location on another disk
    or NAS to ship a 3rd copy to
-9. Optionally cleanup SD only after checksum matching
 10. Every config option has an arg override
 11. Optionally rename file if event name was passed in
 -- STRETCH --
@@ -28,25 +17,27 @@ TODO:

 import os
 import sys
-import yaml
+from pprint import pprint
 import argparse
 import shutil
 import hashlib
 from datetime import datetime
+from tqdm import tqdm
+import yaml
 import exifread
 import ffmpeg

-config_file = 'config.yaml'
+CONFIG_FILE = 'config.yaml'
+files = {}

 # Read configuration from file
 try:
-    with open(config_file, 'r') as f:
+    with open(CONFIG_FILE, 'r') as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
 except FileNotFoundError:
-    print("Configuration file not found: ", config_file)
-    print("Copy config.yaml.EXAMPLE to ", config_file, " and update accordingly.")
+    print("Configuration file not found: ", CONFIG_FILE)
+    print("Copy config.yaml.EXAMPLE to ", CONFIG_FILE, " and update accordingly.")

-''' Parse Arguments '''
 parser = argparse.ArgumentParser()
 parser.add_argument("-e", "--event", help = "Event Name")

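A side note on the configuration block, not part of this commit: yaml.safe_load is the simpler equivalent of yaml.load(f, Loader=yaml.FullLoader) for plain config files, and exiting when the file is missing avoids a later NameError on config, since the except branch above prints and keeps running. A minimal sketch:

    import sys
    import yaml

    CONFIG_FILE = 'config.yaml'
    try:
        with open(CONFIG_FILE, 'r') as f:
            config = yaml.safe_load(f)  # refuses to construct arbitrary Python objects
    except FileNotFoundError:
        print("Configuration file not found: ", CONFIG_FILE)
        print("Copy config.yaml.EXAMPLE to ", CONFIG_FILE, " and update accordingly.")
        sys.exit(1)  # without this, the script later hits NameError on `config`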
@@ -56,29 +47,24 @@ if args.event:
     event = args.event

 def md5_hash(f):
-    print("calculating md5 for ", f)
+    """ calculates and returns md5 hash """
+    #print("calculating md5 for ", f)
     md5 = hashlib.md5(open(f, 'rb').read()).hexdigest()
     return md5

 def cmp_files(f1,f2):
-    print('comparing md5 hashes...')
+    """ Use file hashes to compare files """
     return md5_hash(f1) == md5_hash(f2)

-def file_classification(f):
-    print('Classifying media for: ', f)
-    for classification in config['file_types']:
-        for ext in config['file_types'][classification]:
-            if f.lower().endswith(ext):
-                c = classification
-    return classification

 def get_capture_date(p, t):
+    """ get capture date from meta """
     if t == 'image':
         with open(p, 'rb') as f:
             tags = exifread.process_file(f)
         stamp = datetime.strptime(str(tags['EXIF DateTimeOriginal']), '%Y:%m:%d %H:%M:%S')
     elif t == 'video':
-        stamp = datetime.strptime(ffmpeg.probe(p)['format']['tags']['creation_time'], '%Y-%m-%dT%H:%M:%S.%f%z')
+        stamp = datetime.strptime(ffmpeg.probe(p)['format']['tags']['creation_time'],
+                                  '%Y-%m-%dT%H:%M:%S.%f%z')
     elif t == 'audio':
         stamp = datetime.strptime(ffmpeg.probe(p)['format']['tags']['date'], '%Y-%m-%d')
     else:
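get_capture_date assumes every image carries an 'EXIF DateTimeOriginal' tag; a file without EXIF raises a KeyError here. A hedged sketch of a fallback to the file's modification time (capture_or_mtime is a hypothetical helper, not in this commit):

    import os
    from datetime import datetime

    import exifread

    def capture_or_mtime(p):
        """ Hypothetical fallback: EXIF capture date if present, else the file's mtime. """
        with open(p, 'rb') as f:
            tags = exifread.process_file(f, details=False)
        tag = tags.get('EXIF DateTimeOriginal')
        if tag is not None:
            return datetime.strptime(str(tag), '%Y:%m:%d %H:%M:%S')
        return datetime.fromtimestamp(os.path.getmtime(p))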
@@ -90,113 +76,166 @@ def get_capture_date(p, t):
     return year, month, day

 def create_folder(f):
+    """ Function to create folder """
     try:
         os.makedirs(f)
-    except FileExistsError as exists:
-        print()
+    except FileExistsError:
+        pass

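The try/except in create_folder can be collapsed: os.makedirs accepts exist_ok=True (Python 3.2+), which makes the FileExistsError handler unnecessary. A minimal sketch:

    import os

    def create_folder(f):
        """ Create the folder, silently accepting that it may already exist. """
        os.makedirs(f, exist_ok=True)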
-def copy_from_source(p, dest_folder, dest_orig_folder, file):
-    if os.path.exists(os.path.join(dest_folder, file)):
-        check_match = cmp_files(p, os.path.join(dest_folder, file))
-        if check_match == False:
+def copy_from_source(p,d,f):
+    """ Copy file from source to destination """
+    if os.path.exists(os.path.join(d, f)):
+        check_match = cmp_files(p, os.path.join(d, f))
+        if check_match is False:
             print(f'Found duplicate for {p}, renaming destination with md5 appended.')
-            base, extension = os.path.splitext(file)
-            file_name_hash = base + '_' + md5_hash(os.path.join(dest_folder, file)) + extension
-            os.rename(os.path.join(dest_folder, file), os.path.join(dest_folder, file_name_hash))
+            base, extension = os.path.splitext(f)
+            file_name_hash = base + '_' + md5_hash(os.path.join(d, f)) + extension
+            os.rename(os.path.join(d, f), os.path.join(d, file_name_hash))

-            shutil.copy(p, dest_folder)
-            check_match = cmp_files(p, dest_folder + '/' + file)
-            if check_match == False:
-                print(f'CRITICAL: md5 hash does not match for {file}')
-                print(p, ': ', md5_hash(p))
-                print(dest_folder + '/' + file, ': ', md5_hash(dest_folder + '/' + file))
-                exit

-            if dest_orig_folder != False:
-                shutil.copy(dest_folder + '/' + file, dest_orig_folder)
-                check_match = cmp_files(dest_folder + '/' + file, dest_orig_folder + '/' + file)
-                if check_match == False:
-                    print(f'CRITICAL: md5 hash does not match for {file}')
-                    print(dest_folder + '/' + file, ': ', md5_hash(dest_folder + '/' + file))
-                    print(dest_orig_folder + '/' + file, ': ', md5_hash(dest_orig_folder + '/' + file))
-                    exit
         else:
-            shutil.copy(p, dest_folder)
-            check_match = cmp_files(p, dest_folder + '/' + file)
-            if check_match == False:
-                print(f'CRITICAL: md5 hash does not match for {file}')
-                print(p, ': ', md5_hash(p))
-                print(dest_folder + '/' + file, ': ', md5_hash(dest_folder + '/' + file))
-                exit
+            return

-    if dest_orig_folder != False:
-        shutil.copy(dest_folder + '/' + file, dest_orig_folder)
-        check_match = cmp_files(dest_folder + '/' + file, dest_orig_folder + '/' + file)
-        if check_match == False:
-            print(f'CRITICAL: md5 hash does not match for {file}')
-            print(dest_folder + '/' + file, ': ', md5_hash(dest_folder + '/' + file))
-            print(dest_orig_folder + '/' + file, ': ', md5_hash(dest_orig_folder + '/' + file))
-            exit
+    shutil.copy(p, d)

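One caveat while this function is reshaped: shutil.copy carries file contents and permission bits but not timestamps. For a photo archive, shutil.copy2 also preserves modification times. A sketch under the same p/d argument convention (copy_preserving_times is a hypothetical name, not in this commit):

    import os
    import shutil

    def copy_preserving_times(p, d):
        """ Copy file p into directory d, keeping the source timestamps. """
        return shutil.copy2(p, os.path.join(d, os.path.basename(p)))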
-    # Blindly assume md5 check has passed...
-    if config['cleanup_sd'] == True:
-        os.remove(p)

-def process_file(p, t, file, ext):
-    capture_date = get_capture_date(p, t)
-    y = capture_date[0]
-    m = capture_date[1]
-    d = capture_date[2]
+def process_file(path, f_type, f_name, ext):
+    """ gather information and add to dictionary """
+    i = os.path.join(path,f_name)
+    files[i] = { 'folders': {}, 'date': {} }
+    files[i]['folders']['source_path'] = path
+    files[i]['type'] = f_type
+    files[i]['name'] = f_name
+    files[i]['extension'] = ext

+    files[i]['date']['capture_date'] = get_capture_date(
+        os.path.join(files[i]['folders']['source_path'],
+        files[i]['name']),files[i]['type'])
+    files[i]['date']['y'] = files[i]['date']['capture_date'][0]
+    files[i]['date']['m'] = files[i]['date']['capture_date'][1]
+    files[i]['date']['d'] = files[i]['date']['capture_date'][2]

     if event:
-        dest_folder = config['folders']['destination']['base'] + '/' + y + '/' + y + '-' + m + '/' + y + '-' + m + '-' + d + '-' + event
+        files[i]['folders']['destination'] = config['folders']['destination']['base'] + \
+            '/' + files[i]['date']['y'] + '/' + \
+            files[i]['date']['y'] + '-' + \
+            files[i]['date']['m'] + '/' + \
+            files[i]['date']['y'] + '-' + \
+            files[i]['date']['m'] + '-' + \
+            files[i]['date']['d'] + '-' + \
+            event
     else:
-        dest_folder = config['folders']['destination']['base'] + '/' + y + '/' + y + '-' + m + '/' + y + '-' + m + '-' + d
+        files[i]['folders']['destination'] = config['folders']['destination']['base'] + \
+            '/' + files[i]['date']['y'] + '/' + \
+            files[i]['date']['y'] + '-' + \
+            files[i]['date']['m'] + '/' + \
+            files[i]['date']['y'] + '-' + \
+            files[i]['date']['m'] + '-' + \
+            files[i]['date']['d']

-    if t == 'image':
-        dest_folder = dest_folder + '/photos'
+    if files[i]['type'] == 'image':
+        files[i]['folders']['destination'] = files[i]['folders']['destination'] + '/PHOTO'

-        if config['store_originals'] == True:
-            dest_orig_folder = dest_folder + '/ORIGINALS'
+        if config['store_originals'] is True:
+            files[i]['folders']['destination_original'] = files[i]['folders']['destination'] + \
+                '/ORIGINALS'

-        if ext in ('jpg', 'jpeg'):
-            dest_folder = dest_folder + '/JPG'
-            if dest_orig_folder:
-                dest_orig_folder = dest_orig_folder + '/JPG'
+        if files[i]['extension'] in ('jpg', 'jpeg'):
+            files[i]['folders']['destination'] = files[i]['folders']['destination'] + \
+                '/JPG'
+            if files[i]['folders']['destination_original']:
+                files[i]['folders']['destination_original'] = \
+                    files[i]['folders']['destination_original'] + '/JPG'
         else:
-            dest_folder = dest_folder + '/RAW'
-            if dest_orig_folder:
-                dest_orig_folder = dest_orig_folder + '/RAW'
+            files[i]['folders']['destination'] = files[i]['folders']['destination'] + '/RAW'
+            if files[i]['folders']['destination_original']:
+                files[i]['folders']['destination_original'] = \
+                    files[i]['folders']['destination_original'] + '/RAW'

-    elif t == 'video':
-        dest_folder = dest_folder + '/VIDEO'
+    elif files[i]['type'] == 'video':
+        files[i]['folders']['destination'] = files[i]['folders']['destination'] + '/VIDEO'

-    elif t == 'audio':
-        dest_folder = dest_folder + '/AUDIO'
+    elif files[i]['type'] == 'audio':
+        files[i]['folders']['destination'] = files[i]['folders']['destination'] + '/AUDIO'

     else:
-        print(f'WARN: {t} is not a known type and you never should have landed here.')
+        print('WARN: ', files[i]['type'], ' is not a known type and you never should have landed here.')

-    create_folder(dest_folder)

     try:
-        dest_orig_folder
-    except NameError:
-        dest_orig_folder = False
-    else:
-        create_folder(dest_orig_folder)
+        files[i]['folders']['destination_original']
+    except (NameError, KeyError):
+        files[i]['folders']['destination_original'] = False

-    copy_from_source(p, dest_folder, dest_orig_folder, file)
+    # copy_from_source(p, d, o, file)

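The destination path above is assembled by '/' concatenation across eight continuation lines. os.path.join plus an f-string expresses the same base/YYYY/YYYY-MM/YYYY-MM-DD[-event] layout more compactly; build_destination below is a hypothetical helper, not part of the commit:

    import os

    def build_destination(base, y, m, d, event=None):
        """ Compose base/YYYY/YYYY-MM/YYYY-MM-DD[-event] without manual '/' joins. """
        leaf = f'{y}-{m}-{d}' + (f'-{event}' if event else '')
        return os.path.join(base, y, f'{y}-{m}', leaf)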
-def file_list(directory):
-    for folder, subfolders, filename in os.walk(directory):
-        for t in config['file_types']:
-            for ext in config['file_types'][t]:
-                for file in filename:
+def find_files(directory):
+    """ find files to build a dictionary out of """
+    for folder, subfolders, filename in tqdm(os.walk(directory), desc = 'Finding Files', ncols = 100):
+        for f_type in config['file_types']:
+            for ext in tqdm(config['file_types'][f_type], desc = 'Finding ' + f_type + ' Files', ncols = 100):
+                for file in tqdm(filename, desc = 'Finding ' + ext + ' Files', ncols = 100):
                     if file.lower().endswith(ext):
-                        p = folder + '/' + file
-                        process_file(p, t, file, ext)
+                        process_file(folder, f_type, file, ext)

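A tqdm caveat in find_files: os.walk returns a generator with no known length, so the outer bar can only show a running count, never a percentage or ETA. Materialising the walk first (cheap next to hashing and copying) restores a full bar; a sketch:

    import os
    from tqdm import tqdm

    def find_files(directory):
        """ Walk once up front so tqdm knows the total number of folders. """
        walk = list(os.walk(directory))
        for folder, subfolders, filenames in tqdm(walk, desc='Finding Files', ncols=100):
            ...  # classify and process files as above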
-file_list(config['folders']['source']['base'])
+find_files(config['folders']['source']['base'])

+#pprint(files)

+#for file in files:
+#    pprint(files[file]['folders'])

+# Copy File
+for file in tqdm(files, desc = "Copying Files:", ncols = 100):
+    create_folder(files[file]['folders']['destination'])
+
+    copy_from_source(os.path.join(files[file]['folders']['source_path'],files[file]['name']),
+                     files[file]['folders']['destination'],
+                     files[file]['name'])
+
+    if files[file]['folders']['destination_original'] is not False:
+        create_folder(files[file]['folders']['destination_original'])
+
+        copy_from_source(os.path.join(files[file]['folders']['destination'],files[file]['name']),
+                         files[file]['folders']['destination_original'],
+                         files[file]['name'])

+# validate checksum
+for file in tqdm(files, desc = "Generating MD5 Hashes:", ncols = 100):
+
+    #print(files[file])
+    files[file]['md5_checksums'] = {}
+    for folder in files[file]['folders']:
+        #print(files[file]['folders'][folder])
+        if files[file]['folders'][folder] is not False:
+            #print('folder is not false.')
+            k = os.path.join(files[file]['folders'][folder], files[file]['name'])
+            #print(k)
+
+            files[file]['md5_checksums'][k] = md5_hash(k)
+            #print(files[file]['md5_checksums'][k])

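md5_hash still reads each file into memory in one go (open(f, 'rb').read()), and this loop hashes two or three copies of every file, so large videos get loaded whole repeatedly. A chunked variant keeps memory flat; a sketch of a drop-in replacement:

    import hashlib

    def md5_hash(path, chunk_size=1024 * 1024):
        """ MD5 of a file, read in 1 MiB chunks so large files never sit in RAM. """
        md5 = hashlib.md5()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(chunk_size), b''):
                md5.update(chunk)
        return md5.hexdigest()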
+for file in tqdm(files, desc = "Verifying Checksums:", ncols = 100):
+    i = 0
+    c = {}
+    for checksum in files[file]['md5_checksums']:
+        c[i] = files[file]['md5_checksums'][checksum]
+        if i > 0:
+            P = i - 1
+            if c[i] == c[P]:
+                files[file]['source_cleanable'] = True
+            else:
+                files[file]['source_cleanable'] = False
+                print(f'FATAL: Checksum validation failed for: \
+                    {files[file]["name"]} \n{c[i]}\n is not equal to \n{c[P]}\n')
+                print('\n File Meta:\n')
+                pprint(files[file])
+        i = i + 1

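The i/P index bookkeeping above compares each checksum only with its predecessor. Since every copy has to match every other, collapsing the hashes into a set expresses the same check in two lines; a sketch of the loop body:

    # All copies are identical exactly when the hashes collapse to one value.
    hashes = set(files[file]['md5_checksums'].values())
    files[file]['source_cleanable'] = (len(hashes) == 1)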
+# cleanup sd
+if config['cleanup_sd'] is True:
+    for file in tqdm(files, desc = "Cleaning Up SD:", ncols = 100):
+        if files[file]['source_cleanable'] is True:
+            os.remove(os.path.join(files[file]['folders']['source_path'],files[file]['name']))
+
+#pprint(files)
 print('done.')