#!/usr/bin/env python
"""Move the smaller duplicates of JPEG files into a ``__dups`` subfolder.

JPEG files directly inside the target folder are grouped by the first
``GROUP_KEY_LENGTH`` characters of their file name.  Within every group that
holds more than one file, the largest file (by size on disk) is kept in
place; each other file is copied into ``<folder>/__dups`` with
``copy_from_source`` and the original is deleted only when ``cmp_files``
reports that the copy matches.

Usage:
    script.py -f FOLDER [-d]

With ``-d/--dryrun`` nothing is created, copied or deleted; the planned
moves are printed instead.
"""
import argparse
import os
import sys

from bitmover import copy_from_source
from file_stuff import create_folder, cmp_files
from lumberjack import timber

log = timber(__name__)

# Files whose names share this many leading characters form one group.
# NOTE(review): presumably the prefix is a timestamp-style stem -- confirm.
GROUP_KEY_LENGTH = 23

# Lower-cased name endings that mark a file as a JPEG candidate.
JPEG_SUFFIXES = ("jpg", "jpeg")

parser = argparse.ArgumentParser()
parser.add_argument("-f", "--folder", help="folder with files to rename")
# nargs="?" lets "-d" work as a bare flag while still accepting the legacy
# "-d VALUE" form, which the option previously required.
parser.add_argument(
    "-d", "--dryrun",
    nargs="?", const=True, default=False,
    help="dry run, no action",
)
args = parser.parse_args()

if args.folder:
    FOLDER = args.folder
else:
    print("you need to specify a folder.")
    # A missing required argument is an error, so exit non-zero.
    sys.exit(1)

dry_run = bool(args.dryrun)


def get_file_size(f_name):
    """Return the size of the file at *f_name* in bytes."""
    return os.path.getsize(f_name)


def _group_jpegs(folder, names):
    """Group the JPEG file names in *names* by their leading characters.

    Only entries that are regular files inside *folder* and whose
    lower-cased name ends in one of ``JPEG_SUFFIXES`` are kept.

    Returns:
        dict mapping each name prefix to the list of file names sharing it,
        in the order they appear in *names*.
    """
    groups = {}
    for name in names:
        if not os.path.isfile(os.path.join(folder, name)):
            continue
        if not name.lower().endswith(JPEG_SUFFIXES):
            continue
        groups.setdefault(name[:GROUP_KEY_LENGTH], []).append(name)
    return groups


def _find_biggest(group_files):
    """Return the path of the largest file described in *group_files*.

    *group_files* maps file names to ``{'path': ..., 'size': ...}`` dicts.
    On a tie the first file encountered wins.
    """
    biggest_path = None
    # Start below zero so that a group made only of empty files still picks
    # one of its own members instead of leaving the result unset.
    max_size = -1
    for info in group_files.values():
        log.debug(f"{info['path']}: {info['size']}")
        if info['size'] > max_size:
            max_size = info['size']
            biggest_path = info['path']
            log.debug(
                f'New Biggest File: {biggest_path}, {max_size / 1024:.1f}KB'
            )
    return biggest_path


def _retire_duplicate(folder, dup_dir, src_path, *, dry_run=False):
    """Copy *src_path* into *dup_dir* and delete the original if they match.

    When *dry_run* is true the planned move is printed and nothing on disk
    is touched.
    """
    name = os.path.basename(src_path)
    dup_path = os.path.join(dup_dir, name)
    if dry_run:
        print(f"dry run: would move {src_path} to {dup_path}")
        return
    copy_from_source(folder, dup_dir, name)
    # Only delete the original once the copy has been verified.
    if cmp_files(src_path, dup_path) is True:
        os.remove(src_path)
    else:
        print(f"{src_path} does not match {dup_path}")


files = os.listdir(FOLDER)
dup_folder = os.path.join(FOLDER, "__dups")
if not dry_run:
    create_folder(dup_folder)

# prefix -> [file names]
dictionary = _group_jpegs(FOLDER, files)

# prefix -> {'files': {name: {'path', 'size'}}, 'biggest_file': path}
# ('biggest_file' is only present for groups with more than one file).
f_list = {}
for g, names in dictionary.items():
    f_list[g] = {'files': {}}
    for f in names:
        p = os.path.join(FOLDER, f)
        f_list[g]['files'][f] = {'path': p, 'size': get_file_size(p)}

# First pass: decide which file of every multi-file group is kept.
for g, entry in f_list.items():
    log.debug(g)
    if len(entry['files']) > 1:
        entry['biggest_file'] = _find_biggest(entry['files'])
    else:
        log.debug(f'Only 1 file in {g}')

# Second pass: move every other file of those groups into the dups folder.
for g, entry in f_list.items():
    if len(entry['files']) <= 1:
        continue
    for info in entry['files'].values():
        if info['path'] != entry['biggest_file']:
            _retire_duplicate(
                FOLDER, dup_folder, info['path'], dry_run=dry_run
            )