92 lines
2.7 KiB
Python
Executable File
92 lines
2.7 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
import argparse
|
|
import sys
|
|
import os
|
|
|
|
from bitmover import copy_from_source
|
|
from file_stuff import create_folder, cmp_files
|
|
from lumberjack import timber
|
|
|
|
log = timber(__name__)

# Command-line interface: the folder to scan is mandatory, dry-run is optional.
parser = argparse.ArgumentParser()
parser.add_argument("-f", "--folder", help="folder with files to rename")
# nargs="?" lets "-d" work as a bare switch (const=True) while still accepting
# the older "-d VALUE" spelling, which used to be the only accepted form.
parser.add_argument("-d", "--dryrun", nargs="?", const=True,
                    help="dry run, no action")

args = parser.parse_args()

if not args.folder:
    # Passing a string to sys.exit() prints it to stderr and exits with
    # status 1, so callers can tell that nothing was processed.
    sys.exit("you need to specify a folder. Dedup")
FOLDER = args.folder

# Any truthy value (the bare switch or a non-empty VALUE) enables dry-run.
dry_run = bool(args.dryrun)
|
|
|
|
def get_file_size(f_name):
    """Return the size, in bytes, of the file at path *f_name*.

    Raises OSError if the file does not exist or cannot be accessed.
    """
    file_stat = os.stat(f_name)
    return file_stat.st_size
|
|
|
|
|
|
# Module-level working state for the scan below and the passes that follow it.
l = []
files = os.listdir(FOLDER)
dup_folder = os.path.join(FOLDER, "__dups")
SIZE = 0
MAX_SIZE = 0
BIGGEST_FILE = ''

create_folder(dup_folder)
f_list = {}
dictionary = {}

# Bucket the JPEG files in FOLDER by the first 23 characters of their name;
# files sharing that prefix are treated as one group.
for entry in files:
    if not os.path.isfile(os.path.join(FOLDER, entry)):
        continue
    if not entry.lower().endswith(("jpg", "jpeg")):
        continue
    dictionary.setdefault(entry[:23], []).append(entry)

# For each group, record the full path and on-disk size of every member:
# f_list[prefix]['files'][file_name] = {'path': ..., 'size': ...}
for prefix, members in dictionary.items():
    details = {}
    for member in members:
        full_path = os.path.join(FOLDER, member)
        details[member] = {
            'path': full_path,
            'size': os.path.getsize(full_path),
        }
    f_list[prefix] = {'files': details}
|
|
|
|
# print(f_list)
|
|
|
|
# For every group with more than one file, choose the largest file as the one
# to keep and store its path under f_list[group]['biggest_file'].
for g in f_list:
    log.debug(g)
    group_files = f_list[g]['files']
    if len(group_files) <= 1:
        log.debug(f'Only 1 file in {g}')
        continue

    for info in group_files.values():
        log.debug(f"{info['path']}: {info['size']}")

    # max() returns the first of equally sized files and always returns a
    # member of this group, so a group made up only of zero-byte files no
    # longer inherits the previous group's BIGGEST_FILE.
    keeper = max(group_files.values(), key=lambda info: info['size'])
    MAX_SIZE = keeper['size']
    BIGGEST_FILE = keeper['path']
    # Sizes are in bytes; divide (not multiply) by 1024 to report KB.
    log.debug(f'New Biggest File: {BIGGEST_FILE}, {MAX_SIZE / 1024:.1f}KB')
    f_list[g]['biggest_file'] = BIGGEST_FILE
|
|
|
|
# Move every file that is not its group's biggest file into dup_folder:
# copy it, verify the copy, and only then delete the original.
for g in f_list:
    if len(f_list[g]['files']) <= 1:
        continue

    keeper = f_list[g]['biggest_file']
    for info in f_list[g]['files'].values():
        src = info['path']
        if src == keeper:
            continue

        base_name = os.path.basename(src)
        dst = os.path.join(dup_folder, base_name)

        # Honor the dry-run option: report the intended action, touch nothing.
        if dry_run:
            print(f"dry run: would move {src} to {dst}")
            continue

        copy_from_source(FOLDER, dup_folder, base_name)

        file_match = cmp_files(src, dst)

        # Delete the original only when the comparison explicitly succeeded.
        if file_match is True:
            os.remove(src)
        else:
            print(f"{src} does not match {dst}")