BitMover/dedup.py

92 lines
2.7 KiB
Python
Executable File

#!/usr/bin/env python
import argparse
import sys
import os
from bitmover import copy_from_source
from file_stuff import create_folder, cmp_files
from lumberjack import timber
log = timber(__name__)

# Command-line interface: the folder to process and an optional dry-run switch.
parser = argparse.ArgumentParser()
parser.add_argument("-f", "--folder", help="folder with files to rename")
# nargs="?" lets "-d" be used as a bare switch (value becomes const=True) while
# still accepting the older "-d <value>" spelling, which previously was the
# only form argparse would parse.
parser.add_argument(
    "-d",
    "--dryrun",
    nargs="?",
    const=True,
    help="dry run, no action",
)
args = parser.parse_args()

if not args.folder:
    # parser.error() prints the usage line and this message to stderr and
    # exits with status 2, so a missing folder is reported as a failure
    # instead of a silent exit status of 0.
    parser.error("you need to specify a folder.")
FOLDER = args.folder

# Any non-empty value (or the bare switch) enables dry-run mode.
dry_run = bool(args.dryrun)
def get_file_size(f_name):
    """Return the size in bytes of the file at *f_name*.

    Raises OSError if the file does not exist or cannot be accessed.
    """
    file_info = os.stat(f_name)
    return file_info.st_size
# Files whose names share this many leading characters are treated as copies
# of the same picture.
GROUP_PREFIX_LEN = 23
# Matched case-insensitively against the end of the file name.
JPEG_SUFFIXES = ("jpg", "jpeg")

files = os.listdir(FOLDER)
dup_folder = os.path.join(FOLDER, "__dups")
if not dry_run:
    # A dry run must not touch the file system, so the target folder is only
    # created when files are really going to be moved.
    create_folder(dup_folder)

# Step 1: bucket the JPEG file names in FOLDER by their leading prefix.
dictionary = {}
for name in files:
    if not os.path.isfile(os.path.join(FOLDER, name)):
        continue
    if name.lower().endswith(JPEG_SUFFIXES):
        dictionary.setdefault(name[:GROUP_PREFIX_LEN], []).append(name)

# Step 2: record the full path and size of every file in every group.
f_list = {}
for group, names in dictionary.items():
    entries = {}
    for name in names:
        path = os.path.join(FOLDER, name)
        entries[name] = {'path': path, 'size': os.path.getsize(path)}
    f_list[group] = {'files': entries}

# Step 3: in every group with more than one file, pick the largest file as
# the one to keep. On equal sizes the first file encountered wins.
for group, info in f_list.items():
    log.debug(group)
    entries = info['files']
    if len(entries) <= 1:
        log.debug(f'Only 1 file in {group}')
        continue
    # Both trackers are reset per group and the size starts below zero, so a
    # group made up only of empty files still selects one of its own members
    # rather than inheriting the keeper of a previous group.
    biggest_path = None
    max_size = -1
    for entry in entries.values():
        log.debug(f"{entry['path']}: {entry['size']}")
        if entry['size'] > max_size:
            max_size = entry['size']
            biggest_path = entry['path']
            # Sizes are in bytes; divide (not multiply) to report kilobytes.
            log.debug(f'New Biggest File: {biggest_path}, {max_size / 1024:.1f}KB')
    info['biggest_file'] = biggest_path

# Step 4: move every non-keeper into the duplicates folder. The original is
# deleted only after the copy has been verified against it.
for group, info in f_list.items():
    entries = info['files']
    if len(entries) <= 1:
        continue
    for entry in entries.values():
        src_path = entry['path']
        if src_path == info['biggest_file']:
            continue
        base_name = os.path.basename(src_path)
        dup_path = os.path.join(dup_folder, base_name)
        if dry_run:
            print(f"dry run: would move {src_path} to {dup_path}")
            continue
        # Presumably copies base_name from FOLDER into dup_folder -- the
        # helper is defined in bitmover; confirm there.
        copy_from_source(FOLDER, dup_folder, base_name)
        # Only an explicit True from cmp_files counts as a verified copy.
        file_match = cmp_files(src_path, dup_path)
        if file_match is True:
            os.remove(src_path)
        else:
            print(f"{src_path} does not match {dup_path}")