BitMover/_hashing.py

74 lines
2.3 KiB
Python

#!/usr/bin/env python
"""
xxHash helpers: hash files and path strings, then generate and validate per-file checksums
"""
import os
import xxhash
from tqdm import tqdm
# from configure import Configure, CONFIG_FILE
from lumberjack import timber
# c = Configure(CONFIG_FILE)
# config = c.load_config()
# Module-level logger shared by the functions below, created via timber()
# from the lumberjack module and named after this module.
log = timber(__name__)
def xx_hash(file, chunk_size=64 * 1024):
    """Calculate and return the xxHash (xxh64) hex digest of a file.

    The file is opened in binary mode and read in fixed-size chunks, so it
    never has to fit in memory. A tqdm progress bar sized to the file's byte
    length is advanced by the length of every chunk that is hashed.

    Args:
        file: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration. The digest does
            not depend on this value; larger chunks only mean fewer read
            calls and fewer progress-bar updates.

    Returns:
        The xxh64 digest of the file contents as a hexadecimal string.

    Raises:
        ValueError: If chunk_size is less than 1.
        OSError: If the file cannot be stat'ed or opened.
    """
    # A zero-sized read returns b"" immediately, which would end the loop
    # before any data is hashed and silently yield the empty-input digest.
    if chunk_size < 1:
        raise ValueError(f'chunk_size must be positive, got {chunk_size}')

    size = os.path.getsize(file)
    hasher = xxhash.xxh64()
    with open(file, 'rb') as f, tqdm(
            total=size,
            unit='B',
            unit_scale=True,
            desc=f'Getting hash for {os.path.basename(file)}') as pbar:
        # iter() with a sentinel keeps reading until read() returns b"" (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            hasher.update(chunk)
            pbar.update(len(chunk))
    return hasher.hexdigest()
def hash_path(path):
    """Return the xxh64 hex digest of the value passed in as a path.

    Only the given value itself is hashed; no file is opened or read.
    """
    return xxhash.xxh64(path).hexdigest()
def gen_xxhashes(f):
    """Generate an xxHash checksum for every stored copy of every file.

    For each key in ``f`` the entry receives a fresh ``'xx_checksums'`` dict.
    Every value in the entry's ``'folders'`` mapping is joined with the
    entry's ``'name'`` to form a full path, that path is hashed with
    xx_hash(), and the digest is stored under the path. A joined path that
    is identical to the bare name is skipped.

    Args:
        f: Mapping of file keys to per-file metadata; each entry is read for
            ``'folders'`` and ``'name'`` and is modified in place.
    """
    log.debug(f'gen_xxhashes({f})')
    for file in tqdm(f, desc = "Generating xx Hashes:"):
        # Clear the terminal before handling each file.
        os.system('clear')
        entry = f[file]
        log.debug(entry)
        checksums = {}
        entry['xx_checksums'] = checksums
        folders = entry['folders']
        for folder in folders:
            directory = folders[folder]
            name = entry['name']
            target = os.path.join(directory, name)
            if target == name:
                # Joining added nothing to the name, so there is no
                # distinct location to hash.
                continue
            log.debug(target)
            digest = xx_hash(target)
            checksums[target] = digest
            log.debug(f"{target}: {digest}")
        log.debug(entry)
def validate_xx_checksums(f):
    """Validate that all checksums recorded for each file agree.

    For every key in ``f`` the values of the entry's ``'xx_checksums'``
    mapping are compared in adjacent pairs, in iteration order.
    ``'source_cleanable'`` is set to True only when every pair matches.
    Any mismatch sets it to False, is logged at CRITICAL level, and can no
    longer be overridden by a later pair that happens to match.

    Entries with fewer than two checksums have nothing to compare and are
    left untouched (``'source_cleanable'`` is not written for them).

    Args:
        f: Mapping of file keys to per-file metadata; each entry is read for
            ``'xx_checksums'`` and ``'name'`` and is modified in place.
    """
    for file in tqdm(f, desc = "Verifying Checksums:"):
        # Clear the terminal before handling each file.
        os.system('clear')
        meta = f[file]
        checksums = list(meta['xx_checksums'].values())
        if len(checksums) < 2:
            continue

        all_match = True
        for previous, current in zip(checksums, checksums[1:]):
            if current == previous:
                continue
            # Record the failure before logging so the dumped metadata
            # already shows the entry as not cleanable.
            all_match = False
            meta['source_cleanable'] = False
            log.critical(
                f'FATAL: Checksum validation failed for: '
                f'{meta["name"]} \n{current}\n is not equal to \n{previous}\n')
            log.debug('\n File Meta:\n')
            log.debug(meta)

        # Only a run with no mismatches may mark the source as cleanable.
        if all_match:
            meta['source_cleanable'] = True