# Forum post by Usr6 -- posted June 18, 2014.
#
# As the title suggests, this script looks for every duplicate file on the
# computer (plus attached drives) and writes them to duplicate.txt as:
#
#     md5
#             file size 1    file path
#             file size 2    file path
#     etc.
#
# Because MD5 is no longer considered 100% safe, the results of this script
# cannot be 100% accurate either; @fallen_angel was lucky enough to run into
# a collision during testing.  It should work without problems on any
# Windows / *nix / Ubuntu system that has Python 2.7 installed.
#
# NOTE(review): line breaks and indentation were lost when the post was
# scraped; the layout below (including the banner) is a reconstruction.
# The code is written to run unchanged on Python 2.7 and Python 3.

#!/usr/bin/env python
import os
import sys
import hashlib

BANNER = (
    "##############################\n"
    "# Rst Duplicate file finder  #\n"
    "#           Usr6             #\n"
    "##############################"
)

# Candidate Windows drive letters; each is probed as "<letter>:\\".
alfabet = list("abcdefghijklmnopqrstuvwxyz")

# MD5 hex digest -> list of paths of the files that have that digest.
biblioteca = {}


def log_bug(bug):
    """Append the text of exception *bug* to bugs.txt in the working dir."""
    with open("bugs.txt", "a") as handle:
        handle.write(str(bug) + "\n")


def hashfile(afile):
    """Return the hexadecimal MD5 digest of the file at path *afile*.

    The file is read in BLOCKSIZE-byte chunks so that large files are never
    held in memory at once.  Adapted from
    http://www.pythoncentral.io/hashing-files-with-python/
    """
    BLOCKSIZE = 65536
    hasher = hashlib.md5()
    with open(afile, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for buf in iter(lambda: stream.read(BLOCKSIZE), b""):
            hasher.update(buf)
    return hasher.hexdigest()


def fileparsing(root):
    """Walk *root* recursively and group every regular file by MD5 digest.

    Each hashed path is appended to ``biblioteca[digest]`` and echoed to
    stdout.  Any error raised while handling a file is logged to bugs.txt
    and the scan continues with the next file.
    """
    for folder, subFolders, files in os.walk(root):
        for name in files:
            fullpath = os.path.join(folder, name)
            # Skip devices, FIFOs, sockets and broken links: reading e.g.
            # /dev/zero never reaches end-of-file and would hang the scan.
            if not os.path.isfile(fullpath):
                continue
            try:
                md5hash = hashfile(fullpath)
                # setdefault is a single hash lookup; the original tested
                # membership in .keys(), a linear list scan on Python 2.
                biblioteca.setdefault(md5hash, []).append(fullpath)
                print(fullpath)
            except Exception as bug:
                log_bug(bug)


def scan_roots():
    """Return the directories to scan.

    These are the existing Windows drive roots ("c:\\", "d:\\", ...), or
    ["/"] when no drive root exists (i.e. on Unix-like systems).  The
    original loop fell through to an ``else`` that scanned "/" in addition
    to the drives, which hashed files twice and reported them as
    duplicates of themselves.
    """
    drives = [letter + ":\\" for letter in alfabet
              if os.path.isdir(letter + ":\\")]
    return drives or ["/"]


def write_report(groups, report_path="duplicate.txt"):
    """Append every group of two or more paths in *groups* to *report_path*.

    *groups* maps a digest to a list of paths.  The report file is opened
    only when at least one duplicate group exists, because the final
    summary uses the file's existence to tell whether duplicates were
    found.  A file that disappeared since the scan is still listed, with
    "?" as its size, and the error is logged to bugs.txt.

    Returns the number of duplicate groups written.
    """
    duplicates = [(key, paths) for key, paths in groups.items()
                  if len(paths) > 1]
    if not duplicates:
        return 0
    with open(report_path, "a") as handle:
        for key, paths in duplicates:
            handle.write(str(key) + "\n")
            for f_path in paths:
                try:
                    size = str(os.path.getsize(f_path))
                except OSError as bug:
                    log_bug(bug)
                    size = "?"
                handle.write("\t\t" + size + "\t" + str(f_path) + "\n")
    return len(duplicates)


def main():
    """Scan every root, write duplicate.txt and print a short summary."""
    print(BANNER)
    for root in scan_roots():
        fileparsing(root)
    write_report(biblioteca)
    print("\nRezultate:")
    print("Buguri: "
          + ("bugs.txt" if os.path.isfile("bugs.txt") else "N-am gasit"))
    print("Duplicate:  "
          + ("duplicate.txt" if os.path.isfile("duplicate.txt")
             else "N-am gasit"))


if __name__ == "__main__":
    main()

# Forum footer: 1 Quote
fallen_angel Posted June 18, 2014 Report Posted June 18, 2014 Fă-l bre să introduci path-ul de unde să înceapă să scaneze Quote
# Forum post by Usr6 (thread author) -- posted June 18, 2014, edited
# June 18, 2014 by Usr6.
#
# Quoting fallen_angel: "Make it so that you can enter the path where the
# scan should start."
# Reply: "If you really insist..."
#
# NOTE(review): line breaks and indentation were lost when the post was
# scraped; the layout below (including the banner) is a reconstruction.
# The code is written to run unchanged on Python 2.7 and Python 3.

#!/usr/bin/env python
import os
import sys
import hashlib

BANNER = (
    "##############################\n"
    "# Rst Duplicate file finder  #\n"
    "#           Usr6             #\n"
    "##############################\n"
    "#fallen edition"
)

# MD5 hex digest -> list of paths of the files that have that digest.
biblioteca = {}

try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3


def log_bug(bug):
    """Append the text of exception *bug* to bugs.txt in the working dir."""
    with open("bugs.txt", "a") as handle:
        handle.write(str(bug) + "\n")


def hashfile(afile):
    """Return the hexadecimal MD5 digest of the file at path *afile*.

    The file is read in BLOCKSIZE-byte chunks so that large files are never
    held in memory at once.  Adapted from
    http://www.pythoncentral.io/hashing-files-with-python/
    """
    BLOCKSIZE = 65536
    hasher = hashlib.md5()
    with open(afile, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for buf in iter(lambda: stream.read(BLOCKSIZE), b""):
            hasher.update(buf)
    return hasher.hexdigest()


def fileparsing(root):
    """Walk *root* recursively and group every regular file by MD5 digest.

    Each hashed path is appended to ``biblioteca[digest]`` and echoed to
    stdout.  Any error raised while handling a file is logged to bugs.txt
    and the scan continues with the next file.
    """
    for folder, subFolders, files in os.walk(root):
        for name in files:
            fullpath = os.path.join(folder, name)
            # Skip devices, FIFOs, sockets and broken links: reading e.g.
            # /dev/zero never reaches end-of-file and would hang the scan.
            if not os.path.isfile(fullpath):
                continue
            try:
                md5hash = hashfile(fullpath)
                # setdefault is a single hash lookup; the original tested
                # membership in .keys(), a linear list scan on Python 2.
                biblioteca.setdefault(md5hash, []).append(fullpath)
                print(fullpath)
            except Exception as bug:
                log_bug(bug)


def ask_start_dir(read_line=None):
    """Prompt until an existing directory or the word "exit" is entered.

    *read_line* is the prompt function (defaults to raw_input/input); it is
    a parameter so the loop can be driven without a terminal.

    Returns the directory to scan, or None when the user typed "exit".  An
    existing directory that happens to be named "exit" is treated as a
    directory to scan.
    """
    if read_line is None:
        read_line = _read_line
    root = read_line("Enter start dir: ")
    while not os.path.isdir(root) and root != "exit":
        print("Mai incearca o data sau 'exit' + enter to Exit")
        root = read_line("Enter start dir: ")
    return root if os.path.isdir(root) else None


def write_report(groups, report_path="duplicate.txt"):
    """Append every group of two or more paths in *groups* to *report_path*.

    *groups* maps a digest to a list of paths.  The report file is opened
    only when at least one duplicate group exists, because the final
    summary uses the file's existence to tell whether duplicates were
    found.  A file that disappeared since the scan is still listed, with
    "?" as its size, and the error is logged to bugs.txt.

    Returns the number of duplicate groups written.
    """
    duplicates = [(key, paths) for key, paths in groups.items()
                  if len(paths) > 1]
    if not duplicates:
        return 0
    with open(report_path, "a") as handle:
        for key, paths in duplicates:
            handle.write(str(key) + "\n")
            for f_path in paths:
                try:
                    size = str(os.path.getsize(f_path))
                except OSError as bug:
                    log_bug(bug)
                    size = "?"
                handle.write("\t\t" + size + "\t" + str(f_path) + "\n")
    return len(duplicates)


def main():
    """Ask for a start directory, scan it, write duplicate.txt, summarise."""
    print(BANNER)
    root = ask_start_dir()
    if root is None:
        sys.exit("Out!")
    fileparsing(root)
    write_report(biblioteca)
    print("\nRezultate:")
    print("Buguri: "
          + ("bugs.txt" if os.path.isfile("bugs.txt") else "N-am gasit"))
    print("Duplicate:  "
          + ("duplicate.txt" if os.path.isfile("duplicate.txt")
             else "N-am gasit"))
    # The original ended with sys.exit("Out!"), which reports exit status 1
    # even after a successful run; print the message and exit normally.
    print("Out!")


if __name__ == "__main__":
    main()

# Forum footer: Edited June 18, 2014 by Usr6 -- Quote
Nytro Posted June 18, 2014 Report Posted June 18, 2014 Super, si eu voiam sa fac asa ceva. Cred ca ar fi mai optim sa gasesti dimensiunile fisierelor si sa calculezi hash-ul doar pentru cele cu aceeasi dimensiune. Quote
# Forum post by Usr6 (thread author) -- posted June 18, 2014.
#
# Quoting Nytro: "Great, I wanted to write something like this too.  I think
# it would be more efficient to collect the file sizes and compute the hash
# only for files that share the same size."
# Reply: "indeed, script updated"
#
# NOTE(review): line breaks and indentation were lost when the post was
# scraped; the layout below (including the banner) is a reconstruction.
# The code is written to run unchanged on Python 2.7 and Python 3.

#!/usr/bin/env python
import os
import sys
import hashlib

BANNER = (
    "##############################\n"
    "# Rst Duplicate file finder  #\n"
    "#           Usr6             #\n"
    "##############################\n"
    "#nytro edition*\n"
    "#*are la baza fallen edition"
)

# File size (as a decimal string) -> list of paths with that size.
biblioteca = {}
# MD5 hex digest -> list of same-sized paths that have that digest.
librarie = {}

try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3


def log_bug(bug):
    """Append the text of exception *bug* to bugs.txt in the working dir."""
    with open("bugs.txt", "a") as handle:
        handle.write(str(bug) + "\n")


def hashfile(afile):
    """Return the hexadecimal MD5 digest of the file at path *afile*.

    The file is read in BLOCKSIZE-byte chunks so that large files are never
    held in memory at once.  Adapted from
    http://www.pythoncentral.io/hashing-files-with-python/
    """
    BLOCKSIZE = 65536
    hasher = hashlib.md5()
    with open(afile, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for buf in iter(lambda: stream.read(BLOCKSIZE), b""):
            hasher.update(buf)
    return hasher.hexdigest()


def fileparsing(root):
    """Walk *root* recursively and group every regular file by its size.

    Each path is appended to ``biblioteca[str(size)]`` and echoed to stdout.
    Any error raised while handling a file is logged to bugs.txt and the
    scan continues with the next file.
    """
    for folder, subFolders, files in os.walk(root):
        for name in files:
            fullpath = os.path.join(folder, name)
            # Skip devices, FIFOs, sockets and broken links: they would be
            # hashed later, and reading e.g. /dev/zero never reaches
            # end-of-file.
            if not os.path.isfile(fullpath):
                continue
            try:
                file_size = str(os.path.getsize(fullpath))
                # setdefault is a single hash lookup; the original tested
                # membership in .keys(), a linear list scan on Python 2.
                biblioteca.setdefault(file_size, []).append(fullpath)
                print(fullpath)
            except Exception as bug:
                log_bug(bug)


def hash_candidates():
    """Hash every file whose size is shared with at least one other file.

    Reads the size groups in ``biblioteca`` and appends each hashed path to
    ``librarie[digest]``.  Files with a unique size cannot have a duplicate
    and are never read.  Errors are logged to bugs.txt and skipped.
    """
    for same_size in biblioteca.values():
        if len(same_size) < 2:
            continue
        for f_path in same_size:
            try:
                md5hash = hashfile(f_path)
                librarie.setdefault(md5hash, []).append(f_path)
                print(f_path)
            except Exception as bug:
                log_bug(bug)


def ask_start_dir(read_line=None):
    """Prompt until an existing directory or the word "exit" is entered.

    *read_line* is the prompt function (defaults to raw_input/input); it is
    a parameter so the loop can be driven without a terminal.

    Returns the directory to scan, or None when the user typed "exit".  An
    existing directory that happens to be named "exit" is treated as a
    directory to scan.
    """
    if read_line is None:
        read_line = _read_line
    root = read_line("Enter start dir: ")
    while not os.path.isdir(root) and root != "exit":
        print("Mai incearca o data sau 'exit' + enter to Exit")
        root = read_line("Enter start dir: ")
    return root if os.path.isdir(root) else None


def write_report(groups, report_path="duplicate.txt"):
    """Append every group of two or more paths in *groups* to *report_path*.

    *groups* maps a digest to a list of paths.  The report file is opened
    only when at least one duplicate group exists, because the final
    summary uses the file's existence to tell whether duplicates were
    found.  A file that disappeared since the scan is still listed, with
    "?" as its size, and the error is logged to bugs.txt.

    Returns the number of duplicate groups written.
    """
    duplicates = [(key, paths) for key, paths in groups.items()
                  if len(paths) > 1]
    if not duplicates:
        return 0
    with open(report_path, "a") as handle:
        for key, paths in duplicates:
            handle.write(str(key) + "\n")
            for f_path in paths:
                try:
                    size = str(os.path.getsize(f_path))
                except OSError as bug:
                    log_bug(bug)
                    size = "?"
                handle.write("\t\t" + size + "\t" + str(f_path) + "\n")
    return len(duplicates)


def main():
    """Ask for a start dir, group by size, hash candidates, write report."""
    print(BANNER)
    root = ask_start_dir()
    if root is None:
        sys.exit("Out!")
    fileparsing(root)
    print("Filtram rezultatele:")
    hash_candidates()
    print("Scriu rezultatele finale in fisierul 'duplicate.txt'")
    write_report(librarie)
    print("\nRezultate:")
    print("Buguri: "
          + ("bugs.txt" if os.path.isfile("bugs.txt") else "N-am gasit"))
    print("Duplicate:  "
          + ("duplicate.txt" if os.path.isfile("duplicate.txt")
             else "N-am gasit"))
    # The original ended with sys.exit("Out!"), which reports exit status 1
    # even after a successful run; print the message and exit normally.
    print("Out!")


if __name__ == "__main__":
    main()

# Forum footer: 1 Quote
Shin Posted June 18, 2014 Report Posted June 18, 2014 Piesa! Merci! Incepe sa-mi placa pythonu' asta, cred ca bag si eu un ochi prin ceva cursuri. Quote
cmiN Posted June 19, 2014 Report Posted June 19, 2014 Vin si eu cu un patch cu rata mai mica de coliziune si cu argumente date frumos la rulare (+ ceva optimizari de cod si bug-uri rezolvate), doar de fun, ca teoretic cam trebuie rescris tot de acolo. Script: http://sprunge.us/LASd?py Quote