# Usr6 Posted June 18, 2014 Report Share Posted June 18, 2014
#
# Forum post (translated from Romanian):
# As the title suggests, this searches for all duplicate files on the computer
# and its attached drives and writes them to the file duplicate.txt in the form:
#   md5
#       file size1    file path
#       file size2    file path
#   etc.
# Because md5 is no longer considered 100% safe, the results offered by this
# script cannot be 100% accurate either; @fallen_angel was lucky enough to run
# into a collision during testing.
# It should work without problems on any windows/*unix/+ubuntu system that has
# python 2.7 installed.

#!/usr/bin/env python
"""Rst duplicate file finder (Usr6).

Walks every available drive (or ``/`` when no drive letter exists), groups
files by the MD5 digest of their content and appends every group with more
than one member to ``duplicate.txt``.  Errors met while scanning are appended
to ``bugs.txt``.  The code runs unchanged on Python 2.7 and Python 3.
"""
import hashlib
import os
import sys

BANNER = """
#############################
# Rst Duplicate file finder #
#           Usr6            #
#############################
"""

alfabet = list("abcdefghijklmnopqrstuvwxyz")

# Maps an MD5 hex digest to the list of paths whose content has that digest.
biblioteca = {}


def hashfile(afile):
    """Return the MD5 hex digest of the file at path ``afile``.

    The file is read in 64 KiB chunks so large files are never loaded into
    memory at once.  Raises IOError/OSError when the file cannot be read.
    Based on http://www.pythoncentral.io/hashing-files-with-python/
    """
    blocksize = 65536
    hasher = hashlib.md5()
    with open(afile, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: stream.read(blocksize), b""):
            hasher.update(chunk)
    return hasher.hexdigest()


def fileparsing(root):
    """Hash every file below ``root`` and record its path in ``biblioteca``.

    Each successfully hashed path is printed as progress output.  Any error
    for a single file is appended to ``bugs.txt`` and the scan continues,
    so one unreadable file never aborts the whole walk.
    """
    for dirpath, _subfolders, files in os.walk(root):
        for name in files:
            fullpath = os.path.join(dirpath, name)
            try:
                md5hash = hashfile(fullpath)
                # setdefault avoids both the key scan and the list copy.
                biblioteca.setdefault(md5hash, []).append(fullpath)
                print(fullpath)
            except Exception as bug:
                # Deliberate best-effort boundary: log and keep scanning.
                with open("bugs.txt", "a") as handle:
                    handle.write(str(bug) + "\n")


def _scan_roots():
    """Return the existing Windows drive roots, or ["/"] when there are none."""
    # NOTE: sys.platform could be consulted instead of probing drive letters.
    drives = [letter + ":\\" for letter in alfabet]
    found = [drive for drive in drives if os.path.isdir(drive)]
    # Fall back to "/" only when no drive exists; scanning both would list
    # every file on the current drive twice and report it as a duplicate.
    return found or ["/"]


def _write_duplicates(groups, report="duplicate.txt"):
    """Append every group holding more than one path to ``report``."""
    duplicates = [(key, paths) for key, paths in groups.items()
                  if len(paths) > 1]
    if not duplicates:
        # Do not create the report: its existence signals "duplicates found".
        return
    with open(report, "a") as handle:
        for digest, paths in duplicates:
            handle.write(str(digest) + "\n")
            for f_path in paths:
                try:
                    size = str(os.path.getsize(f_path))
                except OSError:
                    # The file vanished after it was hashed; keep the entry.
                    size = "?"
                handle.write("\t\t" + size + "\t" + str(f_path) + "\n")


def main():
    """Scan all roots, write the duplicate report and print a summary."""
    print(BANNER)
    for root in _scan_roots():
        fileparsing(root)
    _write_duplicates(biblioteca)
    print("\nRezultate:")
    print("Buguri: "
          + ("bugs.txt" if os.path.isfile("bugs.txt") else "N-am gasit"))
    print("Duplicate:  "
          + ("duplicate.txt" if os.path.isfile("duplicate.txt")
             else "N-am gasit"))


if __name__ == "__main__":
    main()

# 1 Quote Link to comment Share on other sites More sharing options...
fallen_angel Posted June 18, 2014 Report Share Posted June 18, 2014 Fă-l bre să introduci path-ul de unde să înceapă să scaneze Quote Link to comment Share on other sites More sharing options...
# Usr6 Posted June 18, 2014 Author Report Share Posted June 18, 2014 (edited)
#
# Forum post (translated from Romanian):
# Quote: "Make it so that you enter the path where it should start scanning."
# Reply: If you really insist...

#!/usr/bin/env python
"""Rst duplicate file finder (Usr6), "fallen edition".

Asks for a start directory, groups every file below it by the MD5 digest of
its content and appends every group with more than one member to
``duplicate.txt``.  Errors met while scanning are appended to ``bugs.txt``.
The code runs unchanged on Python 2.7 and Python 3.
"""
import hashlib
import os
import sys

try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3

BANNER = """
#############################
# Rst Duplicate file finder #
#           Usr6            #
#############################
#fallen edition
"""

# Maps an MD5 hex digest to the list of paths whose content has that digest.
biblioteca = {}


def hashfile(afile):
    """Return the MD5 hex digest of the file at path ``afile``.

    The file is read in 64 KiB chunks so large files are never loaded into
    memory at once.  Raises IOError/OSError when the file cannot be read.
    Based on http://www.pythoncentral.io/hashing-files-with-python/
    """
    blocksize = 65536
    hasher = hashlib.md5()
    with open(afile, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: stream.read(blocksize), b""):
            hasher.update(chunk)
    return hasher.hexdigest()


def fileparsing(root):
    """Hash every file below ``root`` and record its path in ``biblioteca``.

    Each successfully hashed path is printed as progress output.  Any error
    for a single file is appended to ``bugs.txt`` and the scan continues,
    so one unreadable file never aborts the whole walk.
    """
    for dirpath, _subfolders, files in os.walk(root):
        for name in files:
            fullpath = os.path.join(dirpath, name)
            try:
                md5hash = hashfile(fullpath)
                # setdefault avoids both the key scan and the list copy.
                biblioteca.setdefault(md5hash, []).append(fullpath)
                print(fullpath)
            except Exception as bug:
                # Deliberate best-effort boundary: log and keep scanning.
                with open("bugs.txt", "a") as handle:
                    handle.write(str(bug) + "\n")


def _ask_start_dir():
    """Prompt until an existing directory or "exit" is entered.

    Returns the directory path, or None when the user typed "exit".  The
    directory check comes first on every attempt, so an existing directory
    literally named "exit" is scanned rather than treated as the command.
    """
    while True:
        root = _read_line("Enter start dir: ")
        if os.path.isdir(root):
            return root
        if root == "exit":
            return None
        print("Mai incearca o data sau 'exit' + enter to Exit")


def _write_duplicates(groups, report="duplicate.txt"):
    """Append every group holding more than one path to ``report``."""
    duplicates = [(key, paths) for key, paths in groups.items()
                  if len(paths) > 1]
    if not duplicates:
        # Do not create the report: its existence signals "duplicates found".
        return
    with open(report, "a") as handle:
        for digest, paths in duplicates:
            handle.write(str(digest) + "\n")
            for f_path in paths:
                try:
                    size = str(os.path.getsize(f_path))
                except OSError:
                    # The file vanished after it was hashed; keep the entry.
                    size = "?"
                handle.write("\t\t" + size + "\t" + str(f_path) + "\n")


def main():
    """Prompt for a directory, scan it, write the report, print a summary."""
    print(BANNER)
    root = _ask_start_dir()
    if root is None:
        sys.exit("Out!")
    fileparsing(root)
    _write_duplicates(biblioteca)
    print("\nRezultate:")
    print("Buguri: "
          + ("bugs.txt" if os.path.isfile("bugs.txt") else "N-am gasit"))
    print("Duplicate:  "
          + ("duplicate.txt" if os.path.isfile("duplicate.txt")
             else "N-am gasit"))
    # Kept from the original script: it always ends through sys.exit("Out!").
    sys.exit("Out!")


if __name__ == "__main__":
    main()

# Edited June 18, 2014 by Usr6
# Quote Link to comment Share on other sites More sharing options...
Nytro Posted June 18, 2014 Report Share Posted June 18, 2014 Super, si eu voiam sa fac asa ceva. Cred ca ar fi mai optim sa gasesti dimensiunile fisierelor si sa calculezi hash-ul doar pentru cele cu aceeasi dimensiune. Quote Link to comment Share on other sites More sharing options...
# Usr6 Posted June 18, 2014 Author Report Share Posted June 18, 2014
#
# Forum post (translated from Romanian):
# Quote: "Great, I wanted to make something like this too. I think it would
# be more efficient to find the file sizes and compute the hash only for the
# files that have the same size."
# Reply: indeed, script updated

#!/usr/bin/env python
"""Rst duplicate file finder (Usr6), "nytro edition" (based on fallen edition).

Asks for a start directory and groups every file below it by size.  Only
files that share their size with at least one other file are then hashed
with MD5, and every digest shared by more than one file is appended to
``duplicate.txt``.  Errors are appended to ``bugs.txt``.  The code runs
unchanged on Python 2.7 and Python 3.
"""
import hashlib
import os
import sys

try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3

BANNER = """
#############################
# Rst Duplicate file finder #
#           Usr6            #
#############################
#nytro edition*
#*are la baza fallen edition
"""

# Maps a file size (as a decimal string) to the paths having that size.
biblioteca = {}
# Maps an MD5 hex digest to the paths whose content has that digest.
librarie = {}


def hashfile(afile):
    """Return the MD5 hex digest of the file at path ``afile``.

    The file is read in 64 KiB chunks so large files are never loaded into
    memory at once.  Raises IOError/OSError when the file cannot be read.
    Based on http://www.pythoncentral.io/hashing-files-with-python/
    """
    blocksize = 65536
    hasher = hashlib.md5()
    with open(afile, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: stream.read(blocksize), b""):
            hasher.update(chunk)
    return hasher.hexdigest()


def _log_bug(bug):
    """Append the text of exception ``bug`` to bugs.txt."""
    with open("bugs.txt", "a") as handle:
        handle.write(str(bug) + "\n")


def fileparsing(root):
    """Record the path of every file below ``root`` in ``biblioteca`` by size.

    Keys are the file sizes converted to strings.  Each recorded path is
    printed as progress output.  Any error for a single file is appended to
    ``bugs.txt`` and the walk continues.
    """
    for dirpath, _subfolders, files in os.walk(root):
        for name in files:
            fullpath = os.path.join(dirpath, name)
            try:
                file_size = str(os.path.getsize(fullpath))
                # setdefault avoids both the key scan and the list copy.
                biblioteca.setdefault(file_size, []).append(fullpath)
                print(fullpath)
            except Exception as bug:
                # Deliberate best-effort boundary: log and keep scanning.
                _log_bug(bug)


def _hash_candidates():
    """Hash only files that share their size with another file.

    A file with a unique size cannot have a duplicate, so it is skipped.
    Digests are collected in ``librarie``; failures go to bugs.txt.
    """
    for paths in biblioteca.values():
        if len(paths) <= 1:
            continue
        for f_path in paths:
            try:
                md5hash = hashfile(f_path)
                librarie.setdefault(md5hash, []).append(f_path)
                print(f_path)
            except Exception as bug:
                _log_bug(bug)


def _ask_start_dir():
    """Prompt until an existing directory or "exit" is entered.

    Returns the directory path, or None when the user typed "exit".  The
    directory check comes first on every attempt, so an existing directory
    literally named "exit" is scanned rather than treated as the command.
    """
    while True:
        root = _read_line("Enter start dir: ")
        if os.path.isdir(root):
            return root
        if root == "exit":
            return None
        print("Mai incearca o data sau 'exit' + enter to Exit")


def _write_duplicates(groups, report="duplicate.txt"):
    """Append every group holding more than one path to ``report``."""
    duplicates = [(key, paths) for key, paths in groups.items()
                  if len(paths) > 1]
    if not duplicates:
        # Do not create the report: its existence signals "duplicates found".
        return
    with open(report, "a") as handle:
        for digest, paths in duplicates:
            handle.write(str(digest) + "\n")
            for f_path in paths:
                try:
                    size = str(os.path.getsize(f_path))
                except OSError:
                    # The file vanished after it was hashed; keep the entry.
                    size = "?"
                handle.write("\t\t" + size + "\t" + str(f_path) + "\n")


def main():
    """Prompt, group by size, hash the candidates, write and summarise."""
    print(BANNER)
    root = _ask_start_dir()
    if root is None:
        sys.exit("Out!")
    fileparsing(root)
    print("Filtram rezultatele:")
    _hash_candidates()
    print("Scriu rezultatele finale in fisierul 'duplicate.txt'")
    _write_duplicates(librarie)
    print("\nRezultate:")
    print("Buguri: "
          + ("bugs.txt" if os.path.isfile("bugs.txt") else "N-am gasit"))
    print("Duplicate:  "
          + ("duplicate.txt" if os.path.isfile("duplicate.txt")
             else "N-am gasit"))
    # Kept from the original script: it always ends through sys.exit("Out!").
    sys.exit("Out!")


if __name__ == "__main__":
    main()

# 1 Quote Link to comment Share on other sites More sharing options...
Shin Posted June 18, 2014 Report Share Posted June 18, 2014 Piesa! Merci! Incepe sa-mi placa pythonu' asta, cred ca bag si eu un ochi prin ceva cursuri. Quote Link to comment Share on other sites More sharing options...
cmiN Posted June 19, 2014 Report Share Posted June 19, 2014 Vin si eu cu un patch cu rata mai mica de coliziune si cu argumente date frumos la rulare (+ ceva optimizari de cod si bug-uri rezolvate), doar de fun, ca teoretic cam trebuie rescris tot de acolo. Script: http://sprunge.us/LASd?py Quote Link to comment Share on other sites More sharing options...