文件去重(基于文件哈希值,Python实现)

08/23/2019 22:48:28 阅读(2299)

import hashlib
import glob
import os

# Glob pattern selecting the entries to scan for duplicates; the trailing "*"
# matches every entry directly inside C:/tmp/src.
# NOTE(review): this name shadows the builtin dir(); it is kept because the
# de-duplication loop below reads it.
dir = 'C:/tmp/src/*'
def sha1(fname, chunk_size=4096):
    """Return the SHA-1 hex digest of the file at *fname*.

    The file is opened in binary mode and read in fixed-size chunks, so
    large files are hashed without loading them into memory at once.

    Args:
        fname: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration. Must be a
            positive integer; the digest does not depend on this value.

    Returns:
        The 40-character lowercase hexadecimal SHA-1 digest.

    Raises:
        ValueError: If *chunk_size* is not a positive integer.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b"" immediately, which would silently produce the
    # digest of empty input instead of the file's real digest.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    # Named `digest` so the function's own name is not shadowed in its body.
    digest = hashlib.sha1()
    with open(fname, "rb") as f:
        # iter(callable, sentinel) keeps reading until read() returns b"" (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Remove duplicate files among the paths matched by the `dir` glob pattern:
# the first file seen with a given SHA-1 digest is kept, and every later file
# with the same digest is deleted from disk.
# NOTE(review): recursive=True only has an effect when the pattern contains
# "**"; without it only the entries the pattern matches literally are scanned.
seen_digests = set()  # a set gives O(1) membership tests and shadows no builtin
for path in glob.glob(dir, recursive=True):
    # glob also yields directories; they cannot be opened for hashing and
    # would abort the whole run, so only regular files are considered.
    if not os.path.isfile(path):
        continue
    digest = sha1(path)
    if digest in seen_digests:
        os.remove(path)
    else:
        seen_digests.add(digest)
返回