"""Copy the unique executable files of one folder into another, named by MD5."""

import os
import hashlib
import shutil


def md5sum(file_path, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at *file_path*.

    The file is read in binary mode, ``chunk_size`` bytes at a time, so the
    whole content is never held in memory at once.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes requested per read (default 1 MiB).

    Returns:
        The MD5 digest as a lowercase hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(file_path, "rb") as f:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def copy_executables(src_folder, dst_folder):
    """Copy each distinct executable file in *src_folder* to *dst_folder*.

    Only direct children of ``src_folder`` are considered; sub-directories
    are skipped. A child is copied when it is a regular file and
    ``os.access(path, os.X_OK)`` reports it as executable. Each copy is named
    ``<md5 of content><original extension>``. A file whose content digest was
    already seen during this call is reported and skipped.

    Entries are processed in sorted name order, so the file that is kept
    among identical copies is always the same one. ``dst_folder`` is created
    when it does not exist yet.

    Args:
        src_folder: Directory to scan.
        dst_folder: Directory receiving the copies.

    Raises:
        OSError: If ``src_folder`` cannot be listed, or a file cannot be
            read or copied.
    """
    # NOTE(review): the X_OK test may not be selective on Windows -- confirm
    # it filters as intended on the platform this script runs on.
    # List first, so a missing source folder fails before anything is created.
    file_names = sorted(os.listdir(src_folder))
    os.makedirs(dst_folder, exist_ok=True)

    seen_digests = set()
    for file_name in file_names:
        file_path = os.path.join(src_folder, file_name)
        if not (os.path.isfile(file_path) and os.access(file_path, os.X_OK)):
            continue

        md5 = md5sum(file_path)
        if md5 in seen_digests:
            print(f"Skipping duplicate file: {file_path}")
            continue
        seen_digests.add(md5)

        # Keep the original extension; the stem becomes the content digest.
        new_file_name = md5 + os.path.splitext(file_name)[1]
        new_file_path = os.path.join(dst_folder, new_file_name)
        shutil.copy(file_path, new_file_path)
        print(f"Copied file: {file_path} -> {new_file_path}")


if __name__ == '__main__':
    # Source folder, destination (copy) folder.
    copy_executables(r"D:\detect_exp_d\data\benign_pre", r"D:\detect_exp_d\data\benign_last")