31 lines
1.1 KiB
Python
31 lines
1.1 KiB
Python
|
import os
|
||
|
import hashlib
|
||
|
import shutil
|
||
|
|
||
|
def md5sum(file_path, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at *file_path*.

    The file is opened in binary mode and read in pieces of *chunk_size*
    bytes, so the whole file is never held in memory at once.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration. Defaults to
            1 MiB, the value that was previously hard-coded.

    Returns:
        The MD5 digest of the file contents as a hexadecimal string.

    Raises:
        ValueError: If *chunk_size* is not positive.
        OSError: If the file cannot be opened or read.
    """
    # A size of 0 would make the first read return b"" and silently yield
    # the digest of empty input, so reject it up front.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size!r}")

    digest = hashlib.md5()
    with open(file_path, "rb") as f:
        # iter(callable, sentinel) keeps reading until read() returns b"",
        # which is how a binary file signals end-of-file.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
|
||
|
|
||
|
def copy_executables(src_folder, dst_folder):
    """Copy each distinct executable file from *src_folder* to *dst_folder*.

    Only regular files directly inside *src_folder* are considered (the
    listing is not recursive). Each copy is named after the MD5 digest of
    the file's contents, keeping the original file extension. A file whose
    digest was already seen during this call is skipped as a duplicate.
    A line is printed for every file copied or skipped.

    Args:
        src_folder: Directory to scan for executable files.
        dst_folder: Directory that receives the copies; it is created if
            it does not exist yet.

    Raises:
        OSError: If a directory cannot be listed or created, or a file
            cannot be read or copied.
    """
    # shutil.copy does not create missing directories, so make sure the
    # destination exists before the first copy.
    os.makedirs(dst_folder, exist_ok=True)

    seen_digests = set()
    # os.listdir returns entries in arbitrary order; sorting makes the choice
    # of which duplicate is kept (and thus which extension the copy gets)
    # reproducible between runs.
    for file_name in sorted(os.listdir(src_folder)):
        file_path = os.path.join(src_folder, file_name)
        # NOTE(review): on Windows, os.access(path, os.X_OK) is believed to
        # succeed for any existing file, so this filter may not exclude
        # anything there -- confirm against the intended platform.
        if not (os.path.isfile(file_path) and os.access(file_path, os.X_OK)):
            continue

        digest = md5sum(file_path)
        if digest in seen_digests:
            print(f"Skipping duplicate file: {file_path}")
            continue
        seen_digests.add(digest)

        new_file_name = digest + os.path.splitext(file_name)[1]
        new_file_path = os.path.join(dst_folder, new_file_name)
        shutil.copy(file_path, new_file_path)
        print(f"Copied file: {file_path} -> {new_file_path}")
|
||
|
if __name__ == '__main__':
    # Source folder to scan, and the folder that receives the copies.
    source_dir = r"D:\detect_exp_d\data\benign_pre"
    target_dir = r"D:\detect_exp_d\data\benign_last"
    copy_executables(source_dir, target_dir)
|