sample_cleaning_demo/usage.py
2023-05-12 11:20:02 +08:00

36 lines
1.7 KiB
Python

import os
from Sample_cleaning import de_PE
from Sample_cleaning import de_oversize
import shutil
from Sample_cleaning import de_repeatSample
from Sample_cleaning import de_shell
from Sample_cleaning import de_cfg
if __name__ == "__main__":
    # Demo driver for the sample-cleaning steps. Every directory below is
    # resolved against the current working directory. Only step 3 is active;
    # the remaining steps are kept as commented-out usage examples.
    work_dir = os.getcwd()

    # Directory that holds the samples to be cleaned.
    sample_dir = os.path.join(work_dir, "Sample")

    # Step 1: clean out non-executable programs.
    # non_exe_filter = de_PE.deleteNotPE(
    #     sampleFile_dir=sample_dir,
    #     save_dir=os.path.join(work_dir, "notexe_dir"),
    # )
    # non_exe_filter.notPEfile_Filter()

    # Step 2: clean out packed (shelled) programs.
    # shell_filter = de_shell.de_shell(
    #     sample_path=sample_dir,
    #     save_dir=os.path.join(work_dir, "shell_dir"),
    # )
    # shell_filter.fileFilter()

    # Step 3: clean out samples whose CFG cannot be represented.
    cfg_filter = de_cfg.de_notcfg(
        sampleFile_dir=sample_dir,
        save_dir=os.path.join(work_dir, "notcfg_dir"),
    )
    cfg_filter.notcfgfile_Filter()

    # Step 4: clean out oversized sample files.
    # size_filter = de_oversize.de_oversizefile(
    #     sampleFile_dir=sample_dir,
    #     max_file_size=10,
    #     min_file_size=0.01,
    #     save_dir=os.path.join(work_dir, "oversizefile_dir"),
    # )
    # size_filter.filesizeFilter()

    # Step 5: delete duplicate files.
    # duplicate_filter = de_repeatSample.de_repeatfile(
    #     sampleFile_dir=sample_dir,
    #     save_dir=os.path.join(work_dir, "repeatfile_dir"),
    # )
    # duplicate_filter.remove_duplicate_files()