36 lines
1.7 KiB
Python
36 lines
1.7 KiB
Python
|
import os
|
||
|
from Sample_cleaning import de_PE
|
||
|
from Sample_cleaning import de_oversize
|
||
|
import shutil
|
||
|
from Sample_cleaning import de_repeatSample
|
||
|
from Sample_cleaning import de_shell
|
||
|
from Sample_cleaning import de_cfg
|
||
|
if __name__ == "__main__":
    # Script entry point for the sample-cleaning pipeline.
    # Only stage 3 (the CFG filter) is active; the other stages are kept
    # below as disabled snippets so they can be re-enabled by uncommenting.

    # Every directory used here is resolved against the current working
    # directory, so the script must be launched from the project root.
    work_dir = os.getcwd()

    # Directory that holds the samples to be cleaned.
    samples_root = os.path.join(work_dir, "Sample")

    # Disabled debugging helpers:
    # shutil.copyfile(r"C:\Users\Administrator\Desktop\大规模复杂软件无效样本清洗\demo1\T\ccs2023a-paper48.pdf",
    #                 r"C:\Users\Administrator\Desktop\大规模复杂软件无效样本清洗\demo1\save_dir")
    # out_dir = os.path.join(work_dir, "save_dir")
    # print(samples_root)
    # print(out_dir)

    # Stage 1 (disabled): clean out non-executable files.
    # notexe_save_dir = os.path.join(work_dir, "notexe_dir")
    # pe_filter = de_PE.deleteNotPE(sampleFile_dir=samples_root, save_dir=notexe_save_dir)
    # pe_filter.notPEfile_Filter()

    # Stage 2 (disabled): clean out packed ("shelled") programs.
    # shell_save_dir = os.path.join(work_dir, "shell_dir")
    # shell_filter = de_shell.de_shell(sample_path=samples_root, save_dir=shell_save_dir)
    # shell_filter.fileFilter()

    # Stage 3 (active): clean out samples whose CFG cannot be represented.
    # NOTE(review): save_dir is presumably where the rejected samples end up;
    # confirm against Sample_cleaning.de_cfg.
    notcfg_save_dir = os.path.join(work_dir, "notcfg_dir")
    cfg_filter = de_cfg.de_notcfg(
        sampleFile_dir=samples_root,
        save_dir=notcfg_save_dir,
    )
    cfg_filter.notcfgfile_Filter()

    # Stage 4 (disabled): clean out sample files outside the size limits.
    # oversize_save_dir = os.path.join(work_dir, "oversizefile_dir")
    # size_filter = de_oversize.de_oversizefile(sampleFile_dir=samples_root, max_file_size=10,
    #                                           min_file_size=0.01, save_dir=oversize_save_dir)
    # size_filter.filesizeFilter()

    # Stage 5 (disabled): delete duplicate files.
    # repeat_save_dir = os.path.join(work_dir, "repeatfile_dir")
    # dedup_filter = de_repeatSample.de_repeatfile(sampleFile_dir=samples_root, save_dir=repeat_save_dir)
    # dedup_filter.remove_duplicate_files()
|