"""Driver script for the sample-cleaning pipeline.

Run from the project root. Samples are read from ``<cwd>/Sample``. Each
cleaning stage receives its own ``save_dir`` under the current working
directory. Only stage 3 (the CFG filter) is currently enabled; the other
stages are kept below as commented-out toggles.
"""

import os
import shutil

from Sample_cleaning import (
    de_PE,
    de_oversize,
    de_repeatSample,
    de_shell,
    de_cfg,
)

if __name__ == "__main__":
    # Leftover debugging snippet (copies one file into save_dir):
    # shutil.copyfile(r"C:\Users\Administrator\Desktop\大规模复杂软件无效样本清洗\demo1\T\ccs2023a-paper48.pdf",
    #                 r"C:\Users\Administrator\Desktop\大规模复杂软件无效样本清洗\demo1\save_dir")

    # Directory that holds the samples to be cleaned.
    working_dir = os.getcwd()
    sampleFile_dir = os.path.join(working_dir, "Sample")
    # out_dir=os.path.join(os.getcwd(), "save_dir")
    # print(sampleFile_dir)
    # print(out_dir)

    # 1. Clean out non-executable (non-PE) programs.
    # notexe_save_dir=os.path.join(os.getcwd(),"notexe_dir")
    # test = de_PE.deleteNotPE(sampleFile_dir=sampleFile_dir,save_dir=notexe_save_dir)
    # test.notPEfile_Filter()

    # 2. Clean out packed ("shelled") programs.
    # shellsave_dir=os.path.join(os.getcwd(),"shell_dir")
    # test2=de_shell.de_shell(sample_path=sampleFile_dir,save_dir=shellsave_dir)
    # test2.fileFilter()

    # 3. Clean out samples whose CFG cannot be represented.
    # NOTE(review): save_dir is presumably where rejected samples end up --
    # confirm against de_cfg.de_notcfg.
    notcfg_dir = os.path.join(working_dir, "notcfg_dir")
    cfg_filter = de_cfg.de_notcfg(
        sampleFile_dir=sampleFile_dir,
        save_dir=notcfg_dir,
    )
    cfg_filter.notcfgfile_Filter()

    # 4. Clean out oversized sample files.
    # oversizefile_save_dir=os.path.join(os.getcwd(),"oversizefile_dir")
    # test4=de_oversize.de_oversizefile(sampleFile_dir=sampleFile_dir,max_file_size=10,min_file_size=0.01,save_dir=oversizefile_save_dir)
    # test4.filesizeFilter()

    # 5. Delete duplicate files.
    # repeatfilesave_dir=os.path.join(os.getcwd(),"repeatfile_dir")
    # test5=de_repeatSample.de_repeatfile(sampleFile_dir=sampleFile_dir,save_dir=repeatfilesave_dir)
    # test5.remove_duplicate_files()