import os
import shutil

import angr
import pefile
from tqdm import tqdm


def is_ELF_file(file_path: str) -> bool:
    """Tell whether a sample starts with the ELF magic number.

    :param file_path: path of the sample file
    :return: True if the first four bytes equal the ELF magic, else False
    """
    ELF_MAGIC_NUMBER = b'\x7fELF'

    # Only the first four bytes are needed for the comparison.
    with open(file_path, 'rb') as f:
        header = f.read(4)

    return header == ELF_MAGIC_NUMBER


def is_pe_file(file_path: str) -> bool:
    """Tell whether pefile can parse a sample as a PE file.

    :param file_path: path of the sample file
    :return: True if ``pefile.PE`` parses the file, False if it raises
        ``pefile.PEFormatError``
    """
    try:
        pe = pefile.PE(file_path)
    except pefile.PEFormatError:
        return False
    # Release the parser's hold on the file right away: the caller may delete
    # the sample next, and a still-open handle can make that fail.
    pe.close()
    return True


def is_cfg(file_path: str) -> bool:
    """Tell whether angr can build a CFGFast graph for a sample.

    :param file_path: path of the sample file
    :return: True if a graph object was produced; False if the graph is None
        or if loading/analysis raised any exception
    """
    try:
        p = angr.Project(file_path, load_options={'auto_load_libs': False})
        cfg = p.analyses.CFGFast(
            show_progressbar=True,
            normalize=False,
            resolve_indirect_jumps=False,
            force_smart_scan=False,
            symbols=False,
            data_references=False,
        )
        return cfg.graph is not None
    except Exception:
        # Deliberately broad: any failure while loading or analysing simply
        # means the sample cannot be represented as a CFG.
        return False


class de_notcfg(object):
    """Filter a sample directory, moving out files that yield no CFG.

    :param sampleFile_dir: directory tree holding the samples to check
    :param save_dir: directory that receives the rejected samples
    """

    def __init__(self, sampleFile_dir, save_dir):
        self.sampleFile_dir = sampleFile_dir
        self.save_dir = save_dir

    def _quarantine(self, sample_path: str) -> None:
        """Copy one sample into ``save_dir`` and delete the original."""
        # copy + remove (rather than shutil.move) keeps the overwrite-on-clash
        # behaviour when a file of the same name is already in save_dir.
        shutil.copy(sample_path, self.save_dir)
        os.remove(sample_path)

    def notcfgfile_Filter(self) -> None:
        """Walk ``sampleFile_dir`` and move out every unusable sample.

        A sample is kept only if it is a PE or ELF file and ``is_cfg``
        succeeds on it. Everything else is copied to ``save_dir`` and removed
        from the sample tree. A folder found empty after its files were
        processed is deleted. A ``PermissionError`` on one file is reported
        and that file is skipped.
        """
        # Without an existing directory shutil.copy would create a regular
        # file named save_dir and overwrite it with each rejected sample.
        os.makedirs(self.save_dir, exist_ok=True)

        # root: folder currently being visited (top level or a subfolder)
        # files: names of the files directly inside that folder
        for root, _dirs, files in os.walk(self.sampleFile_dir):
            for file in tqdm(files):
                # Build the path from root, not from the top-level directory,
                # so samples inside subfolders are found as well.
                sample_path = os.path.join(root, file)
                try:
                    is_executable = (
                        is_ELF_file(file_path=sample_path)
                        or is_pe_file(file_path=sample_path)
                    )
                    if not is_executable:
                        self._quarantine(sample_path)
                        print(file + "不是elf或PE文件,不可表征为cfg")
                    elif is_cfg(sample_path):
                        print(file + '该文件可以表征成cfg')
                    else:
                        self._quarantine(sample_path)
                        print(file + '不可表征成cfg,清洗文件保存在' + self.save_dir + '文件中,请查看')
                except PermissionError as e:
                    print(f"Argument Error: {e}")

            if not os.listdir(root):
                print("文件夹" + root + "为空,删除")
                # Nothing to copy from an empty folder; shutil.copy on a
                # directory only raises, so just remove the folder.
                os.rmdir(root)