sample_cleaning_demo/Sample_cleaning/de_PE.py
2023-05-12 11:20:02 +08:00

71 lines
2.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pefile
import os
import shutil
from tqdm import tqdm
def is_ELF_file(file_path):
    """
    Check whether a sample is an ELF file.

    :param file_path: path of the sample file
    :return: True when the file starts with the ELF magic number, otherwise False
    """
    elf_magic = b'\x7fELF'
    with open(file_path, 'rb') as sample:
        # Only the leading bytes are needed to recognise the format.
        leading_bytes = sample.read(len(elf_magic))
    return leading_bytes == elf_magic
def is_pe_file(file_path):
    """
    Check whether a sample is a PE file.

    :param file_path: path of the sample file
    :return: True when pefile parses the file without raising
        PEFormatError, otherwise False
    """
    # Opening the file first keeps the original failure mode: an unreadable
    # or missing path raises the plain OSError from open() before pefile
    # is involved. The bytes themselves are not needed here.
    with open(file_path, 'rb'):
        pass
    try:
        pe = pefile.PE(file_path)
    except pefile.PEFormatError:
        return False
    # Release the parser's hold on the file data immediately instead of
    # waiting for garbage collection; callers may move or delete the sample.
    pe.close()
    return True
class deleteNotPE(object):
    """Remove samples that are neither PE nor ELF files from a directory tree."""

    def __init__(self, sampleFile_dir, save_dir):
        """
        Remove non-executable files from a large, mixed set of software samples.

        :param sampleFile_dir: directory holding the samples to clean
        :param save_dir: directory that receives a copy of every removed sample
        """
        self.sampleFile_dir = sampleFile_dir
        self.save_dir = save_dir

    def notPEfile_Filter(self):
        """
        Walk ``sampleFile_dir`` and weed out every file that is not PE or ELF.

        Each rejected file is copied into ``save_dir`` and then deleted from
        the sample tree. A visited directory that is empty after its files
        have been processed is removed as well.
        """
        # shutil.copy would otherwise write every rejected sample to a single
        # file named save_dir when that directory does not exist yet.
        os.makedirs(self.save_dir, exist_ok=True)
        for root, _dirs, files in os.walk(self.sampleFile_dir):
            # root: directory currently being visited (top level or nested)
            # files: names of the files directly inside root
            for file in tqdm(files):
                # Build the path from root, not from the top-level directory,
                # so files inside subdirectories resolve to where they are.
                sample_path = os.path.join(root, file)
                if is_pe_file(file_path=sample_path):
                    print(file + '为PE文件无需清洗')
                elif is_ELF_file(file_path=sample_path):
                    print(file + '为ELF文件无需清洗')
                else:
                    shutil.copy(sample_path, self.save_dir)
                    os.remove(sample_path)
                    print(file + '为无效样本文件,已将无效样本文件剔除,并讲其保存在'+self.save_dir+'文件中,请查看')
            if not os.listdir(root):
                print("文件夹" + root + "为空,删除")
                os.rmdir(root)