71 lines
2.4 KiB
Python
71 lines
2.4 KiB
Python
import pefile
|
||
import os
|
||
import shutil
|
||
from tqdm import tqdm
|
||
def is_ELF_file(file_path):
    """
    Check whether a sample is an ELF file.

    Only the leading bytes of the file are inspected. A file shorter than the
    magic number (including an empty file) is reported as not ELF.

    :param file_path: path of the sample file
    :return: True if the file starts with the ELF magic number, otherwise False
    :raises OSError: if the file cannot be opened or read
    """
    # Magic number found at the very start of every ELF file.
    ELF_MAGIC_NUMBER = b'\x7fELF'

    # Read exactly as many leading bytes as the magic number is long.
    with open(file_path, 'rb') as f:
        header = f.read(len(ELF_MAGIC_NUMBER))

    # The comparison already yields the boolean we want to return.
    return header == ELF_MAGIC_NUMBER
|
||
|
||
|
||
def is_pe_file(file_path):
    """
    Check whether a sample is a PE file.

    The file is handed to ``pefile.PE``; it counts as a PE file when parsing
    does not raise ``pefile.PEFormatError``.

    :param file_path: path of the sample file
    :return: True if pefile parses the file, False if it raises PEFormatError
    """
    # Keep the try body to the single call whose failure we interpret.
    try:
        pe = pefile.PE(file_path)
    except pefile.PEFormatError:
        return False

    # Only the parse outcome matters here, so release the parsed image right
    # away instead of leaving its file data loaded until garbage collection.
    pe.close()
    return True
|
||
|
||
|
||
class deleteNotPE(object):
    """
    Remove non-executable samples from a directory tree of software samples.

    Files that are neither PE nor ELF are copied into ``save_dir`` and then
    deleted from the sample tree.
    """

    def __init__(self, sampleFile_dir, save_dir):
        """
        Store the directories the filter works on.

        :param sampleFile_dir: directory containing the samples to clean
        :param save_dir: directory that receives the rejected (non-PE/ELF) files
        """
        self.sampleFile_dir = sampleFile_dir
        self.save_dir = save_dir

    def notPEfile_Filter(self):
        """
        Walk the sample directory and move out every non-PE, non-ELF file.

        For each file: PE and ELF files are left in place; anything else is
        copied to ``save_dir`` and removed from the sample tree. After the
        files of a directory have been processed, the directory is deleted if
        it is empty at that point.
        """
        # Without an existing directory, shutil.copy would create a single
        # regular file named save_dir and overwrite it for every reject.
        os.makedirs(self.save_dir, exist_ok=True)

        for root, dirs, files in os.walk(self.sampleFile_dir):
            # root: the directory currently being visited (top level or nested)
            # dirs: sub-directories directly inside root
            # files: files directly inside root
            for file in tqdm(files):
                # Build the path from root, not from the top-level directory,
                # so files inside sub-directories resolve correctly.
                sample_path = os.path.join(root, file)

                if is_pe_file(file_path=sample_path):
                    print(file + '为PE文件无需清洗')
                elif is_ELF_file(file_path=sample_path):
                    print(file + '为ELF文件无需清洗')
                else:
                    # Copy first and delete afterwards, so the sample is only
                    # removed once the copy has succeeded.
                    shutil.copy(sample_path, self.save_dir)
                    os.remove(sample_path)
                    print(file + '为无效样本文件,已将无效样本文件剔除,并讲其保存在'+self.save_dir+'文件中,请查看')

            # Drop the directory when nothing is left in it.
            if not os.listdir(root):
                print("文件夹" + root + "为空,删除")
                os.rmdir(root)