first commit
This commit is contained in:
commit
cd8ed5d802
45
log_utils.py
Normal file
45
log_utils.py
Normal file
@ -0,0 +1,45 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
|
||||
def setup_logger(name, log_file, level=logging.INFO):
    """Return a named logger that writes to a freshly truncated ``log_file``.

    Every call empties ``log_file`` first, so each run starts with a clean
    log. Calling the function again for the same logger and file replaces
    the previous file handler instead of stacking a second one, which would
    otherwise write every record twice.

    Args:
        name: Logger name passed to ``logging.getLogger``.
        log_file: Path of the log file. Missing parent directories are
            created; a bare file name (no directory part) is accepted.
        level: Threshold set on the logger. Defaults to ``logging.INFO``.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    log_dir = os.path.dirname(log_file)
    # A bare file name such as "app.log" has an empty dirname, and
    # os.makedirs("") raises FileNotFoundError, so only create real paths.
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    logger = logging.getLogger(name)
    logger.setLevel(level)

    # Detach handlers left over from an earlier call for the same file so
    # records are not duplicated.
    target = os.path.abspath(log_file)
    for stale in list(logger.handlers):
        if isinstance(stale, logging.FileHandler) and stale.baseFilename == target:
            logger.removeHandler(stale)
            stale.close()

    # Reset the existing log file before attaching the new handler.
    with open(log_file, 'w'):
        pass

    handler = logging.FileHandler(log_file)
    handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    logger.addHandler(handler)

    # Console output is intentionally disabled. To echo records to the
    # console as well, attach a logging.StreamHandler with the same formatter.
    return logger
|
||||
|
||||
|
||||
"""
|
||||
Usage example
|
||||
"""
|
||||
|
||||
|
||||
def main():
    """Demonstrate setup_logger by emitting one record at each common level."""
    demo_logger = setup_logger(__name__, "app.log")

    demo_logger.info("Application started.")
    demo_logger.debug("Debug message.")
    demo_logger.warning("Warning message.")
    demo_logger.error("Error occurred.")
|
84
main.py
Normal file
84
main.py
Normal file
@ -0,0 +1,84 @@
|
||||
import csv
|
||||
import re
|
||||
import pandas as pd
|
||||
import os
|
||||
from tqdm import tqdm
|
||||
from log_utils import setup_logger
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def csv_write(data: list, output_path: str = './out/output.csv', chunksize: int = 1000):
    """Append ``data`` as rows to a CSV file, without header or index.

    Rows are written in slices of ``chunksize`` so that a large buffer is
    serialized piece by piece. The file is opened in append mode, so repeated
    calls keep adding rows to the same file.

    Args:
        data: List of rows (each row a list of values). Rows of unequal
            length are padded with empty fields by the DataFrame conversion.
        output_path: Destination CSV file. Its parent directory is created
            when missing.
        chunksize: Number of rows written per ``to_csv`` call; must be > 0.

    Returns:
        True once all rows have been written.
    """
    parent = os.path.dirname(output_path)
    # The original crashed when the output directory did not exist yet.
    if parent:
        os.makedirs(parent, exist_ok=True)

    df = pd.DataFrame(data)
    for start in range(0, len(df), chunksize):
        df.iloc[start:start + chunksize].to_csv(
            output_path, mode='a', header=False, index=False
        )
    return True
|
||||
|
||||
|
||||
def findOpcode_in_asm_file(content, logger):
    """Extract per-section opcode sequences from an assembly listing.

    The text is split into sections on blank lines. Within a section, an
    opcode is the word that directly follows two tab characters. Sections
    that start with ``;`` are skipped, as are sections with fewer than two
    opcodes or whose first opcode is ``retn``. Data directives are removed
    from each sequence, and sequences longer than 200 opcodes are truncated
    to the first 200.

    Args:
        content: Open text file-like object; it is consumed with ``read()``.
        logger: Object with an ``info`` method, used to report truncated
            sequences.

    Returns:
        A tuple ``(over_num_flag, none_flag, result)`` where
        ``over_num_flag`` is True if any sequence was truncated,
        ``none_flag`` is True if no sequence was extracted, and ``result``
        is the list of opcode lists.
    """
    opcode_pattern = re.compile(r'\t{2}(\w+)\s')
    data_directives = {'align', 'dp', 'dd', 'db', 'dq'}
    max_opcodes = 200

    result = []
    over_num_flag = False
    for section in content.read().split("\n\n"):
        if section.startswith(';'):
            continue
        # TODO: determine whether the function is an external function.
        instructions = opcode_pattern.findall(section)
        if len(instructions) < 2 or instructions[0] == 'retn':
            continue

        instructions = [op for op in instructions if op not in data_directives]
        # A section made only of data directives is empty after filtering;
        # appending it would add a blank row and hide an opcode-less file.
        if not instructions:
            continue

        if len(instructions) > max_opcodes:
            over_num_flag = True
            logger.info(f"over 200 Opcode is {instructions},list len {len(instructions)}")
            instructions = instructions[:max_opcodes]
        result.append(instructions)

    none_flag = not result
    return over_num_flag, none_flag, result
|
||||
|
||||
|
||||
def _flush_opcodes(opcode_rows, done_file_num):
    """Write the buffered opcode rows to the CSV file and empty the buffer."""
    print("*======================start write==================================*")
    csv_write(opcode_rows)
    opcode_rows.clear()
    print("list clear")
    print(f"done {done_file_num} files")
    print("*=================write to csv success==============================*")


if __name__ == '__main__':
    # Script entry point: extract opcode sequences from every file in the
    # input directory and append them to the CSV in batches.
    start_time = time.time()
    logger = setup_logger('asm_to_csv', './log/asm_to_csv.log')
    asm_file_path = os.path.join("D:/bishe/dataset/infected/infected_asm/")
    file_list = os.listdir(asm_file_path)
    Opcode_list = []
    none_Opcode_list = []
    done_file_num = 0
    for file in file_list:
        try:
            with open(os.path.join(asm_file_path, file), 'r', errors='ignore') as asm_file:
                over_flag, flag, result = findOpcode_in_asm_file(asm_file, logger)
            if flag:
                logger.warning(f"file {file} Opcode is empty")
                # Record the file so the summary below is not always empty.
                none_Opcode_list.append(file)
                continue
            if over_flag:
                logger.info(f"file {file} Opcode num is over 200")
            Opcode_list.extend(result)
            done_file_num += 1
            # Flush periodically so the in-memory buffer stays bounded.
            if len(Opcode_list) > 50000:
                _flush_opcodes(Opcode_list, done_file_num)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole run,
            # but keep the traceback in the log for later inspection.
            logger.exception(f"Error processing file {file}")
            print(f"Error processing file {file}: {e}")
    # Write whatever is left over after the last periodic flush.
    if Opcode_list:
        _flush_opcodes(Opcode_list, done_file_num)
    # NOTE(review): the logger above is created without an explicit level, so
    # this DEBUG record may be filtered out — confirm the intended level.
    logger.debug(f"none Opcode file list {none_Opcode_list} ")
    end_time = time.time()
    print(f"Done processing {done_file_num} files")
    print(f"Total time: {end_time - start_time} "
          f"seconds, start at :{datetime.fromtimestamp(start_time).strftime('%Y-%m-%d %H:%M:%S')}")
|
Loading…
Reference in New Issue
Block a user