detect_rep/ASM2VEC_plus_scripts/bin2asm_hex.py

142 lines
3.5 KiB
Python
Raw Permalink Normal View History

2023-04-05 10:04:49 +08:00
#!/usr/bin/env python3
import re
import os
import click
import r2pipe
import hashlib
from pathlib import Path
import csv
def sha3(data):
    """Return the hexadecimal SHA3-256 digest of the string ``data``.

    The string is converted to bytes with ``str.encode()`` (default codec)
    before being hashed.
    """
    hasher = hashlib.sha3_256()
    hasher.update(data.encode())
    return hasher.hexdigest()
def validEXE(filename):
    """Return True when ``filename`` begins with the DOS/PE ``MZ`` signature.

    Only the first two bytes are compared. The two bytes that follow the
    signature belong to the DOS header's "bytes on last page" field and are
    not constant across linkers, so requiring ``4d 5a 90 00`` would reject
    otherwise valid executables.

    Args:
        filename: Path of the file to probe; it is opened in binary mode.

    Returns:
        bool: True if the file starts with ``b'MZ'``; False otherwise,
        including for files shorter than two bytes.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(filename, 'rb') as f:
        return f.read(2) == b'MZ'
def normalize(opcode):
    """Replace numeric literals in an instruction string with ``CONST``.

    Steps, applied in order:
      1. every ``' - '`` becomes ``' + '``;
      2. lowercase hexadecimal literals (``0x`` followed by ``[0-9a-f]+``)
         become ``CONST``;
      3. a single digit directly after ``*`` becomes ``CONST``;
      4. a single digit directly after a space becomes ``CONST``.

    Steps 3 and 4 match exactly one digit, so any further digits of a
    multi-digit decimal number are left in place after the ``CONST`` token.
    """
    text = opcode.replace(' - ', ' + ')
    rules = (
        (r'0x[0-9a-f]+', 'CONST'),
        (r'\*[0-9]', '*CONST'),
        (r' [0-9]', ' CONST'),
    )
    for pattern, replacement in rules:
        text = re.sub(pattern, replacement, text)
    return text
def fn2asm(pdf, minlen):
    """Concatenate the normalized instruction bytes of one disassembled function.

    Args:
        pdf: Parsed disassembly of a single function (the caller passes the
            result of the radare2 ``pdfj`` command), or ``None``. It must be
            a mapping whose ``'ops'`` entry is a list of per-instruction
            mappings providing ``'type'`` and ``'bytes'``.
        minlen: Minimum number of instructions; functions with fewer
            instructions are skipped.

    Returns:
        str | None: ``normalize(op['bytes'])`` for every instruction, joined
        without a separator. ``None`` when ``pdf`` is ``None``, when the
        function has fewer than ``minlen`` instructions, or when any
        instruction has type ``'invalid'``.
    """
    if pdf is None:
        return None

    ops = pdf['ops']
    if len(ops) < minlen:
        return None
    if any(op['type'] == 'invalid' for op in ops):
        return None

    # Only the instruction bytes are emitted, so instruction offsets and jump
    # targets are not inspected here.
    return ''.join(normalize(op['bytes']) for op in ops)
def _load_saved_asm(csv_path):
    """Return the set of byte strings stored in the second column of ``csv_path``.

    A missing file yields an empty set. Rows with fewer than two columns and
    the header cell ``bytes`` are skipped.
    """
    saved = set()
    try:
        with open(csv_path, encoding='utf-8', newline='') as f:
            for row in csv.reader(f):
                if len(row) > 1 and row[1] != 'bytes':
                    saved.add(row[1])
    except FileNotFoundError:
        # First run: nothing has been recorded yet.
        pass
    return saved


def bin2asm(filename, opath, minlen):
    """Append the byte strings of new functions found in ``filename`` to the CSV.

    The binary is opened with radare2 (via r2pipe) and analysed with
    ``aaaa``. Every function listed by ``aflj`` is converted with ``fn2asm``.
    A ``[function name, bytes]`` row is appended to
    ``./asm_func/asm_hex/func_bytes.csv`` for each function whose byte string
    is non-empty and has not been seen before, either in the CSV or earlier
    in this call.

    Args:
        filename: Path of the binary to analyse.
        opath: Output directory. Accepted for interface compatibility but not
            used; the CSV location is fixed.
        minlen: Minimum instruction count, forwarded to ``fn2asm``.

    Returns:
        int: Number of rows appended to the CSV for this binary.
    """
    csv_path = Path('./asm_func/asm_hex/func_bytes.csv')
    seen = _load_saved_asm(csv_path)
    new_rows = []

    r = r2pipe.open(str(filename))
    try:
        r.cmd('aaaa')
        # cmdj returns None when radare2 reports nothing parseable.
        for fn in r.cmdj('aflj') or []:
            r.cmd(f's {fn["offset"]}')
            asm = fn2asm(r.cmdj('pdfj'), minlen)
            # Record the function only if its bytes are non-empty and not a
            # duplicate of something already stored or already queued.
            if asm and asm not in seen:
                seen.add(asm)
                new_rows.append([fn["name"], asm])
    finally:
        # Always terminate the radare2 process spawned for this binary.
        r.quit()

    # Make sure the target directory exists before appending.
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    with open(csv_path, 'a+', encoding='utf-8', newline='') as fp:
        csv.writer(fp).writerows(new_rows)

    print(f'[+] {filename}')
    return len(new_rows)
# Command-line entry point: runs bin2asm on a single file, or on every regular
# (non-symlink, non-directory) entry directly inside a directory, then prints
# how many binaries were scanned and how many functions were recorded.
@click.command()
@click.option('-i', '--input', 'ipath', help='input directory / file', required=True)
@click.option('-o', '--output', 'opath', default='asm', help='output directory')
@click.option('-l', '--len', 'minlen', default=10,
              help='ignore assembly code with instructions amount smaller than minlen')
def cli(ipath, opath, minlen):
    '''
    Extract assembly functions from binary executable
    '''
    ipath = Path(ipath)
    opath = Path(opath)

    # Create the output directory, including missing parents. exist_ok covers
    # the case where something else creates it between the check and the call.
    if not opath.exists():
        os.makedirs(opath, exist_ok=True)

    fcount, bcount = 0, 0
    if ipath.is_dir():
        # Directory input: process direct children only (no recursion).
        for entry in os.listdir(ipath):
            candidate = ipath / entry
            if candidate.is_symlink() or candidate.is_dir():
                continue
            fcount += bin2asm(candidate, opath, minlen)
            bcount += 1
    elif ipath.exists():
        # Single-file input.
        fcount += bin2asm(ipath, opath, minlen)
        bcount += 1
    else:
        print(f'[Error] No such file or directory: {ipath}')

    print(f'[+] Total scan binary: {bcount} => Total generated assembly functions: {fcount}')
# Run the click command only when this file is executed as a script.
if __name__ == '__main__':
    cli()