142 lines
3.5 KiB
Python
142 lines
3.5 KiB
Python
#!/usr/bin/env python3
|
||
import re
|
||
import os
|
||
import click
|
||
import r2pipe
|
||
import hashlib
|
||
from pathlib import Path
|
||
import csv
|
||
|
||
def sha3(data):
    """Return the hex-encoded SHA3-256 digest of ``data``.

    Args:
        data: Text, which is encoded with the default UTF-8 codec before
            hashing, or a bytes-like object, which is hashed as-is.

    Returns:
        The digest as a 64-character lowercase hexadecimal string.
    """
    # Only text needs encoding; bytes-like input is passed straight to hashlib
    # instead of failing on a missing ``.encode`` attribute.
    if isinstance(data, str):
        data = data.encode()
    return hashlib.sha3_256(data).hexdigest()
|
||
|
||
|
||
def validEXE(filename):
    """Return True when ``filename`` begins with the DOS/PE ``MZ`` signature.

    Only the two signature bytes are compared. The bytes that follow them in
    the DOS header vary between linkers, so requiring a fixed 4-byte prefix
    (``4d 5a 90 00``) would reject otherwise valid executables.

    Args:
        filename: Path of the file to probe.

    Returns:
        True if the first two bytes are ``b'MZ'``; False otherwise, including
        for files shorter than two bytes.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    magics = (b'MZ',)
    with open(filename, 'rb') as f:
        header = f.read(2)
    return header in magics
|
||
|
||
|
||
# Compiled once at import time; normalize() runs once per instruction.
_HEX_LITERAL_RE = re.compile(r'0x[0-9a-fA-F]+')
_SCALE_FACTOR_RE = re.compile(r'\*[0-9]+')
_DECIMAL_OPERAND_RE = re.compile(r' [0-9]+')


def normalize(opcode):
    """Replace concrete numeric operands in ``opcode`` with ``CONST``.

    The rewrites are applied in this order:

    1. `` - `` becomes `` + `` so the sign of a displacement is ignored.
    2. Hexadecimal literals (``0x...``) become ``CONST``.
    3. A scale factor after ``*`` becomes ``*CONST``.
    4. A decimal number preceded by a space becomes `` CONST``.

    Whole digit runs are replaced, so ``*16`` becomes ``*CONST`` and not
    ``*CONST6``.

    Args:
        opcode: Instruction text to normalize.

    Returns:
        The normalized string. Input without numeric operands is returned
        unchanged.
    """
    opcode = opcode.replace(' - ', ' + ')
    opcode = _HEX_LITERAL_RE.sub('CONST', opcode)
    opcode = _SCALE_FACTOR_RE.sub('*CONST', opcode)
    opcode = _DECIMAL_OPERAND_RE.sub(' CONST', opcode)
    return opcode
|
||
|
||
|
||
def fn2asm(pdf, minlen):
    """Flatten one disassembled function into a single normalized string.

    Args:
        pdf: Parsed ``pdfj`` result for one function: a mapping whose
            ``'ops'`` entry is a list of instruction dicts, each carrying at
            least ``'type'`` and ``'bytes'``. May be None.
        minlen: Minimum number of instructions a function must contain to be
            emitted.

    Returns:
        The normalized ``'bytes'`` field of every instruction, concatenated in
        order with no separator. Returns None when ``pdf`` is None, when the
        function has fewer than ``minlen`` instructions, or when any
        instruction has type ``'invalid'``.
    """
    if pdf is None:
        return None

    ops = pdf['ops']
    if len(ops) < minlen:
        return None
    # One undecodable instruction makes the whole function unusable.
    if any(op['type'] == 'invalid' for op in ops):
        return None

    # Only the raw bytes are emitted; jump targets are not labelled, so no
    # label table is built.
    return ''.join(normalize(op['bytes']) for op in ops)
|
||
|
||
|
||
# CSV that accumulates [function name, normalized bytes] rows across runs.
_FUNC_BYTES_CSV = './asm_func/asm_hex/func_bytes.csv'


def _load_saved_asm(csv_path):
    """Return the set of byte strings already stored in column 2 of ``csv_path``."""
    saved = set()
    try:
        with open(csv_path, encoding='utf-8', newline='') as f:
            for row in csv.reader(f):
                # Skip blank or short rows and the optional "bytes" header cell.
                if len(row) > 1 and row[1] != 'bytes':
                    saved.add(row[1])
    except FileNotFoundError:
        # First run: the CSV does not exist yet, so nothing is saved so far.
        pass
    return saved


def bin2asm(filename, opath, minlen):
    """Extract the functions of one binary and append new ones to the CSV.

    The binary is opened with radare2 and analysed with ``aaaa``. Every
    function listed by ``aflj`` is disassembled with ``pdfj`` and flattened by
    ``fn2asm``. A function is recorded only if its flattened form is non-empty
    and is not already in the CSV or recorded earlier in this call.

    Args:
        filename: Path of the binary to analyse.
        opath: Output directory. Kept for interface compatibility; this
            function does not use it, and results always go to
            ``./asm_func/asm_hex/func_bytes.csv``.
        minlen: Minimum instruction count, forwarded to ``fn2asm``.

    Returns:
        The number of rows appended to the CSV for this binary.
    """
    seen = _load_saved_asm(_FUNC_BYTES_CSV)
    csv_data = []

    r = r2pipe.open(str(filename))
    try:
        r.cmd('aaaa')
        # ``aflj`` may yield nothing when no functions were found.
        for fn in r.cmdj('aflj') or []:
            r.cmd(f's {fn["offset"]}')
            asm = fn2asm(r.cmdj('pdfj'), minlen)
            # Record the function only if it is non-empty and not a duplicate.
            if not asm or asm in seen:
                continue
            seen.add(asm)
            csv_data.append([fn["name"], asm])
    finally:
        # Always end the radare2 session so no process is left per binary.
        r.quit()

    # Appending fails if the directory is missing, so create it first.
    os.makedirs(os.path.dirname(_FUNC_BYTES_CSV), exist_ok=True)
    with open(_FUNC_BYTES_CSV, 'a+', encoding='utf-8', newline='') as fp:
        csv.writer(fp).writerows(csv_data)

    print(f'[+] {filename}')

    return len(csv_data)
|
||
|
||
|
||
@click.command()
@click.option('-i', '--input', 'ipath', help='input directory / file', required=True)
@click.option('-o', '--output', 'opath', default='asm', help='output directory')
@click.option('-l', '--len', 'minlen', default=10,
              help='ignore assembly code with instructions amount smaller than minlen')
def cli(ipath, opath, minlen):
    '''
    Extract assembly functions from binary executable
    '''
    # The docstring above is also the command's --help text; keep it short.
    #
    # ipath:  a single binary, or a directory whose regular files are each
    #         processed (symlinks and sub-directories are skipped).
    # opath:  output directory, created if missing and passed to bin2asm.
    # minlen: minimum instruction count, passed to bin2asm.
    ipath = Path(ipath)
    opath = Path(opath)

    # create output directory (nested paths allowed; a concurrent creation
    # between the check and the mkdir is tolerated)
    if not opath.exists():
        opath.mkdir(parents=True, exist_ok=True)

    fcount, bcount = 0, 0

    # directory
    if ipath.is_dir():
        # Sorted so the processing order, and hence which binary first
        # records a shared function, is reproducible.
        for entry in sorted(ipath.iterdir()):
            if entry.is_symlink() or entry.is_dir():
                continue
            fcount += bin2asm(entry, opath, minlen)
            bcount += 1
    # file
    elif ipath.exists():
        fcount += bin2asm(ipath, opath, minlen)
        bcount += 1
    else:
        print(f'[Error] No such file or directory: {ipath}')
    print(f'[+] Total scan binary: {bcount} => Total generated assembly functions: {fcount}')
|
||
|
||
|
||
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    cli()
|