142 lines
3.5 KiB
Python
142 lines
3.5 KiB
Python
|
#!/usr/bin/env python3
|
|||
|
import re
|
|||
|
import os
|
|||
|
import click
|
|||
|
import r2pipe
|
|||
|
import hashlib
|
|||
|
from pathlib import Path
|
|||
|
import csv
|
|||
|
|
|||
|
def sha3(data):
    '''Return the hexadecimal SHA3-256 digest of the text ``data``.

    ``data`` must offer ``.encode()`` (i.e. be a ``str``); it is encoded
    with the default codec before being hashed.
    '''
    hasher = hashlib.sha3_256()
    hasher.update(data.encode())
    return hasher.hexdigest()
|
|||
|
|
|||
|
|
|||
|
def validEXE(filename):
    '''Tell whether ``filename`` begins with an accepted 4-byte magic header.

    The only accepted header is the byte sequence ``4D 5A 90 00``
    (presumably the DOS/PE "MZ" signature followed by the usual ``90 00``;
    confirm if other executable formats must be supported).

    Returns True when the first four bytes of the file match, else False.
    Files shorter than four bytes never match.
    '''
    accepted_headers = (bytes.fromhex('4d5a9000'),)
    with open(filename, 'rb') as handle:
        return handle.read(4) in accepted_headers
|
|||
|
|
|||
|
|
|||
|
def normalize(opcode):
    '''Replace concrete constants in an instruction string with ``CONST``.

    Steps, applied in order:
      1. every `` - `` becomes `` + ``;
      2. lowercase hex literals (``0x`` followed by ``[0-9a-f]+``) become
         ``CONST``;
      3. a single digit directly after ``*`` becomes ``CONST``;
      4. a single digit directly after a space becomes ``CONST``.

    Steps 3 and 4 consume exactly one digit, so any further digits of a
    multi-digit decimal number are left in place after the ``CONST`` token.
    '''
    text = opcode.replace(' - ', ' + ')
    substitutions = (
        (r'0x[0-9a-f]+', 'CONST'),
        (r'\*[0-9]', '*CONST'),
        (r' [0-9]', ' CONST'),
    )
    for pattern, token in substitutions:
        text = re.sub(pattern, token, text)
    return text
|
|||
|
|
|||
|
|
|||
|
def fn2asm(pdf, minlen):
    '''Concatenate the normalized byte strings of one disassembled function.

    Parameters:
        pdf: dict holding an ``'ops'`` list, or None (the caller passes the
            result of the ``pdfj`` command). Each op is a dict with at
            least ``'bytes'``; ``'type'`` is inspected when present.
        minlen: minimum number of ops a function must have to be kept.

    Returns:
        The concatenation of ``normalize(op['bytes'])`` over all ops, or
        None when ``pdf`` is empty/None, has no ``'ops'`` entry, has fewer
        than ``minlen`` ops, or contains an op whose type is ``'invalid'``.
    '''
    # Tolerate a missing or empty result instead of raising KeyError.
    if not pdf:
        return None
    ops = pdf.get('ops')
    if ops is None or len(ops) < minlen:
        return None
    if any(op.get('type') == 'invalid' for op in ops):
        return None

    # The former jump-label table was never used in the output, so it was
    # dropped along with its per-op list scan and assert-based validation.
    return ''.join(normalize(op['bytes']) for op in ops)
|
|||
|
|
|||
|
|
|||
|
def _load_saved_asm(csv_path):
    '''Return the set of second-column values already stored in ``csv_path``.'''
    saved = set()
    try:
        with open(csv_path, encoding='utf8', newline='') as f:
            for line in csv.reader(f):
                # Skip blank/short rows and a header row whose second
                # column is the literal "bytes".
                if len(line) > 1 and line[1] != 'bytes':
                    saved.add(line[1])
    except FileNotFoundError:
        # First run: nothing has been recorded yet.
        pass
    return saved


def bin2asm(filename, opath, minlen):
    '''Append the not-yet-recorded functions of one binary to the shared CSV.

    The binary is opened with r2pipe and analysed with ``aaaa``. Every
    function listed by ``aflj`` is turned into a byte string via
    ``fn2asm``. Strings that are non-empty and not already present in
    ``./asm_func/asm_hex/func_bytes.csv`` (nor produced earlier in this
    call) are appended to that file as ``[function name, bytes]`` rows.

    Parameters:
        filename: path of the binary to analyse.
        opath: accepted for interface compatibility; not used here, output
            always goes to the fixed CSV path above.
        minlen: minimum op count, forwarded to ``fn2asm``.

    Returns:
        Number of rows appended for this binary.
    '''
    csv_path = './asm_func/asm_hex/func_bytes.csv'
    seen = _load_saved_asm(csv_path)
    csv_data = []

    r = r2pipe.open(str(filename))
    try:
        r.cmd('aaaa')
        # Guard against a None result so the loop does not raise TypeError.
        for fn in r.cmdj('aflj') or []:
            r.cmd(f's {fn["offset"]}')
            asm = fn2asm(r.cmdj('pdfj'), minlen)
            # Record the function only if it is non-empty and not a duplicate.
            if asm and asm not in seen:
                seen.add(asm)
                csv_data.append([fn["name"], asm])
    finally:
        # Always shut the radare2 session down so it does not leak.
        r.quit()

    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    with open(csv_path, 'a+', encoding='utf-8', newline='') as fp:
        csv.writer(fp).writerows(csv_data)

    print(f'[+] {filename}')

    return len(csv_data)
|
|||
|
|
|||
|
|
|||
|
@click.command()
@click.option('-i', '--input', 'ipath', help='input directory / file', required=True)
@click.option('-o', '--output', 'opath', default='asm', help='output directory')
@click.option('-l', '--len', 'minlen', default=10,
              help='ignore assembly code with instructions amount smaller than minlen')
def cli(ipath, opath, minlen):
    '''
    Extract assembly functions from binary executable
    '''
    # NOTE: the docstring above is shown by click as the --help text, so it
    # is kept verbatim; details are documented in comments instead.
    #
    # ipath  - a single binary, or a directory whose regular (non-symlink,
    #          non-directory) entries are each processed.
    # opath  - output directory; created if missing and passed to bin2asm.
    # minlen - minimum instruction count, passed to bin2asm.
    ipath = Path(ipath)
    opath = Path(opath)

    # Create the output directory, including missing parents, and tolerate
    # it appearing between the check and the creation.
    if not opath.exists():
        opath.mkdir(parents=True, exist_ok=True)

    fcount, bcount = 0, 0

    if ipath.is_dir():
        # Sorted so the processing order, and therefore which duplicate is
        # recorded first, is reproducible across runs.
        for entry in sorted(ipath.iterdir()):
            if entry.is_symlink() or entry.is_dir():
                continue
            fcount += bin2asm(entry, opath, minlen)
            bcount += 1
    elif ipath.exists():
        fcount += bin2asm(ipath, opath, minlen)
        bcount += 1
    else:
        print(f'[Error] No such file or directory: {ipath}')

    print(f'[+] Total scan binary: {bcount} => Total generated assembly functions: {fcount}')
|
|||
|
|
|||
|
|
|||
|
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    cli()
|