132 lines
3.6 KiB
Python
132 lines
3.6 KiB
Python
|
#!/usr/bin/env python3
|
||
|
import re
|
||
|
import os
|
||
|
import click
|
||
|
import r2pipe
|
||
|
import hashlib
|
||
|
from pathlib import Path
|
||
|
|
||
|
def sha3(data):
    """Return the hexadecimal SHA3-256 digest of the string ``data``.

    ``data`` is encoded with ``str.encode()``'s default encoding (UTF-8)
    before it is hashed.
    """
    hasher = hashlib.sha3_256()
    hasher.update(data.encode())
    return hasher.hexdigest()
|
||
|
|
||
|
def validEXE(filename):
    """Tell whether ``filename`` starts with the bytes ``4D 5A 90 00``.

    Only the first four bytes of the file are read (in binary mode); a file
    shorter than four bytes therefore never matches.

    Returns:
        True when the four-byte prefix equals the accepted signature,
        False otherwise.
    """
    accepted_prefixes = (b'\x4d\x5a\x90\x00',)
    with open(filename, 'rb') as handle:
        prefix = handle.read(4)
    return prefix in accepted_prefixes
|
||
|
|
||
|
def normalize(opcode):
    """Replace concrete numeric operands in an instruction string with ``CONST``.

    Steps, applied in order:
      1. ``' - '`` is rewritten to ``' + '`` so displacement sign is ignored.
      2. Lower-case hexadecimal literals (``0x...``) become ``CONST``.
      3. A decimal number directly after ``*`` becomes ``*CONST``.
      4. A decimal number directly after a space becomes `` CONST``.

    Steps 3 and 4 consume the whole run of digits, so a multi-digit value
    such as ``*16`` yields ``*CONST`` rather than ``*CONST6``.

    Args:
        opcode: Instruction text as a string.

    Returns:
        The normalized instruction string.
    """
    opcode = opcode.replace(' - ', ' + ')
    opcode = re.sub(r'0x[0-9a-f]+', 'CONST', opcode)
    # `+` so every digit of the number is consumed, not just the first.
    opcode = re.sub(r'\*[0-9]+', '*CONST', opcode)
    opcode = re.sub(r' [0-9]+', ' CONST', opcode)
    return opcode
|
||
|
|
||
|
def fn2asm(pdf, minlen):
    """Render one disassembled function as labelled, normalized assembly text.

    Args:
        pdf: Parsed disassembly of a function: a dict whose ``'ops'`` entry is
            a list with one dict per instruction. The keys read here are
            ``'offset'``, ``'type'``, ``'opcode'`` and, when present,
            ``'jump'``. May be None.
        minlen: Functions with fewer than ``minlen`` instructions are skipped.

    Returns:
        The assembly text (one instruction per line, each prefixed by a single
        space, with ``LABEL<n>:`` lines before jump targets), or None when
        ``pdf`` is None/empty, has no ``'ops'``, is shorter than ``minlen``,
        or contains an instruction whose type is ``'invalid'``.

    Raises:
        AssertionError: if any instruction's ``'offset'`` is None.
    """
    # check
    if not pdf:
        return None
    ops = pdf.get('ops')
    if ops is None or len(ops) < minlen:
        return None
    if any(op['type'] == 'invalid' for op in ops):
        return None

    # set label
    # A set keeps the "does this jump land inside the function?" test O(1).
    scope = {op['offset'] for op in ops}
    if None in scope:
        # Instructions without a 'jump' key yield None below; a None offset
        # would make every such instruction look like an in-function jump.
        # Raised explicitly (same type as the former assert) so the check
        # also runs under `python -O`.
        raise AssertionError('instruction with offset None')

    labels = {}
    for index, op in enumerate(ops):
        target = op.get('jump')
        if target in scope:
            # The label number is the index of the first instruction that
            # jumps to this target.
            labels.setdefault(target, index)

    # dump output
    lines = []
    for op in ops:
        # add label
        own_label = labels.get(op['offset'])
        if own_label is not None:
            lines.append(f'LABEL{own_label}:\n')
        # add instruction
        target_label = labels.get(op.get('jump'))
        if target_label is not None:
            lines.append(f' {op["type"]} LABEL{target_label}\n')
        else:
            lines.append(f' {normalize(op["opcode"])}\n')

    return ''.join(lines)
|
||
|
|
||
|
def bin2asm(filename, opath, minlen):
    """Disassemble a binary with radare2 and write one text file per function.

    The binary is opened through r2pipe, analysed with ``aaaa``, and every
    function listed by ``aflj`` is disassembled with ``pdfj`` and converted by
    ``fn2asm``. Each function that yields text is written to
    ``opath / <function name with ':' replaced by ' '>``, preceded by a
    three-line header (``.name``, ``.offset``, ``.file``). A later function
    with the same sanitized name overwrites the earlier file.

    Args:
        filename: Path object of the binary (its ``.name`` is written into
            the header).
        opath: Path object of the existing output directory.
        minlen: Minimum instruction count, forwarded to ``fn2asm``.

    Returns:
        The number of function files written.
    """
    r = r2pipe.open(str(filename))
    try:
        r.cmd('aaaa')

        count = 0

        # `aflj` can produce no JSON (None) when no functions were found.
        for fn in r.cmdj('aflj') or []:
            r.cmd(f's {fn["offset"]}')
            asm = fn2asm(r.cmdj('pdfj'), minlen)
            if not asm:
                continue

            header = (
                f' .name {fn["name"]}\n'
                f' .offset {fn["offset"]:016x}\n'
                f' .file {filename.name}\n'
            )

            # ':' appears in some function names; swap it for a space before
            # using the name as a file name.
            file_name = str(fn["name"]).replace(':', ' ')
            print(file_name)

            with open(opath / file_name, 'w') as f:
                f.write(header + asm)
            count += 1
    finally:
        # Always shut the radare2 session down, otherwise every scanned
        # binary leaves a child process behind.
        r.quit()

    print(f'[+] {filename}')

    return count
|
||
|
|
||
|
@click.command()
@click.option('-i', '--input', 'ipath', help='input directory / file', required=True)
@click.option('-o', '--output', 'opath', default='asm', help='output directory')
@click.option('-l', '--len', 'minlen', default=10, help='ignore assembly code with instructions amount smaller than minlen')
def cli(ipath, opath, minlen):
    '''
    Extract assembly functions from binary executable
    '''
    # NOTE: the docstring above is what click shows for --help; keep it short.
    #
    # ipath  -- a single binary, or a directory whose regular files (symlinks
    #           and sub-directories are skipped, no recursion) are processed.
    # opath  -- directory that receives the generated files.
    # minlen -- minimum instruction count, forwarded to bin2asm.
    ipath = Path(ipath)
    opath = Path(opath)

    # create output directory
    # parents=True allows nested paths such as "out/asm"; exist_ok=True
    # removes the check-then-create race of exists() + mkdir().
    opath.mkdir(parents=True, exist_ok=True)

    fcount, bcount = 0, 0

    # directory
    if os.path.isdir(ipath):
        for f in os.listdir(ipath):
            if not os.path.islink(ipath / f) and not os.path.isdir(ipath / f):
                fcount += bin2asm(ipath / f, opath, minlen)
                bcount += 1
    # file
    elif os.path.exists(ipath):
        fcount += bin2asm(ipath, opath, minlen)
        bcount += 1
    else:
        print(f'[Error] No such file or directory: {ipath}')

    print(f'[+] Total scan binary: {bcount} => Total generated assembly functions: {fcount}')
|
||
|
|
||
|
# Script entry point: run the click command only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    cli()
|