#!/usr/bin/env python3
"""Collect per-function byte strings from binaries using radare2.

Every function radare2 finds in an input binary is rendered as the
concatenation of its instructions' hex byte strings.  Strings not yet present
in the CSV file at ``FUNC_CSV`` are appended there as ``[name, bytes]`` rows.
"""
import csv
import hashlib
import os
import re
from pathlib import Path

import click
import r2pipe

# CSV file that accumulates one ``[function name, byte string]`` row per
# recorded function.  It is both the de-duplication source and the output.
FUNC_CSV = Path('./asm_func/asm_hex/func_bytes.csv')

# Four-byte file headers accepted by validEXE().
_EXE_MAGICS = (bytes.fromhex('4d5a9000'),)

# Patterns used by normalize(), compiled once instead of once per call.
_HEX_LITERAL = re.compile(r'0x[0-9a-f]+')
_SCALE_DIGIT = re.compile(r'\*[0-9]')
_SPACE_DIGIT = re.compile(r' [0-9]')


def sha3(data):
    """Return the hex SHA3-256 digest of the string *data*."""
    return hashlib.sha3_256(data.encode()).hexdigest()


def validEXE(filename):
    """Return True if the first four bytes of *filename* are a known magic."""
    with open(filename, 'rb') as f:
        header = f.read(4)
    return header in _EXE_MAGICS


def normalize(opcode):
    """Replace concrete constants in *opcode* with the token ``CONST``.

    Rules, applied in order:
      * ``' - '`` becomes ``' + '``;
      * ``0x`` followed by lowercase hex digits becomes ``CONST``;
      * ``*`` followed by a digit becomes ``*CONST``;
      * a space followed by a digit becomes ``' CONST'`` (only that single
        digit is consumed; any following digits are left in place).
    """
    opcode = opcode.replace(' - ', ' + ')
    opcode = _HEX_LITERAL.sub('CONST', opcode)
    opcode = _SCALE_DIGIT.sub('*CONST', opcode)
    opcode = _SPACE_DIGIT.sub(' CONST', opcode)
    return opcode


def fn2asm(pdf, minlen):
    """Render one disassembled function as a single normalized byte string.

    Args:
        pdf: result of radare2's ``pdfj`` for the function (a dict with an
            ``'ops'`` list), or None.
        minlen: minimum number of instructions; shorter functions are skipped.

    Returns:
        The normalized ``bytes`` fields of all instructions joined together,
        or None when *pdf* is None, the function is shorter than *minlen*, or
        any instruction has type ``'invalid'``.
    """
    if pdf is None:
        return None
    ops = pdf['ops']
    if len(ops) < minlen:
        return None
    if any(op['type'] == 'invalid' for op in ops):
        return None
    # The jump-label table the original built here was never read, so it has
    # been dropped; the output depends only on the instruction bytes.
    return ''.join(normalize(op['bytes']) for op in ops)


def _load_saved_asm(csv_path):
    """Return the set of byte strings already stored in *csv_path*.

    A missing file yields an empty set so the very first run works.  Rows
    with fewer than two columns are ignored, as is a header row whose second
    column is the literal ``bytes``.
    """
    saved = set()
    try:
        with open(csv_path, encoding='utf8', newline='') as f:
            for row in csv.reader(f):
                if len(row) > 1 and row[1] != 'bytes':
                    saved.add(row[1])
    except FileNotFoundError:
        pass
    return saved


def bin2asm(filename, opath, minlen):
    """Append the not-yet-recorded functions of one binary to ``FUNC_CSV``.

    Args:
        filename: path of the binary to analyse.
        opath: output directory; accepted for interface compatibility but not
            used, because results always go to ``FUNC_CSV``.
        minlen: minimum instruction count, forwarded to fn2asm().

    Returns:
        The number of rows appended for this binary.
    """
    # Load known entries before starting radare2 so that a failure while
    # reading the CSV cannot leave an r2 process behind.
    seen = _load_saved_asm(FUNC_CSV)

    rows = []
    r = r2pipe.open(str(filename))
    try:
        r.cmd('aaaa')
        # 'aflj' may produce nothing when no functions were found.
        for fn in r.cmdj('aflj') or []:
            r.cmd(f's {fn["offset"]}')
            asm = fn2asm(r.cmdj('pdfj'), minlen)
            # Record the function only if it is non-empty and not a duplicate.
            if not asm or asm in seen:
                continue
            # Remember it immediately so identical functions inside the same
            # binary are not written more than once.
            seen.add(asm)
            rows.append([fn["name"], asm])
    finally:
        # Always shut the radare2 session down, even if analysis failed.
        r.quit()

    FUNC_CSV.parent.mkdir(parents=True, exist_ok=True)
    with open(FUNC_CSV, 'a', encoding='utf-8', newline='') as fp:
        csv.writer(fp).writerows(rows)

    print(f'[+] {filename}')
    return len(rows)


@click.command()
@click.option('-i', '--input', 'ipath', help='input directory / file', required=True)
@click.option('-o', '--output', 'opath', default='asm', help='output directory')
@click.option('-l', '--len', 'minlen', default=10, help='ignore assembly code with instructions amount smaller than minlen')
def cli(ipath, opath, minlen):
    '''
    Extract assembly functions from binary executable
    '''
    ipath = Path(ipath)
    opath = Path(opath)

    # Create the output directory, including missing parents.
    if not opath.exists():
        opath.mkdir(parents=True)

    fcount, bcount = 0, 0

    if os.path.isdir(ipath):
        # Directory input: scan every regular, non-symlink entry.
        for f in os.listdir(ipath):
            target = ipath / f
            if os.path.islink(target) or os.path.isdir(target):
                continue
            fcount += bin2asm(target, opath, minlen)
            bcount += 1
    elif os.path.exists(ipath):
        # Single-file input.
        fcount += bin2asm(ipath, opath, minlen)
        bcount += 1
    else:
        print(f'[Error] No such file or directory: {ipath}')

    print(f'[+] Total scan binary: {bcount} => Total generated assembly functions: {fcount}')


if __name__ == '__main__':
    cli()