import torch

import asm2vec
from asm2vec.get_opcode_vector import get_asm_input_vector, str_hex_to_bytes


def cosine_similarity(v1, v2):
    """Cosine similarity of two 1-D tensors, returned as a Python float."""
    return (v1 @ v2 / (v1.norm() * v2.norm())).item()


def load_model(path="./asm2vec_checkpoints/model.pt"):
    """Load a trained asm2vec model from a checkpoint, on GPU if available."""
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = asm2vec.utils.load_model(path, device=device)
    return model


def func2vec1(model, hex_asm_list=["56a194382b56508b35cc912b56ffd6ff3584382b56ffd6a174382b5650ffd65ec3",
                                   "56a194382b56508b35cc912b56ffd6ff3584382b56ffd6a174382b5650ffd65ec3"]):
    """Embed a batch of hex-encoded functions; returns a (len(hex_asm_list), dim) tensor."""
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    fun2vec_origin_list = []
    for hex_asm in hex_asm_list:
        hex2vec_list = str_hex_to_bytes(hex_asm)
        hex2vec_list, opcode_oprand_seq = get_asm_input_vector(hex2vec_list)
        # Average the per-instruction vectors to get the raw function vector.
        fun2vec_origin = [0.0] * len(hex2vec_list[0])
        for instruction_vec in hex2vec_list:
            for j in range(len(instruction_vec)):
                fun2vec_origin[j] += instruction_vec[j]
        opcode_seq_len = len(hex2vec_list)
        for j in range(len(fun2vec_origin)):
            fun2vec_origin[j] = fun2vec_origin[j] / opcode_seq_len
        fun2vec_origin_list.append(torch.tensor(fun2vec_origin).to(device))

    # Stack the raw function vectors into a batch on the selected device and
    # project them through the model's linear_f layer.
    fun2vec_batch = torch.stack(fun2vec_origin_list).to(device)
    embedding_func_vec = model.to(device).linear_f(fun2vec_batch).clone().detach().requires_grad_(True)
    return embedding_func_vec
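

# Illustrative helper (a minimal sketch): embed a batch of hex-encoded functions
# with func2vec1 and return the cosine similarity of the first two embeddings.
# Assumes `model` was loaded via load_model(); the name compare_first_two is
# chosen here purely for illustration.
def compare_first_two(model, hex_asm_list):
    vecs = func2vec1(model, hex_asm_list)
    return cosine_similarity(vecs[0], vecs[1])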


def func2vec(model, hex_asm="56a194382b56508b35cc912b56ffd6ff3584382b56ffd6a174382b5650ffd65ec3"):
    """Embed a single hex-encoded function; returns a 1-D embedding tensor."""
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    hex2vec_list = str_hex_to_bytes(hex_asm)
    hex2vec_list, opcode_oprand_seq = get_asm_input_vector(hex2vec_list)

    # Average the per-instruction vectors to get the raw function vector.
    fun2vec_origin = [0.0] * len(hex2vec_list[0])
    for instruction_vec in hex2vec_list:
        for j in range(len(instruction_vec)):
            fun2vec_origin[j] += instruction_vec[j]
    opcode_seq_len = len(hex2vec_list)
    for j in range(len(fun2vec_origin)):
        fun2vec_origin[j] = fun2vec_origin[j] / opcode_seq_len

    # Project the averaged vector through the model's linear_f layer.
    fun2vec_origin = torch.tensor(fun2vec_origin).to(device)
    embedding_func_vec = model.to(device).linear_f(fun2vec_origin)
    return embedding_func_vec


if __name__ == '__main__':
    model = load_model(path="./asm2vec_checkpoints/model_100.pt")
    func2vec(model)
    # Alternative inputs:
    # func2vec(model, hex_asm="f044014910488b81e00000004885c07404")
    # func2vec(model, hex_asm="56a194382b56508b35cc912b56ffd6ff3584382b56ffd6a174382b5650ffd65ec3")
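
    # Example usage (a sketch, assuming the checkpoint above exists): embed two
    # hex-encoded functions and compare them with cosine_similarity. Both calls
    # use the same sample string, so the similarity should be ~1.0.
    vec_a = func2vec(model, hex_asm="56a194382b56508b35cc912b56ffd6ff3584382b56ffd6a174382b5650ffd65ec3")
    vec_b = func2vec(model)  # default hex_asm is the same sample string
    print("cosine similarity:", cosine_similarity(vec_a, vec_b))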