测试改动

This commit is contained in:
huihun 2024-01-05 14:30:45 +08:00
parent 737afba0bc
commit 601a61157b
3 changed files with 19 additions and 10 deletions

View File

@@ -30,12 +30,11 @@ def parse_json_list_2_pyg_object(jsonl_file: str, label: int, vocab: Vocab):
external_function_index_list = [vocab[f_name] for f_name in external_function_name_list] external_function_index_list = [vocab[f_name] for f_name in external_function_name_list]
index += 1 index += 1
torch.save(Data(hash=item_hash, local_acfgs=acfg_list, external_list=external_function_index_list, function_edges=item_function_edges, targets=label), "./{}.pt".format(index)) torch.save(Data(hash=item_hash, local_acfgs=acfg_list, external_list=external_function_index_list, function_edges=item_function_edges, targets=label), "./cache/benign_{}.pt".format(index))
print(index)
if __name__ == '__main__': if __name__ == '__main__':
json_path = "./sample.jsonl" json_path = "./benign_result.jsonl"
train_vocab_file = "../data/processed_dataset/train_external_function_name_vocab.jsonl" train_vocab_file = "../data/processed_dataset/train_external_function_name_vocab.jsonl"
# train_vocab_file = "./res.jsonl" # train_vocab_file = "./res.jsonl"
max_vocab_size = 10000 max_vocab_size = 10000

View File

@@ -3,9 +3,19 @@ import json
from tqdm import tqdm from tqdm import tqdm
if __name__ == '__main__': if __name__ == '__main__':
file_name = './sample.jsonl' mal_file_name = './malware_result.jsonl'
fil = open(file_name, mode='r') ben_file_name = './benign-result.jsonl'
fil = open(mal_file_name, mode='r')
fun_name_dict = {} fun_name_dict = {}
for item in tqdm(fil):
item = json.loads(item)
item_fun_list = item['function_names']
for fun_name in item_fun_list:
if fun_name_dict.get(fun_name) is not None:
fun_name_dict[fun_name] += 1
else:
fun_name_dict[fun_name] = 1
fil = open(mal_file_name, mode='r')
for item in tqdm(fil): for item in tqdm(fil):
item = json.loads(item) item = json.loads(item)
item_fun_list = item['function_names'] item_fun_list = item['function_names']
@@ -15,7 +25,7 @@ if __name__ == '__main__':
else: else:
fun_name_dict[fun_name] = 1 fun_name_dict[fun_name] = 1
with open('./res.jsonl','w') as file: with open('./res.jsonl', 'w') as file:
for key,value in fun_name_dict.items(): for key, value in fun_name_dict.items():
temp = {"f_name":key, "count":value} temp = {"f_name": key, "count": value}
file.write(json.dumps(temp) + '\n') file.write(json.dumps(temp) + '\n')

View File

@@ -4,7 +4,7 @@ from datetime import datetime
import torch import torch
from torch_geometric.data import Dataset, DataLoader from torch_geometric.data import Dataset, DataLoader
from utils.RealBatch import create_real_batch_data # noqa from RealBatch import create_real_batch_data # noqa
class MalwareDetectionDataset(Dataset): class MalwareDetectionDataset(Dataset):
@@ -66,7 +66,7 @@ def _simulating(_dataset, _batch_size: int):
if __name__ == '__main__': if __name__ == '__main__':
root_path: str = '/home/xiang/MalGraph/data/processed_dataset/DatasetJSON/' root_path: str = '/home/king/python/MalGraph-main/data/processed_dataset/DatasetJSON'
i_batch_size = 2 i_batch_size = 2
train_dataset = MalwareDetectionDataset(root=root_path, train_or_test='train') train_dataset = MalwareDetectionDataset(root=root_path, train_or_test='train')