测试改动
This commit is contained in:
parent
737afba0bc
commit
601a61157b
@ -30,12 +30,11 @@ def parse_json_list_2_pyg_object(jsonl_file: str, label: int, vocab: Vocab):
|
|||||||
|
|
||||||
external_function_index_list = [vocab[f_name] for f_name in external_function_name_list]
|
external_function_index_list = [vocab[f_name] for f_name in external_function_name_list]
|
||||||
index += 1
|
index += 1
|
||||||
torch.save(Data(hash=item_hash, local_acfgs=acfg_list, external_list=external_function_index_list, function_edges=item_function_edges, targets=label), "./{}.pt".format(index))
|
torch.save(Data(hash=item_hash, local_acfgs=acfg_list, external_list=external_function_index_list, function_edges=item_function_edges, targets=label), "./cache/benign_{}.pt".format(index))
|
||||||
print(index)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
json_path = "./sample.jsonl"
|
json_path = "./benign_result.jsonl"
|
||||||
train_vocab_file = "../data/processed_dataset/train_external_function_name_vocab.jsonl"
|
train_vocab_file = "../data/processed_dataset/train_external_function_name_vocab.jsonl"
|
||||||
# train_vocab_file = "./res.jsonl"
|
# train_vocab_file = "./res.jsonl"
|
||||||
max_vocab_size = 10000
|
max_vocab_size = 10000
|
||||||
|
@ -3,9 +3,19 @@ import json
|
|||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
file_name = './sample.jsonl'
|
mal_file_name = './malware_result.jsonl'
|
||||||
fil = open(file_name, mode='r')
|
ben_file_name = './benign-result.jsonl'
|
||||||
|
fil = open(mal_file_name, mode='r')
|
||||||
fun_name_dict = {}
|
fun_name_dict = {}
|
||||||
|
for item in tqdm(fil):
|
||||||
|
item = json.loads(item)
|
||||||
|
item_fun_list = item['function_names']
|
||||||
|
for fun_name in item_fun_list:
|
||||||
|
if fun_name_dict.get(fun_name) is not None:
|
||||||
|
fun_name_dict[fun_name] += 1
|
||||||
|
else:
|
||||||
|
fun_name_dict[fun_name] = 1
|
||||||
|
fil = open(mal_file_name, mode='r')
|
||||||
for item in tqdm(fil):
|
for item in tqdm(fil):
|
||||||
item = json.loads(item)
|
item = json.loads(item)
|
||||||
item_fun_list = item['function_names']
|
item_fun_list = item['function_names']
|
||||||
@ -15,7 +25,7 @@ if __name__ == '__main__':
|
|||||||
else:
|
else:
|
||||||
fun_name_dict[fun_name] = 1
|
fun_name_dict[fun_name] = 1
|
||||||
|
|
||||||
with open('./res.jsonl','w') as file:
|
with open('./res.jsonl', 'w') as file:
|
||||||
for key,value in fun_name_dict.items():
|
for key, value in fun_name_dict.items():
|
||||||
temp = {"f_name":key, "count":value}
|
temp = {"f_name": key, "count": value}
|
||||||
file.write(json.dumps(temp) + '\n')
|
file.write(json.dumps(temp) + '\n')
|
||||||
|
@ -4,7 +4,7 @@ from datetime import datetime
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch_geometric.data import Dataset, DataLoader
|
from torch_geometric.data import Dataset, DataLoader
|
||||||
from utils.RealBatch import create_real_batch_data # noqa
|
from RealBatch import create_real_batch_data # noqa
|
||||||
|
|
||||||
|
|
||||||
class MalwareDetectionDataset(Dataset):
|
class MalwareDetectionDataset(Dataset):
|
||||||
@ -66,7 +66,7 @@ def _simulating(_dataset, _batch_size: int):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
root_path: str = '/home/xiang/MalGraph/data/processed_dataset/DatasetJSON/'
|
root_path: str = '/home/king/python/MalGraph-main/data/processed_dataset/DatasetJSON'
|
||||||
i_batch_size = 2
|
i_batch_size = 2
|
||||||
|
|
||||||
train_dataset = MalwareDetectionDataset(root=root_path, train_or_test='train')
|
train_dataset = MalwareDetectionDataset(root=root_path, train_or_test='train')
|
||||||
|
Loading…
Reference in New Issue
Block a user