测试改动

2024-01-05 14:30:45 +08:00 · 2024-01-05 14:30:45 +08:00 · 601a61157b
commit 601a61157b
parent 737afba0bc
3 changed files with 19 additions and 10 deletions
--- a/samples/PreProcess.py
+++ b/samples/PreProcess.py
@@ -30,12 +30,11 @@ def parse_json_list_2_pyg_object(jsonl_file: str, label: int, vocab: Vocab):
            external_function_index_list = [vocab[f_name] for f_name in external_function_name_list]
            index += 1
-            torch.save(Data(hash=item_hash, local_acfgs=acfg_list, external_list=external_function_index_list, function_edges=item_function_edges, targets=label), "./{}.pt".format(index))
+            torch.save(Data(hash=item_hash, local_acfgs=acfg_list, external_list=external_function_index_list, function_edges=item_function_edges, targets=label), "./cache/benign_{}.pt".format(index))
            print(index)
 if __name__ == '__main__':
-    json_path = "./sample.jsonl"
+    json_path = "./benign_result.jsonl"
    train_vocab_file = "../data/processed_dataset/train_external_function_name_vocab.jsonl"
    # train_vocab_file = "./res.jsonl"
    max_vocab_size = 10000
--- a/samples/funCount.py
+++ b/samples/funCount.py
@@ -3,9 +3,19 @@ import json
 from tqdm import tqdm
 if __name__ == '__main__':
-    file_name = './sample.jsonl'
+    mal_file_name = './malware_result.jsonl'
-    fil = open(file_name, mode='r')
+    ben_file_name = './benign-result.jsonl'
    fil = open(mal_file_name, mode='r')
    fun_name_dict = {}
    for item in tqdm(fil):
        item = json.loads(item)
        item_fun_list = item['function_names']
        for fun_name in item_fun_list:
            if fun_name_dict.get(fun_name) is not None:
                fun_name_dict[fun_name] += 1
            else:
                fun_name_dict[fun_name] = 1
    fil = open(mal_file_name, mode='r')
    for item in tqdm(fil):
        item = json.loads(item)
        item_fun_list = item['function_names']
@@ -15,7 +25,7 @@ if __name__ == '__main__':
            else:
                fun_name_dict[fun_name] = 1
-    with open('./res.jsonl','w') as file:
+    with open('./res.jsonl', 'w') as file:
-        for key,value in fun_name_dict.items():
+        for key, value in fun_name_dict.items():
-            temp = {"f_name":key, "count":value}
+            temp = {"f_name": key, "count": value}
            file.write(json.dumps(temp) + '\n')
--- a/src/utils/PreProcessedDataset.py
+++ b/src/utils/PreProcessedDataset.py
@@ -4,7 +4,7 @@ from datetime import datetime
 import torch
 from torch_geometric.data import Dataset, DataLoader
-from utils.RealBatch import create_real_batch_data  # noqa
+from RealBatch import create_real_batch_data  # noqa
 class MalwareDetectionDataset(Dataset):
@@ -66,7 +66,7 @@ def _simulating(_dataset, _batch_size: int):
 if __name__ == '__main__':
-    root_path: str = '/home/xiang/MalGraph/data/processed_dataset/DatasetJSON/'
+    root_path: str = '/home/king/python/MalGraph-main/data/processed_dataset/DatasetJSON'
    i_batch_size = 2
    train_dataset = MalwareDetectionDataset(root=root_path, train_or_test='train')