测试改动

2024-01-05 14:30:45 +08:00 · 2024-01-05 14:30:45 +08:00 · 601a61157b
commit 601a61157b
parent 737afba0bc
3 changed files with 19 additions and 10 deletions
--- a/samples/PreProcess.py
+++ b/samples/PreProcess.py
@@ -30,12 +30,11 @@ def parse_json_list_2_pyg_object(jsonl_file: str, label: int, vocab: Vocab):
            
            external_function_index_list = [vocab[f_name] for f_name in external_function_name_list]
            index += 1
-            torch.save(Data(hash=item_hash, local_acfgs=acfg_list, external_list=external_function_index_list, function_edges=item_function_edges, targets=label), "./{}.pt".format(index))
-            print(index)
+            torch.save(Data(hash=item_hash, local_acfgs=acfg_list, external_list=external_function_index_list, function_edges=item_function_edges, targets=label), "./cache/benign_{}.pt".format(index))


 if __name__ == '__main__':
-    json_path = "./sample.jsonl"
+    json_path = "./benign_result.jsonl"
    train_vocab_file = "../data/processed_dataset/train_external_function_name_vocab.jsonl"
    # train_vocab_file = "./res.jsonl"
    max_vocab_size = 10000
--- a/samples/funCount.py
+++ b/samples/funCount.py
@@ -3,9 +3,19 @@ import json
 from tqdm import tqdm

 if __name__ == '__main__':
-    file_name = './sample.jsonl'
-    fil = open(file_name, mode='r')
+    mal_file_name = './malware_result.jsonl'
+    ben_file_name = './benign-result.jsonl'
+    fil = open(mal_file_name, mode='r')
    fun_name_dict = {}
+    for item in tqdm(fil):
+        item = json.loads(item)
+        item_fun_list = item['function_names']
+        for fun_name in item_fun_list:
+            if fun_name_dict.get(fun_name) is not None:
+                fun_name_dict[fun_name] += 1
+            else:
+                fun_name_dict[fun_name] = 1
+    fil = open(mal_file_name, mode='r')
    for item in tqdm(fil):
        item = json.loads(item)
        item_fun_list = item['function_names']
--- a/src/utils/PreProcessedDataset.py
+++ b/src/utils/PreProcessedDataset.py
@@ -4,7 +4,7 @@ from datetime import datetime

 import torch
 from torch_geometric.data import Dataset, DataLoader
-from utils.RealBatch import create_real_batch_data  # noqa
+from RealBatch import create_real_batch_data  # noqa


 class MalwareDetectionDataset(Dataset):
@@ -66,7 +66,7 @@ def _simulating(_dataset, _batch_size: int):


 if __name__ == '__main__':
-    root_path: str = '/home/xiang/MalGraph/data/processed_dataset/DatasetJSON/'
+    root_path: str = '/home/king/python/MalGraph-main/data/processed_dataset/DatasetJSON'
    i_batch_size = 2
    
    train_dataset = MalwareDetectionDataset(root=root_path, train_or_test='train')