生成文件脚本

2023-12-28 17:01:36 +08:00 · 2023-12-28 17:01:36 +08:00 · 737afba0bc
commit 737afba0bc
parent da15051db9
5 changed files with 106 additions and 87 deletions
--- a/requirement_conda.txt
+++ b/requirement_conda.txt
@@ -1,83 +1,79 @@
-# This file may be used to create an environment using:
-# $ conda create --name <env> --file <this file>
-# platform: linux-64
-_libgcc_mutex=0.1=main
-antlr4-python3-runtime=4.8=pypi_0
-ase=3.21.1=pypi_0
-ca-certificates=2021.1.19=h06a4308_1
-cached-property=1.5.2=pypi_0
-certifi=2020.12.5=py37h06a4308_0
-cffi=1.14.5=pypi_0
-chardet=4.0.0=pypi_0
-cmake=3.18.4.post1=pypi_0
-cycler=0.10.0=pypi_0
-dataclasses=0.6=pypi_0
-decorator=4.4.2=pypi_0
-future=0.18.2=pypi_0
-googledrivedownloader=0.4=pypi_0
-h5py=3.2.1=pypi_0
-hydra-core=1.0.6=pypi_0
-idna=2.10=pypi_0
-importlib-resources=5.1.2=pypi_0
-intel-openmp=2021.1.2=pypi_0
-isodate=0.6.0=pypi_0
-jinja2=2.11.3=pypi_0
-joblib=1.0.1=pypi_0
-kiwisolver=1.3.1=pypi_0
-ld_impl_linux-64=2.33.1=h53a641e_7
-libedit=3.1.20191231=h14c3975_1
-libffi=3.3=he6710b0_2
-libgcc-ng=9.1.0=hdf63c60_0
-libstdcxx-ng=9.1.0=hdf63c60_0
-llvmlite=0.35.0=pypi_0
-magma-cuda112=2.5.2=1
-markupsafe=1.1.1=pypi_0
-matplotlib=3.3.4=pypi_0
-mkl=2021.1.1=pypi_0
-mkl-include=2021.1.1=pypi_0
-ncurses=6.2=he6710b0_1
-networkx=2.5=pypi_0
-ninja=1.10.0.post2=pypi_0
-numba=0.52.0=pypi_0
-numpy=1.20.1=pypi_0
-omegaconf=2.0.6=pypi_0
-openssl=1.1.1j=h27cfd23_0
-pandas=1.2.3=pypi_0
-pillow=8.1.2=pypi_0
-pip=21.0.1=py37h06a4308_0
-prefetch-generator=1.0.1=pypi_0
-pycparser=2.20=pypi_0
-pyparsing=2.4.7=pypi_0
-python=3.7.9=h7579374_0
-python-dateutil=2.8.1=pypi_0
-python-louvain=0.15=pypi_0
-pytz=2021.1=pypi_0
-pyyaml=5.4.1=pypi_0
-rdflib=5.0.0=pypi_0
-readline=8.1=h27cfd23_0
-requests=2.25.1=pypi_0
-scikit-learn=0.24.1=pypi_0
-scipy=1.6.1=pypi_0
-seaborn=0.11.1=pypi_0
-setuptools=52.0.0=py37h06a4308_0
-six=1.15.0=pypi_0
-sqlite=3.33.0=h62c20be_0
-tbb=2021.1.1=pypi_0
-texttable=1.6.3=pypi_0
-threadpoolctl=2.1.0=pypi_0
-tk=8.6.10=hbc83047_0
-torch=1.8.0+cu111=pypi_0
-torch-cluster=1.5.9=pypi_0
-torch-geometric=1.6.3=pypi_0
-torch-scatter=2.0.6=pypi_0
-torch-sparse=0.6.9=pypi_0
-torch-spline-conv=1.2.1=pypi_0
-torchaudio=0.8.0=pypi_0
-torchvision=0.9.0+cu111=pypi_0
-tqdm=4.59.0=pypi_0
-typing-extensions=3.7.4.3=pypi_0
-urllib3=1.26.3=pypi_0
-wheel=0.36.2=pyhd3eb1b0_0
-xz=5.2.5=h7b6447c_0
-zipp=3.4.1=pypi_0
-zlib=1.2.11=h7b6447c_3
+
+antlr4-python3-runtime==4.8
+ase==3.21.1
+ca-certificates==2021.1.19
+cached-property==1.5.2
+certifi==2020.12.5
+cffi==1.14.5
+chardet==4.0.0
+cmake==3.18.4.post1
+cycler==0.10.0
+dataclasses==0.6
+decorator==4.4.2
+future==0.18.2
+googledrivedownloader==0.4
+h5py==3.2.1
+hydra-core==1.0.6
+idna==2.10
+importlib-resources==5.1.2
+intel-openmp==2021.1.2
+isodate==0.6.0
+jinja2==2.11.3
+joblib==1.0.1
+kiwisolver==1.3.1
+ld_impl_linux-64==2.33.1
+libedit==3.1.20191231
+libffi==3.3
+libgcc-ng==9.1.0
+libstdcxx-ng==9.1.0
+llvmlite==0.35.0
+magma-cuda112==2.5.2
+markupsafe==1.1.1
+matplotlib==3.3.4
+mkl==2021.1.1
+mkl-include==2021.1.1
+ncurses==6.2
+networkx==2.5
+ninja==1.10.0.post2
+numba==0.52.0
+numpy==1.20.1
+omegaconf==2.0.6
+openssl==1.1.1j
+pandas==1.2.3
+pillow==8.1.2
+pip==21.0.1
+prefetch-generator==1.0.1
+pycparser==2.20
+pyparsing==2.4.7
+python-dateutil==2.8.1
+python-louvain==0.15
+pytz==2021.1
+pyyaml==5.4.1
+rdflib==5.0.0
+readline==8.1
+requests==2.25.1
+scikit-learn==0.24.1
+scipy==1.6.1
+seaborn==0.11.1
+setuptools==52.0.0
+six==1.15.0
+sqlite==3.33.0
+tbb==2021.1.1
+texttable==1.6.3
+threadpoolctl==2.1.0
+tk==8.6.10
+torch==1.8.0+cu111
+torch-cluster==1.5.9
+torch-geometric==1.6.3
+torch-scatter==2.0.6
+torch-sparse==0.6.9
+torch-spline-conv==1.2.1
+torchaudio==0.8.0
+torchvision==0.9.0+cu111
+tqdm==4.59.0
+typing-extensions==3.7.4.3
+urllib3==1.26.3
+wheel==0.36.2
+xz==5.2.5
+zipp==3.4.1
+zlib==1.2.11
--- a/samples/PreProcess.py
+++ b/samples/PreProcess.py
@@ -3,10 +3,11 @@ import torch
 from torch_geometric.data import Data
 from tqdm import tqdm

-from utils.Vocabulary import Vocab
+from src.utils.Vocabulary import Vocab


 def parse_json_list_2_pyg_object(jsonl_file: str, label: int, vocab: Vocab):
+#def parse_json_list_2_pyg_object(jsonl_file: str):
    index = 0
    with open(jsonl_file, "r", encoding="utf-8") as file:
        for item in tqdm(file):
@@ -35,7 +36,8 @@ def parse_json_list_2_pyg_object(jsonl_file: str, label: int, vocab: Vocab):

 if __name__ == '__main__':
    json_path = "./sample.jsonl"
-    train_vocab_file = "../ReservedDataCode/processed_dataset/train_external_function_name_vocab.jsonl"
+    train_vocab_file = "../data/processed_dataset/train_external_function_name_vocab.jsonl"
+    # train_vocab_file = "./res.jsonl"
    max_vocab_size = 10000
    vocabulary = Vocab(freq_file=train_vocab_file, max_vocab_size=max_vocab_size)
-    parse_json_list_2_pyg_object(jsonl_file=json_path, label=1, vocab=vocabulary)
+    parse_json_list_2_pyg_object(jsonl_file=json_path, label=1, vocab=vocabulary)
--- a/samples/funCount.py
+++ b/samples/funCount.py
@@ -0,0 +1,21 @@
+import json
+from collections import Counter
+
+from tqdm import tqdm
+
+if __name__ == '__main__':
+    # Tally how often each function name occurs across all samples.
+    file_name = './sample.jsonl'
+    fun_name_dict = Counter()
+    # `with` guarantees the input handle is closed; UTF-8 matches how
+    # PreProcess.py reads the same file.
+    with open(file_name, mode='r', encoding='utf-8') as fil:
+        for item in tqdm(fil):
+            # Each line is one JSON sample carrying a 'function_names' list.
+            fun_name_dict.update(json.loads(item)['function_names'])
+
+    # Emit one {"f_name", "count"} JSON object per line, in first-seen order.
+    with open('./res.jsonl', 'w') as file:
+        for key, value in fun_name_dict.items():
+            temp = {"f_name": key, "count": value}
+            file.write(json.dumps(temp) + '\n')
--- a/samples/sample.jsonl
+++ b/samples/sample.jsonl
@@ -1 +0,0 @@
-{"function_edges": [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]], "acfg_list": [{"block_number": 3, "block_edges": [[0, 0, 1, 1], [0, 2, 0, 2]], "block_features": [[0, 2, 1, 0, 7, 0, 1, 1, 4, 0, 0], [0, 2, 0, 0, 3, 1, 0, 1, 0, 0, 0], [1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0]]}, {"block_number": 29, "block_edges": [[0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 12, 12, 13, 14, 14, 15, 16, 17, 18, 19, 19, 20, 20, 21, 21, 23, 24, 24, 26, 26, 27, 28], [16, 0, 2, 0, 4, 1, 3, 3, 3, 25, 15, 8, 6, 6, 7, 28, 12, 9, 23, 16, 25, 11, 21, 17, 13, 19, 22, 14, 19, 18, 27, 24, 23, 26, 21, 22, 25, 10, 25, 5, 14, 8]], "block_features": [[8, 2, 1, 5, 36, 0, 6, 0, 2, 0, 0], [0, 7, 0, 0, 3, 0, 1, 1, 1, 0, 0], [0, 7, 0, 0, 2, 0, 1, 1, 0, 0, 0], [0, 7, 0, 1, 8, 1, 2, 0, 0, 0, 0], [0, 7, 1, 0, 2, 0, 1, 0, 0, 0, 0], [0, 7, 0, 0, 1, 0, 0, 0, 1, 0, 0], [1, 18, 0, 1, 9, 0, 2, 1, 1, 0, 0], [1, 21, 1, 0, 3, 0, 1, 1, 0, 0, 0], [0, 21, 0, 1, 4, 1, 2, 0, 0, 0, 0], [0, 24, 0, 2, 12, 1, 3, 0, 0, 0, 0], [1, 26, 0, 3, 16, 0, 4, 1, 4, 0, 0], [1, 2, 0, 5, 22, 0, 5, 0, 1, 0, 0], [5, 4, 1, 3, 21, 0, 4, 1, 3, 0, 0], [4, 11, 0, 2, 17, 1, 2, 0, 1, 0, 0], [2, 14, 0, 1, 12, 0, 2, 1, 1, 0, 0], [3, 17, 0, 0, 10, 0, 1, 0, 1, 0, 0], [1, 1, 0, 1, 5, 0, 2, 0, 0, 0, 0], [0, 14, 0, 0, 1, 0, 0, 0, 0, 0, 0], [3, 17, 0, 0, 7, 0, 0, 0, 0, 0, 0], [0, 17, 0, 1, 5, 0, 2, 1, 1, 0, 0], [2, 28, 1, 1, 11, 1, 2, 1, 1, 0, 0], [0, 11, 0, 1, 8, 1, 2, 0, 0, 0, 0], [0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0], [1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0], [12, 27, 1, 7, 41, 0, 8, 1, 6, 0, 0], [0, 0, 1, 0, 7, 1, 0, 0, 0, 1, 0], [2, 9, 0, 2, 17, 0, 3, 1, 3, 0, 0], [2, 14, 0, 0, 5, 0, 1, 0, 4, 0, 0], [1, 21, 4, 1, 13, 0, 2, 0, 5, 0, 0]]}], "function_names": ["sub_401000", "start", "GetTempPathW", "GetFileSize", "GetCurrentDirectoryW", "DeleteFileW", "CloseHandle", "WriteFile", "lstrcmpW", "ReadFile", "GetModuleHandleW", 
"ExitProcess", "HeapCreate", "HeapAlloc", "GetModuleFileNameW", "CreateFileW", "lstrlenW", "ShellExecuteW", "wsprintfW", "HttpSendRequestW", "InternetSetOptionW", "InternetQueryOptionW", "HttpOpenRequestW", "HttpQueryInfoW", "InternetReadFile", "InternetConnectW", "InternetOpenW"], "hash": "316ebb797d5196020eee013cfe771671fff4da8859adc9f385f52a74e82f4e55", "function_number": 27}
--- a/src/utils/Vocabulary.py
+++ b/src/utils/Vocabulary.py
@@ -64,6 +64,7 @@ class Vocab:
    def load_freq_counter_from_file(file_path: str, min_freq: int):
        freq_dict = {}
        with open(file_path, 'r') as f:
+
            for line in tqdm(f, desc="Load frequency list from the file of {} ... ".format(file_path)):
                line = json.loads(line)
                f_name = line["f_name"]
				`@ -1 +0,0 @@`
				{"function_edges": [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]], "acfg_list": [{"block_number": 3, "block_edges": [[0, 0, 1, 1], [0, 2, 0, 2]], "block_features": [[0, 2, 1, 0, 7, 0, 1, 1, 4, 0, 0], [0, 2, 0, 0, 3, 1, 0, 1, 0, 0, 0], [1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0]]}, {"block_number": 29, "block_edges": [[0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 12, 12, 13, 14, 14, 15, 16, 17, 18, 19, 19, 20, 20, 21, 21, 23, 24, 24, 26, 26, 27, 28], [16, 0, 2, 0, 4, 1, 3, 3, 3, 25, 15, 8, 6, 6, 7, 28, 12, 9, 23, 16, 25, 11, 21, 17, 13, 19, 22, 14, 19, 18, 27, 24, 23, 26, 21, 22, 25, 10, 25, 5, 14, 8]], "block_features": [[8, 2, 1, 5, 36, 0, 6, 0, 2, 0, 0], [0, 7, 0, 0, 3, 0, 1, 1, 1, 0, 0], [0, 7, 0, 0, 2, 0, 1, 1, 0, 0, 0], [0, 7, 0, 1, 8, 1, 2, 0, 0, 0, 0], [0, 7, 1, 0, 2, 0, 1, 0, 0, 0, 0], [0, 7, 0, 0, 1, 0, 0, 0, 1, 0, 0], [1, 18, 0, 1, 9, 0, 2, 1, 1, 0, 0], [1, 21, 1, 0, 3, 0, 1, 1, 0, 0, 0], [0, 21, 0, 1, 4, 1, 2, 0, 0, 0, 0], [0, 24, 0, 2, 12, 1, 3, 0, 0, 0, 0], [1, 26, 0, 3, 16, 0, 4, 1, 4, 0, 0], [1, 2, 0, 5, 22, 0, 5, 0, 1, 0, 0], [5, 4, 1, 3, 21, 0, 4, 1, 3, 0, 0], [4, 11, 0, 2, 17, 1, 2, 0, 1, 0, 0], [2, 14, 0, 1, 12, 0, 2, 1, 1, 0, 0], [3, 17, 0, 0, 10, 0, 1, 0, 1, 0, 0], [1, 1, 0, 1, 5, 0, 2, 0, 0, 0, 0], [0, 14, 0, 0, 1, 0, 0, 0, 0, 0, 0], [3, 17, 0, 0, 7, 0, 0, 0, 0, 0, 0], [0, 17, 0, 1, 5, 0, 2, 1, 1, 0, 0], [2, 28, 1, 1, 11, 1, 2, 1, 1, 0, 0], [0, 11, 0, 1, 8, 1, 2, 0, 0, 0, 0], [0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0], [1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0], [12, 27, 1, 7, 41, 0, 8, 1, 6, 0, 0], [0, 0, 1, 0, 7, 1, 0, 0, 0, 1, 0], [2, 9, 0, 2, 17, 0, 3, 1, 3, 0, 0], [2, 14, 0, 0, 5, 0, 1, 0, 4, 0, 0], [1, 21, 4, 1, 13, 0, 2, 0, 5, 0, 0]]}], "function_names": ["sub_401000", "start", "GetTempPathW", "GetFileSize", "GetCurrentDirectoryW", "DeleteFileW", "CloseHandle", "WriteFile", "lstrcmpW", "ReadFile", 
"GetModuleHandleW", "ExitProcess", "HeapCreate", "HeapAlloc", "GetModuleFileNameW", "CreateFileW", "lstrlenW", "ShellExecuteW", "wsprintfW", "HttpSendRequestW", "InternetSetOptionW", "InternetQueryOptionW", "HttpOpenRequestW", "HttpQueryInfoW", "InternetReadFile", "InternetConnectW", "InternetOpenW"], "hash": "316ebb797d5196020eee013cfe771671fff4da8859adc9f385f52a74e82f4e55", "function_number": 27}