This commit is contained in:
huihun 2024-04-29 17:31:04 +08:00
parent 3df2fe07cb
commit b4b131fc61
6 changed files with 105 additions and 96 deletions

View File

@ -1,9 +1,9 @@
Data:
preprocess_root: "/home/king/python/data/processed_dataset/DatasetJSON_remake"
train_vocab_file: "/home/king/python/data/processed_dataset/train_external_function_name_vocab.jsonl"
preprocess_root: "/home/king/python/data/DatasetJSON_remake"
train_vocab_file: "/home/king/python/data/fun_name_sort.jsonl"
max_vocab_size: 10000 # modify according to the result of 1BuildExternalVocab.py
Training:
cuda: True # enable GPU training if cuda is available
cuda: False # enable GPU training if cuda is available
dist_backend: "nccl" # if using torch.distribution, the backend to be used
dist_port: "1234"
max_epoches: 10

View File

@ -1,83 +1,83 @@
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: linux-64
_libgcc_mutex=0.1=main
antlr4-python3-runtime=4.8=pypi_0
ase=3.21.1=pypi_0
ca-certificates=2021.1.19=h06a4308_1
cached-property=1.5.2=pypi_0
certifi=2020.12.5=py37h06a4308_0
cffi=1.14.5=pypi_0
chardet=4.0.0=pypi_0
cmake=3.18.4.post1=pypi_0
cycler=0.10.0=pypi_0
dataclasses=0.6=pypi_0
decorator=4.4.2=pypi_0
future=0.18.2=pypi_0
googledrivedownloader=0.4=pypi_0
h5py=3.2.1=pypi_0
hydra-core=1.0.6=pypi_0
idna=2.10=pypi_0
importlib-resources=5.1.2=pypi_0
intel-openmp=2021.1.2=pypi_0
isodate=0.6.0=pypi_0
jinja2=2.11.3=pypi_0
joblib=1.0.1=pypi_0
kiwisolver=1.3.1=pypi_0
ld_impl_linux-64=2.33.1=h53a641e_7
libedit=3.1.20191231=h14c3975_1
libffi=3.3=he6710b0_2
libgcc-ng=9.1.0=hdf63c60_0
libstdcxx-ng=9.1.0=hdf63c60_0
llvmlite=0.35.0=pypi_0
magma-cuda112=2.5.2=1
markupsafe=1.1.1=pypi_0
matplotlib=3.3.4=pypi_0
mkl=2021.1.1=pypi_0
mkl-include=2021.1.1=pypi_0
ncurses=6.2=he6710b0_1
networkx=2.5=pypi_0
ninja=1.10.0.post2=pypi_0
numba=0.52.0=pypi_0
numpy=1.20.1=pypi_0
omegaconf=2.0.6=pypi_0
openssl=1.1.1j=h27cfd23_0
pandas=1.2.3=pypi_0
pillow=8.1.2=pypi_0
pip=21.0.1=py37h06a4308_0
prefetch-generator=1.0.1=pypi_0
pycparser=2.20=pypi_0
pyparsing=2.4.7=pypi_0
python=3.7.9=h7579374_0
python-dateutil=2.8.1=pypi_0
python-louvain=0.15=pypi_0
pytz=2021.1=pypi_0
pyyaml=5.4.1=pypi_0
rdflib=5.0.0=pypi_0
readline=8.1=h27cfd23_0
requests=2.25.1=pypi_0
scikit-learn=0.24.1=pypi_0
scipy=1.6.1=pypi_0
seaborn=0.11.1=pypi_0
setuptools=52.0.0=py37h06a4308_0
six=1.15.0=pypi_0
sqlite=3.33.0=h62c20be_0
tbb=2021.1.1=pypi_0
texttable=1.6.3=pypi_0
threadpoolctl=2.1.0=pypi_0
tk=8.6.10=hbc83047_0
torch=1.8.0+cu111=pypi_0
torch-cluster=1.5.9=pypi_0
torch-geometric=1.6.3=pypi_0
torch-scatter=2.0.6=pypi_0
torch-sparse=0.6.9=pypi_0
torch-spline-conv=1.2.1=pypi_0
torchaudio=0.8.0=pypi_0
torchvision=0.9.0+cu111=pypi_0
tqdm=4.59.0=pypi_0
typing-extensions=3.7.4.3=pypi_0
urllib3=1.26.3=pypi_0
wheel=0.36.2=pyhd3eb1b0_0
xz=5.2.5=h7b6447c_0
zipp=3.4.1=pypi_0
zlib=1.2.11=h7b6447c_3
_libgcc_mutex=0.1
antlr4-python3-runtime=4.8
ase=3.21.1
ca-certificates=2021.1.19
cached-property=1.5.2
certifi=2020.12.5
cffi=1.14.5
chardet=4.0.0
cmake=3.18.4.post1
cycler=0.10.0
dataclasses=0.6
decorator=4.4.2
future=0.18.2
googledrivedownloader=0.4
h5py=3.2.1
hydra-core=1.0.6
idna=2.10
importlib-resources=5.1.2
intel-openmp=2021.1.2
isodate=0.6.0
jinja2=2.11.3
joblib=1.0.1
kiwisolver=1.3.1
ld_impl_linux-64=2.33.1
libedit=3.1.20191231
libffi=3.3
libgcc-ng=9.1.0
libstdcxx-ng=9.1.0
llvmlite=0.35.0
magma-cuda112=2.5.2
markupsafe=1.1.1
matplotlib=3.3.4
mkl=2021.1.1
mkl-include=2021.1.1
ncurses=6.2
networkx=2.5
ninja=1.10.0.post2
numba=0.52.0
numpy=1.20.1
omegaconf=2.0.6
openssl=1.1.1j
pandas=1.2.3
pillow=8.1.2
pip=21.0.1
prefetch-generator=1.0.1
pycparser=2.20
pyparsing=2.4.7
python=3.7.9
python-dateutil=2.8.1
python-louvain=0.15
pytz=2021.1
pyyaml=5.4.1
rdflib=5.0.0
readline=8.1
requests=2.25.1
scikit-learn=0.24.1
scipy=1.6.1
seaborn=0.11.1
setuptools=52.0.0
six=1.15.0
sqlite=3.33.0
tbb=2021.1.1
texttable=1.6.3
threadpoolctl=2.1.0
tk=8.6.10
torch=1.8.0+cu111
torch-cluster=1.5.9
torch-geometric=1.6.3
torch-scatter=2.0.6
torch-sparse=0.6.9
torch-spline-conv=1.2.1
torchaudio=0.8.0
torchvision=0.9.0+cu111
tqdm=4.59.0
typing-extensions=3.7.4.3
urllib3=1.26.3
wheel=0.36.2
xz=5.2.5
zipp=3.4.1
zlib=1.2.11

View File

@ -18,7 +18,7 @@ def parse_json_list_2_pyg_object(jsonl_file: str, label: int, vocab: Vocab, save
test_flag = True
file_len = len(os.listdir(jsonl_file))
for file in tqdm(os.listdir(jsonl_file)):
for file in tqdm(os.listdir(jsonl_file), desc=file_type):
if index >= file_len * 0.8 and valid_flag:
type_index += 1
valid_flag = False
@ -36,7 +36,7 @@ def parse_json_list_2_pyg_object(jsonl_file: str, label: int, vocab: Vocab, save
def json_to_pt(file: str, label: int, vocab: Vocab, save_path: str, file_type: str, train_type: str, index: int):
if not os.path.exists(save_path+f"{train_type}_{file_type}/"):
os.mkdir(save_path+f"{train_type}_{file_type}/")
os.makedirs(save_path+f"{train_type}_{file_type}/")
with open(file, "r", encoding="utf-8") as item:
line = item.readline()
item = json.loads(line)
@ -67,10 +67,10 @@ def json_to_pt(file: str, label: int, vocab: Vocab, save_path: str, file_type: s
if __name__ == '__main__':
malware_json_path = "/home/king/python/data/jsonl/infected_jsonl/"
benign_json_path = "/home/king/python/data/jsonl/refind_jsonl/"
train_vocab_file = "/home/king/python/data/processed_dataset/train_external_function_name_vocab.jsonl"
save_vocab_file = "/home/king/python/data/processed_dataset/DatasetJSON_remake/"
malware_json_path = "/home/king/python/data/jsonl/malware/"
benign_json_path = "/home/king/python/data/jsonl/benign/"
train_vocab_file = "/home/king/python/data/fun_name_sort.jsonl"
save_vocab_file = "/home/king/python/data/DatasetJSON_remake/"
file_type = ["malware", "benign"]
max_vocab_size = 10000
vocabulary = Vocab(freq_file=train_vocab_file, max_vocab_size=max_vocab_size)

View File

@ -16,7 +16,7 @@ from omegaconf import DictConfig
from prefetch_generator import BackgroundGenerator
from sklearn.metrics import roc_auc_score, roc_curve
from torch import nn
from torch_geometric.data import DataLoader
from torch_geometric.loader import DataLoader
from tqdm import tqdm
from models.HierarchicalGraphModel import HierarchicalGraphNeuralNetwork
@ -26,6 +26,10 @@ from utils.PreProcessedDataset import MalwareDetectionDataset
from utils.RealBatch import create_real_batch_data
from utils.Vocabulary import Vocab
os.environ['TORCH_USE_CUDA_DSA'] = "1"
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
class DataLoaderX(DataLoader):
def __iter__(self):
@ -304,8 +308,12 @@ def main_app(config: DictConfig):
# num_gpus = 1
num_gpus = torch.cuda.device_count()
log.info("Total number of GPUs = {}".format(num_gpus))
# try:
# torch_mp.spawn(main_train_worker, nprocs=num_gpus, args=(num_gpus, _train_params, _model_params, _optim_params, log, log_result_file,))
# except Exception as e:
# print(e)
torch_mp.spawn(main_train_worker, nprocs=num_gpus, args=(num_gpus, _train_params, _model_params, _optim_params, log, log_result_file,))
# main_train_worker(0, num_gpus, _train_params, _model_params, _optim_params, log, log_result_file)
best_model_file = os.path.join(os.getcwd(), 'LocalRank_{}_best_model.pt'.format(0))
else:

View File

@ -24,7 +24,7 @@ class MalwareDetectionDataset(Dataset):
files.append(name)
return files
def __len__(self):
def len(self):
# def len(self):
# return 201
return len(self.malware_files) + len(self.benign_files)

View File

@ -1,8 +1,9 @@
import torch_geometric
import torch
if __name__ == '__main__':
# print(torch.__version__)
# print(torch.cuda.device_count())
# print(torch.cuda.get_device_name())
print(torch.cuda.nccl.is_available())
print(torch.__version__)
print(torch.cuda.device_count())
print(torch.cuda.get_device_name())
print(torch.cuda.is_available())
# print(torch.cuda.nccl.is_available())
print(torch.cuda.nccl.version())