备份
This commit is contained in:
parent
3df2fe07cb
commit
b4b131fc61
@ -1,9 +1,9 @@
|
||||
Data:
|
||||
preprocess_root: "/home/king/python/data/processed_dataset/DatasetJSON_remake"
|
||||
train_vocab_file: "/home/king/python/data/processed_dataset/train_external_function_name_vocab.jsonl"
|
||||
preprocess_root: "/home/king/python/data/DatasetJSON_remake"
|
||||
train_vocab_file: "/home/king/python/data/fun_name_sort.jsonl"
|
||||
max_vocab_size: 10000 # modify according to the result of 1BuildExternalVocab.py
|
||||
Training:
|
||||
cuda: True # enable GPU training if cuda is available
|
||||
cuda: False # enable GPU training if cuda is available
|
||||
dist_backend: "nccl" # if using torch.distribution, the backend to be used
|
||||
dist_port: "1234"
|
||||
max_epoches: 10
|
||||
|
@ -1,83 +1,83 @@
|
||||
# This file may be used to create an environment using:
|
||||
# $ conda create --name <env> --file <this file>
|
||||
# platform: linux-64
|
||||
_libgcc_mutex=0.1=main
|
||||
antlr4-python3-runtime=4.8=pypi_0
|
||||
ase=3.21.1=pypi_0
|
||||
ca-certificates=2021.1.19=h06a4308_1
|
||||
cached-property=1.5.2=pypi_0
|
||||
certifi=2020.12.5=py37h06a4308_0
|
||||
cffi=1.14.5=pypi_0
|
||||
chardet=4.0.0=pypi_0
|
||||
cmake=3.18.4.post1=pypi_0
|
||||
cycler=0.10.0=pypi_0
|
||||
dataclasses=0.6=pypi_0
|
||||
decorator=4.4.2=pypi_0
|
||||
future=0.18.2=pypi_0
|
||||
googledrivedownloader=0.4=pypi_0
|
||||
h5py=3.2.1=pypi_0
|
||||
hydra-core=1.0.6=pypi_0
|
||||
idna=2.10=pypi_0
|
||||
importlib-resources=5.1.2=pypi_0
|
||||
intel-openmp=2021.1.2=pypi_0
|
||||
isodate=0.6.0=pypi_0
|
||||
jinja2=2.11.3=pypi_0
|
||||
joblib=1.0.1=pypi_0
|
||||
kiwisolver=1.3.1=pypi_0
|
||||
ld_impl_linux-64=2.33.1=h53a641e_7
|
||||
libedit=3.1.20191231=h14c3975_1
|
||||
libffi=3.3=he6710b0_2
|
||||
libgcc-ng=9.1.0=hdf63c60_0
|
||||
libstdcxx-ng=9.1.0=hdf63c60_0
|
||||
llvmlite=0.35.0=pypi_0
|
||||
magma-cuda112=2.5.2=1
|
||||
markupsafe=1.1.1=pypi_0
|
||||
matplotlib=3.3.4=pypi_0
|
||||
mkl=2021.1.1=pypi_0
|
||||
mkl-include=2021.1.1=pypi_0
|
||||
ncurses=6.2=he6710b0_1
|
||||
networkx=2.5=pypi_0
|
||||
ninja=1.10.0.post2=pypi_0
|
||||
numba=0.52.0=pypi_0
|
||||
numpy=1.20.1=pypi_0
|
||||
omegaconf=2.0.6=pypi_0
|
||||
openssl=1.1.1j=h27cfd23_0
|
||||
pandas=1.2.3=pypi_0
|
||||
pillow=8.1.2=pypi_0
|
||||
pip=21.0.1=py37h06a4308_0
|
||||
prefetch-generator=1.0.1=pypi_0
|
||||
pycparser=2.20=pypi_0
|
||||
pyparsing=2.4.7=pypi_0
|
||||
python=3.7.9=h7579374_0
|
||||
python-dateutil=2.8.1=pypi_0
|
||||
python-louvain=0.15=pypi_0
|
||||
pytz=2021.1=pypi_0
|
||||
pyyaml=5.4.1=pypi_0
|
||||
rdflib=5.0.0=pypi_0
|
||||
readline=8.1=h27cfd23_0
|
||||
requests=2.25.1=pypi_0
|
||||
scikit-learn=0.24.1=pypi_0
|
||||
scipy=1.6.1=pypi_0
|
||||
seaborn=0.11.1=pypi_0
|
||||
setuptools=52.0.0=py37h06a4308_0
|
||||
six=1.15.0=pypi_0
|
||||
sqlite=3.33.0=h62c20be_0
|
||||
tbb=2021.1.1=pypi_0
|
||||
texttable=1.6.3=pypi_0
|
||||
threadpoolctl=2.1.0=pypi_0
|
||||
tk=8.6.10=hbc83047_0
|
||||
torch=1.8.0+cu111=pypi_0
|
||||
torch-cluster=1.5.9=pypi_0
|
||||
torch-geometric=1.6.3=pypi_0
|
||||
torch-scatter=2.0.6=pypi_0
|
||||
torch-sparse=0.6.9=pypi_0
|
||||
torch-spline-conv=1.2.1=pypi_0
|
||||
torchaudio=0.8.0=pypi_0
|
||||
torchvision=0.9.0+cu111=pypi_0
|
||||
tqdm=4.59.0=pypi_0
|
||||
typing-extensions=3.7.4.3=pypi_0
|
||||
urllib3=1.26.3=pypi_0
|
||||
wheel=0.36.2=pyhd3eb1b0_0
|
||||
xz=5.2.5=h7b6447c_0
|
||||
zipp=3.4.1=pypi_0
|
||||
zlib=1.2.11=h7b6447c_3
|
||||
_libgcc_mutex=0.1
|
||||
antlr4-python3-runtime=4.8
|
||||
ase=3.21.1
|
||||
ca-certificates=2021.1.19
|
||||
cached-property=1.5.2
|
||||
certifi=2020.12.5
|
||||
cffi=1.14.5
|
||||
chardet=4.0.0
|
||||
cmake=3.18.4.post1
|
||||
cycler=0.10.0
|
||||
dataclasses=0.6
|
||||
decorator=4.4.2
|
||||
future=0.18.2
|
||||
googledrivedownloader=0.4
|
||||
h5py=3.2.1
|
||||
hydra-core=1.0.6
|
||||
idna=2.10
|
||||
importlib-resources=5.1.2
|
||||
intel-openmp=2021.1.2
|
||||
isodate=0.6.0
|
||||
jinja2=2.11.3
|
||||
joblib=1.0.1
|
||||
kiwisolver=1.3.1
|
||||
ld_impl_linux-64=2.33.1
|
||||
libedit=3.1.20191231
|
||||
libffi=3.3
|
||||
libgcc-ng=9.1.0
|
||||
libstdcxx-ng=9.1.0
|
||||
llvmlite=0.35.0
|
||||
magma-cuda112=2.5.2
|
||||
markupsafe=1.1.1
|
||||
matplotlib=3.3.4
|
||||
mkl=2021.1.1
|
||||
mkl-include=2021.1.1
|
||||
ncurses=6.2
|
||||
networkx=2.5
|
||||
ninja=1.10.0.post2
|
||||
numba=0.52.0
|
||||
numpy=1.20.1
|
||||
omegaconf=2.0.6
|
||||
openssl=1.1.1j
|
||||
pandas=1.2.3
|
||||
pillow=8.1.2
|
||||
pip=21.0.1
|
||||
prefetch-generator=1.0.1
|
||||
pycparser=2.20
|
||||
pyparsing=2.4.7
|
||||
python=3.7.9
|
||||
python-dateutil=2.8.1
|
||||
python-louvain=0.15
|
||||
pytz=2021.1
|
||||
pyyaml=5.4.1
|
||||
rdflib=5.0.0
|
||||
readline=8.1
|
||||
requests=2.25.1
|
||||
scikit-learn=0.24.1
|
||||
scipy=1.6.1
|
||||
seaborn=0.11.1
|
||||
setuptools=52.0.0
|
||||
six=1.15.0
|
||||
sqlite=3.33.0
|
||||
tbb=2021.1.1
|
||||
texttable=1.6.3
|
||||
threadpoolctl=2.1.0
|
||||
tk=8.6.10
|
||||
torch=1.8.0+cu111
|
||||
torch-cluster=1.5.9
|
||||
torch-geometric=1.6.3
|
||||
torch-scatter=2.0.6
|
||||
torch-sparse=0.6.9
|
||||
torch-spline-conv=1.2.1
|
||||
torchaudio=0.8.0
|
||||
torchvision=0.9.0+cu111
|
||||
tqdm=4.59.0
|
||||
typing-extensions=3.7.4.3
|
||||
urllib3=1.26.3
|
||||
wheel=0.36.2
|
||||
xz=5.2.5
|
||||
zipp=3.4.1
|
||||
zlib=1.2.11
|
||||
|
@ -18,7 +18,7 @@ def parse_json_list_2_pyg_object(jsonl_file: str, label: int, vocab: Vocab, save
|
||||
test_flag = True
|
||||
file_len = len(os.listdir(jsonl_file))
|
||||
|
||||
for file in tqdm(os.listdir(jsonl_file)):
|
||||
for file in tqdm(os.listdir(jsonl_file), desc=file_type):
|
||||
if index >= file_len * 0.8 and valid_flag:
|
||||
type_index += 1
|
||||
valid_flag = False
|
||||
@ -36,7 +36,7 @@ def parse_json_list_2_pyg_object(jsonl_file: str, label: int, vocab: Vocab, save
|
||||
|
||||
def json_to_pt(file: str, label: int, vocab: Vocab, save_path: str, file_type: str, train_type: str, index: int):
|
||||
if not os.path.exists(save_path+f"{train_type}_{file_type}/"):
|
||||
os.mkdir(save_path+f"{train_type}_{file_type}/")
|
||||
os.makedirs(save_path+f"{train_type}_{file_type}/")
|
||||
with open(file, "r", encoding="utf-8") as item:
|
||||
line = item.readline()
|
||||
item = json.loads(line)
|
||||
@ -67,10 +67,10 @@ def json_to_pt(file: str, label: int, vocab: Vocab, save_path: str, file_type: s
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
malware_json_path = "/home/king/python/data/jsonl/infected_jsonl/"
|
||||
benign_json_path = "/home/king/python/data/jsonl/refind_jsonl/"
|
||||
train_vocab_file = "/home/king/python/data/processed_dataset/train_external_function_name_vocab.jsonl"
|
||||
save_vocab_file = "/home/king/python/data/processed_dataset/DatasetJSON_remake/"
|
||||
malware_json_path = "/home/king/python/data/jsonl/malware/"
|
||||
benign_json_path = "/home/king/python/data/jsonl/benign/"
|
||||
train_vocab_file = "/home/king/python/data/fun_name_sort.jsonl"
|
||||
save_vocab_file = "/home/king/python/data/DatasetJSON_remake/"
|
||||
file_type = ["malware", "benign"]
|
||||
max_vocab_size = 10000
|
||||
vocabulary = Vocab(freq_file=train_vocab_file, max_vocab_size=max_vocab_size)
|
||||
|
@ -16,7 +16,7 @@ from omegaconf import DictConfig
|
||||
from prefetch_generator import BackgroundGenerator
|
||||
from sklearn.metrics import roc_auc_score, roc_curve
|
||||
from torch import nn
|
||||
from torch_geometric.data import DataLoader
|
||||
from torch_geometric.loader import DataLoader
|
||||
from tqdm import tqdm
|
||||
|
||||
from models.HierarchicalGraphModel import HierarchicalGraphNeuralNetwork
|
||||
@ -26,6 +26,10 @@ from utils.PreProcessedDataset import MalwareDetectionDataset
|
||||
from utils.RealBatch import create_real_batch_data
|
||||
from utils.Vocabulary import Vocab
|
||||
|
||||
os.environ['TORCH_USE_CUDA_DSA'] = "1"
|
||||
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
|
||||
|
||||
|
||||
|
||||
class DataLoaderX(DataLoader):
|
||||
def __iter__(self):
|
||||
@ -304,8 +308,12 @@ def main_app(config: DictConfig):
|
||||
# num_gpus = 1
|
||||
num_gpus = torch.cuda.device_count()
|
||||
log.info("Total number of GPUs = {}".format(num_gpus))
|
||||
# try:
|
||||
# torch_mp.spawn(main_train_worker, nprocs=num_gpus, args=(num_gpus, _train_params, _model_params, _optim_params, log, log_result_file,))
|
||||
# except Exception as e:
|
||||
# print(e)
|
||||
torch_mp.spawn(main_train_worker, nprocs=num_gpus, args=(num_gpus, _train_params, _model_params, _optim_params, log, log_result_file,))
|
||||
|
||||
# main_train_worker(0, num_gpus, _train_params, _model_params, _optim_params, log, log_result_file)
|
||||
best_model_file = os.path.join(os.getcwd(), 'LocalRank_{}_best_model.pt'.format(0))
|
||||
|
||||
else:
|
||||
|
@ -24,7 +24,7 @@ class MalwareDetectionDataset(Dataset):
|
||||
files.append(name)
|
||||
return files
|
||||
|
||||
def __len__(self):
|
||||
def len(self):
|
||||
# def len(self):
|
||||
# return 201
|
||||
return len(self.malware_files) + len(self.benign_files)
|
||||
|
@ -1,8 +1,9 @@
|
||||
import torch_geometric
|
||||
import torch
|
||||
if __name__ == '__main__':
|
||||
# print(torch.__version__)
|
||||
# print(torch.cuda.device_count())
|
||||
# print(torch.cuda.get_device_name())
|
||||
print(torch.cuda.nccl.is_available())
|
||||
print(torch.__version__)
|
||||
print(torch.cuda.device_count())
|
||||
print(torch.cuda.get_device_name())
|
||||
print(torch.cuda.is_available())
|
||||
# print(torch.cuda.nccl.is_available())
|
||||
print(torch.cuda.nccl.version())
|
||||
|
Loading…
Reference in New Issue
Block a user