备份
This commit is contained in:
parent
3df2fe07cb
commit
b4b131fc61
@ -1,9 +1,9 @@
|
|||||||
Data:
|
Data:
|
||||||
preprocess_root: "/home/king/python/data/processed_dataset/DatasetJSON_remake"
|
preprocess_root: "/home/king/python/data/DatasetJSON_remake"
|
||||||
train_vocab_file: "/home/king/python/data/processed_dataset/train_external_function_name_vocab.jsonl"
|
train_vocab_file: "/home/king/python/data/fun_name_sort.jsonl"
|
||||||
max_vocab_size: 10000 # modify according to the result of 1BuildExternalVocab.py
|
max_vocab_size: 10000 # modify according to the result of 1BuildExternalVocab.py
|
||||||
Training:
|
Training:
|
||||||
cuda: True # enable GPU training if cuda is available
|
cuda: False # enable GPU training if cuda is available
|
||||||
dist_backend: "nccl" # if using torch.distribution, the backend to be used
|
dist_backend: "nccl" # if using torch.distribution, the backend to be used
|
||||||
dist_port: "1234"
|
dist_port: "1234"
|
||||||
max_epoches: 10
|
max_epoches: 10
|
||||||
|
@ -1,83 +1,83 @@
|
|||||||
# This file may be used to create an environment using:
|
# This file may be used to create an environment using:
|
||||||
# $ conda create --name <env> --file <this file>
|
# $ conda create --name <env> --file <this file>
|
||||||
# platform: linux-64
|
# platform: linux-64
|
||||||
_libgcc_mutex=0.1=main
|
_libgcc_mutex=0.1
|
||||||
antlr4-python3-runtime=4.8=pypi_0
|
antlr4-python3-runtime=4.8
|
||||||
ase=3.21.1=pypi_0
|
ase=3.21.1
|
||||||
ca-certificates=2021.1.19=h06a4308_1
|
ca-certificates=2021.1.19
|
||||||
cached-property=1.5.2=pypi_0
|
cached-property=1.5.2
|
||||||
certifi=2020.12.5=py37h06a4308_0
|
certifi=2020.12.5
|
||||||
cffi=1.14.5=pypi_0
|
cffi=1.14.5
|
||||||
chardet=4.0.0=pypi_0
|
chardet=4.0.0
|
||||||
cmake=3.18.4.post1=pypi_0
|
cmake=3.18.4.post1
|
||||||
cycler=0.10.0=pypi_0
|
cycler=0.10.0
|
||||||
dataclasses=0.6=pypi_0
|
dataclasses=0.6
|
||||||
decorator=4.4.2=pypi_0
|
decorator=4.4.2
|
||||||
future=0.18.2=pypi_0
|
future=0.18.2
|
||||||
googledrivedownloader=0.4=pypi_0
|
googledrivedownloader=0.4
|
||||||
h5py=3.2.1=pypi_0
|
h5py=3.2.1
|
||||||
hydra-core=1.0.6=pypi_0
|
hydra-core=1.0.6
|
||||||
idna=2.10=pypi_0
|
idna=2.10
|
||||||
importlib-resources=5.1.2=pypi_0
|
importlib-resources=5.1.2
|
||||||
intel-openmp=2021.1.2=pypi_0
|
intel-openmp=2021.1.2
|
||||||
isodate=0.6.0=pypi_0
|
isodate=0.6.0
|
||||||
jinja2=2.11.3=pypi_0
|
jinja2=2.11.3
|
||||||
joblib=1.0.1=pypi_0
|
joblib=1.0.1
|
||||||
kiwisolver=1.3.1=pypi_0
|
kiwisolver=1.3.1
|
||||||
ld_impl_linux-64=2.33.1=h53a641e_7
|
ld_impl_linux-64=2.33.1
|
||||||
libedit=3.1.20191231=h14c3975_1
|
libedit=3.1.20191231
|
||||||
libffi=3.3=he6710b0_2
|
libffi=3.3
|
||||||
libgcc-ng=9.1.0=hdf63c60_0
|
libgcc-ng=9.1.0
|
||||||
libstdcxx-ng=9.1.0=hdf63c60_0
|
libstdcxx-ng=9.1.0
|
||||||
llvmlite=0.35.0=pypi_0
|
llvmlite=0.35.0
|
||||||
magma-cuda112=2.5.2=1
|
magma-cuda112=2.5.2
|
||||||
markupsafe=1.1.1=pypi_0
|
markupsafe=1.1.1
|
||||||
matplotlib=3.3.4=pypi_0
|
matplotlib=3.3.4
|
||||||
mkl=2021.1.1=pypi_0
|
mkl=2021.1.1
|
||||||
mkl-include=2021.1.1=pypi_0
|
mkl-include=2021.1.1
|
||||||
ncurses=6.2=he6710b0_1
|
ncurses=6.2
|
||||||
networkx=2.5=pypi_0
|
networkx=2.5
|
||||||
ninja=1.10.0.post2=pypi_0
|
ninja=1.10.0.post2
|
||||||
numba=0.52.0=pypi_0
|
numba=0.52.0
|
||||||
numpy=1.20.1=pypi_0
|
numpy=1.20.1
|
||||||
omegaconf=2.0.6=pypi_0
|
omegaconf=2.0.6
|
||||||
openssl=1.1.1j=h27cfd23_0
|
openssl=1.1.1j
|
||||||
pandas=1.2.3=pypi_0
|
pandas=1.2.3
|
||||||
pillow=8.1.2=pypi_0
|
pillow=8.1.2
|
||||||
pip=21.0.1=py37h06a4308_0
|
pip=21.0.1
|
||||||
prefetch-generator=1.0.1=pypi_0
|
prefetch-generator=1.0.1
|
||||||
pycparser=2.20=pypi_0
|
pycparser=2.20
|
||||||
pyparsing=2.4.7=pypi_0
|
pyparsing=2.4.7
|
||||||
python=3.7.9=h7579374_0
|
python=3.7.9
|
||||||
python-dateutil=2.8.1=pypi_0
|
python-dateutil=2.8.1
|
||||||
python-louvain=0.15=pypi_0
|
python-louvain=0.15
|
||||||
pytz=2021.1=pypi_0
|
pytz=2021.1
|
||||||
pyyaml=5.4.1=pypi_0
|
pyyaml=5.4.1
|
||||||
rdflib=5.0.0=pypi_0
|
rdflib=5.0.0
|
||||||
readline=8.1=h27cfd23_0
|
readline=8.1
|
||||||
requests=2.25.1=pypi_0
|
requests=2.25.1
|
||||||
scikit-learn=0.24.1=pypi_0
|
scikit-learn=0.24.1
|
||||||
scipy=1.6.1=pypi_0
|
scipy=1.6.1
|
||||||
seaborn=0.11.1=pypi_0
|
seaborn=0.11.1
|
||||||
setuptools=52.0.0=py37h06a4308_0
|
setuptools=52.0.0
|
||||||
six=1.15.0=pypi_0
|
six=1.15.0
|
||||||
sqlite=3.33.0=h62c20be_0
|
sqlite=3.33.0
|
||||||
tbb=2021.1.1=pypi_0
|
tbb=2021.1.1
|
||||||
texttable=1.6.3=pypi_0
|
texttable=1.6.3
|
||||||
threadpoolctl=2.1.0=pypi_0
|
threadpoolctl=2.1.0
|
||||||
tk=8.6.10=hbc83047_0
|
tk=8.6.10
|
||||||
torch=1.8.0+cu111=pypi_0
|
torch=1.8.0+cu111
|
||||||
torch-cluster=1.5.9=pypi_0
|
torch-cluster=1.5.9
|
||||||
torch-geometric=1.6.3=pypi_0
|
torch-geometric=1.6.3
|
||||||
torch-scatter=2.0.6=pypi_0
|
torch-scatter=2.0.6
|
||||||
torch-sparse=0.6.9=pypi_0
|
torch-sparse=0.6.9
|
||||||
torch-spline-conv=1.2.1=pypi_0
|
torch-spline-conv=1.2.1
|
||||||
torchaudio=0.8.0=pypi_0
|
torchaudio=0.8.0
|
||||||
torchvision=0.9.0+cu111=pypi_0
|
torchvision=0.9.0+cu111
|
||||||
tqdm=4.59.0=pypi_0
|
tqdm=4.59.0
|
||||||
typing-extensions=3.7.4.3=pypi_0
|
typing-extensions=3.7.4.3
|
||||||
urllib3=1.26.3=pypi_0
|
urllib3=1.26.3
|
||||||
wheel=0.36.2=pyhd3eb1b0_0
|
wheel=0.36.2
|
||||||
xz=5.2.5=h7b6447c_0
|
xz=5.2.5
|
||||||
zipp=3.4.1=pypi_0
|
zipp=3.4.1
|
||||||
zlib=1.2.11=h7b6447c_3
|
zlib=1.2.11
|
||||||
|
@ -18,7 +18,7 @@ def parse_json_list_2_pyg_object(jsonl_file: str, label: int, vocab: Vocab, save
|
|||||||
test_flag = True
|
test_flag = True
|
||||||
file_len = len(os.listdir(jsonl_file))
|
file_len = len(os.listdir(jsonl_file))
|
||||||
|
|
||||||
for file in tqdm(os.listdir(jsonl_file)):
|
for file in tqdm(os.listdir(jsonl_file), desc=file_type):
|
||||||
if index >= file_len * 0.8 and valid_flag:
|
if index >= file_len * 0.8 and valid_flag:
|
||||||
type_index += 1
|
type_index += 1
|
||||||
valid_flag = False
|
valid_flag = False
|
||||||
@ -36,7 +36,7 @@ def parse_json_list_2_pyg_object(jsonl_file: str, label: int, vocab: Vocab, save
|
|||||||
|
|
||||||
def json_to_pt(file: str, label: int, vocab: Vocab, save_path: str, file_type: str, train_type: str, index: int):
|
def json_to_pt(file: str, label: int, vocab: Vocab, save_path: str, file_type: str, train_type: str, index: int):
|
||||||
if not os.path.exists(save_path+f"{train_type}_{file_type}/"):
|
if not os.path.exists(save_path+f"{train_type}_{file_type}/"):
|
||||||
os.mkdir(save_path+f"{train_type}_{file_type}/")
|
os.makedirs(save_path+f"{train_type}_{file_type}/")
|
||||||
with open(file, "r", encoding="utf-8") as item:
|
with open(file, "r", encoding="utf-8") as item:
|
||||||
line = item.readline()
|
line = item.readline()
|
||||||
item = json.loads(line)
|
item = json.loads(line)
|
||||||
@ -67,10 +67,10 @@ def json_to_pt(file: str, label: int, vocab: Vocab, save_path: str, file_type: s
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
malware_json_path = "/home/king/python/data/jsonl/infected_jsonl/"
|
malware_json_path = "/home/king/python/data/jsonl/malware/"
|
||||||
benign_json_path = "/home/king/python/data/jsonl/refind_jsonl/"
|
benign_json_path = "/home/king/python/data/jsonl/benign/"
|
||||||
train_vocab_file = "/home/king/python/data/processed_dataset/train_external_function_name_vocab.jsonl"
|
train_vocab_file = "/home/king/python/data/fun_name_sort.jsonl"
|
||||||
save_vocab_file = "/home/king/python/data/processed_dataset/DatasetJSON_remake/"
|
save_vocab_file = "/home/king/python/data/DatasetJSON_remake/"
|
||||||
file_type = ["malware", "benign"]
|
file_type = ["malware", "benign"]
|
||||||
max_vocab_size = 10000
|
max_vocab_size = 10000
|
||||||
vocabulary = Vocab(freq_file=train_vocab_file, max_vocab_size=max_vocab_size)
|
vocabulary = Vocab(freq_file=train_vocab_file, max_vocab_size=max_vocab_size)
|
||||||
|
@ -16,7 +16,7 @@ from omegaconf import DictConfig
|
|||||||
from prefetch_generator import BackgroundGenerator
|
from prefetch_generator import BackgroundGenerator
|
||||||
from sklearn.metrics import roc_auc_score, roc_curve
|
from sklearn.metrics import roc_auc_score, roc_curve
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from torch_geometric.data import DataLoader
|
from torch_geometric.loader import DataLoader
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from models.HierarchicalGraphModel import HierarchicalGraphNeuralNetwork
|
from models.HierarchicalGraphModel import HierarchicalGraphNeuralNetwork
|
||||||
@ -26,6 +26,10 @@ from utils.PreProcessedDataset import MalwareDetectionDataset
|
|||||||
from utils.RealBatch import create_real_batch_data
|
from utils.RealBatch import create_real_batch_data
|
||||||
from utils.Vocabulary import Vocab
|
from utils.Vocabulary import Vocab
|
||||||
|
|
||||||
|
os.environ['TORCH_USE_CUDA_DSA'] = "1"
|
||||||
|
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class DataLoaderX(DataLoader):
|
class DataLoaderX(DataLoader):
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
@ -304,8 +308,12 @@ def main_app(config: DictConfig):
|
|||||||
# num_gpus = 1
|
# num_gpus = 1
|
||||||
num_gpus = torch.cuda.device_count()
|
num_gpus = torch.cuda.device_count()
|
||||||
log.info("Total number of GPUs = {}".format(num_gpus))
|
log.info("Total number of GPUs = {}".format(num_gpus))
|
||||||
|
# try:
|
||||||
|
# torch_mp.spawn(main_train_worker, nprocs=num_gpus, args=(num_gpus, _train_params, _model_params, _optim_params, log, log_result_file,))
|
||||||
|
# except Exception as e:
|
||||||
|
# print(e)
|
||||||
torch_mp.spawn(main_train_worker, nprocs=num_gpus, args=(num_gpus, _train_params, _model_params, _optim_params, log, log_result_file,))
|
torch_mp.spawn(main_train_worker, nprocs=num_gpus, args=(num_gpus, _train_params, _model_params, _optim_params, log, log_result_file,))
|
||||||
|
# main_train_worker(0, num_gpus, _train_params, _model_params, _optim_params, log, log_result_file)
|
||||||
best_model_file = os.path.join(os.getcwd(), 'LocalRank_{}_best_model.pt'.format(0))
|
best_model_file = os.path.join(os.getcwd(), 'LocalRank_{}_best_model.pt'.format(0))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
@ -24,7 +24,7 @@ class MalwareDetectionDataset(Dataset):
|
|||||||
files.append(name)
|
files.append(name)
|
||||||
return files
|
return files
|
||||||
|
|
||||||
def __len__(self):
|
def len(self):
|
||||||
# def len(self):
|
# def len(self):
|
||||||
# return 201
|
# return 201
|
||||||
return len(self.malware_files) + len(self.benign_files)
|
return len(self.malware_files) + len(self.benign_files)
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
import torch_geometric
|
import torch_geometric
|
||||||
import torch
|
import torch
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# print(torch.__version__)
|
print(torch.__version__)
|
||||||
# print(torch.cuda.device_count())
|
print(torch.cuda.device_count())
|
||||||
# print(torch.cuda.get_device_name())
|
print(torch.cuda.get_device_name())
|
||||||
print(torch.cuda.nccl.is_available())
|
print(torch.cuda.is_available())
|
||||||
|
# print(torch.cuda.nccl.is_available())
|
||||||
print(torch.cuda.nccl.version())
|
print(torch.cuda.nccl.version())
|
||||||
|
Loading…
Reference in New Issue
Block a user