diff --git a/.idea/deployment.xml b/.idea/deployment.xml
index 81b14c5..2ef2c9d 100644
--- a/.idea/deployment.xml
+++ b/.idea/deployment.xml
@@ -2,7 +2,14 @@
-
+
+
+
+
+
+
+
+
diff --git a/Genius3/raw-feature-extractor/HierarchicalGraphModel_mine.py b/Genius3/raw-feature-extractor/HierarchicalGraphModel_mine.py
new file mode 100644
index 0000000..b28ad47
--- /dev/null
+++ b/Genius3/raw-feature-extractor/HierarchicalGraphModel_mine.py
@@ -0,0 +1,87 @@
+import torch
+import torch.nn as nn
+import torch_geometric
+from torch_geometric.data import Batch, Data
+
+
+class HierarchicalGraphNeuralNetwork(nn.Module):
+    def __init__(self, external_vocab: 'Vocab'):  # Vocab is a project-local class, referenced by name only
+        super(HierarchicalGraphNeuralNetwork, self).__init__()
+        self.pool = 'global_max_pool'
+        # Hierarchy 1: Control Flow Graph (CFG) embedding and pooling
+        cfg_filter_list = [200, 200]
+        cfg_filter_list.insert(0, 11)
+        self.cfg_filter_length = len(cfg_filter_list)
+        cfg_graphsage_params = [dict(in_channels=cfg_filter_list[i], out_channels=cfg_filter_list[i + 1], bias=True)
+                                for i in range(self.cfg_filter_length - 1)]
+        cfg_conv = dict(constructor=torch_geometric.nn.conv.SAGEConv, kwargs=cfg_graphsage_params)
+        cfg_constructor = cfg_conv['constructor']
+        for i in range(self.cfg_filter_length - 1):
+            setattr(self, 'CFG_gnn_{}'.format(i + 1), cfg_constructor(**cfg_conv['kwargs'][i]))
+        self.dropout = nn.Dropout(p=0.2)
+        # Hierarchy 2: Function Call Graph (FCG) embedding and pooling
+        self.external_embedding_layer = nn.Embedding(num_embeddings=external_vocab.max_vocab_size + 2,
+                                                     embedding_dim=cfg_filter_list[-1],
+                                                     padding_idx=external_vocab.pad_idx)
+        fcg_filter_list = [200, 200]
+        fcg_filter_list.insert(0, cfg_filter_list[-1])
+        self.fcg_filter_length = len(fcg_filter_list)
+        fcg_graphsage_params = [dict(in_channels=fcg_filter_list[i], out_channels=fcg_filter_list[i + 1], bias=True)
+                                for i in range(self.fcg_filter_length - 1)]
+        fcg_conv = dict(constructor=torch_geometric.nn.conv.SAGEConv, kwargs=fcg_graphsage_params)
+        fcg_constructor = fcg_conv['constructor']
+        for i in range(self.fcg_filter_length - 1):
+            setattr(self, 'FCG_gnn_{}'.format(i + 1), fcg_constructor(**fcg_conv['kwargs'][i]))
+        # final projection: step down to the class logits with successive linear layers
+        self.pj1 = torch.nn.Linear(in_features=fcg_filter_list[-1], out_features=int(fcg_filter_list[-1] / 2))
+        self.pj2 = torch.nn.Linear(in_features=int(fcg_filter_list[-1] / 2), out_features=int(fcg_filter_list[-1] / 4))
+        self.pj3 = torch.nn.Linear(in_features=int(fcg_filter_list[-1] / 4), out_features=6)
+        self.last_activation = nn.Softmax(dim=1)
+
+    def forward(self, real_local_batch: Batch, real_bt_positions: list, bt_external_names: list,
+                bt_all_function_edges: list):
+        rtn_local_batch = self.forward_cfg_gnn(local_batch=real_local_batch)
+        x_cfg_pool = torch_geometric.nn.glob.global_max_pool(x=rtn_local_batch.x, batch=rtn_local_batch.batch)
+        fcg_list = []
+        fcg_internal_list = []
+        for idx_batch in range(len(real_bt_positions) - 1):
+            start_pos, end_pos = real_bt_positions[idx_batch: idx_batch + 2]
+            idx_x_cfg = x_cfg_pool[start_pos: end_pos]
+            fcg_internal_list.append(idx_x_cfg)
+            idx_x_external = self.external_embedding_layer(
+                torch.tensor([bt_external_names[idx_batch]], dtype=torch.long))
+            idx_x_external = idx_x_external.squeeze(dim=0)
+            idx_x_total = torch.cat([idx_x_cfg, idx_x_external], dim=0)
+            idx_function_edge = torch.tensor(bt_all_function_edges[idx_batch], dtype=torch.long)
+            idx_graph_data = Data(x=idx_x_total, edge_index=idx_function_edge)
+            idx_graph_data.validate()
+            fcg_list.append(idx_graph_data)
+        fcg_batch = Batch.from_data_list(fcg_list)
+        # Hierarchy 2: Function Call Graph (FCG) embedding and pooling
+        rtn_fcg_batch = self.forward_fcg_gnn(function_batch=fcg_batch)  # [batch_size, max_node_size, dim]
+        x_fcg_pool = torch_geometric.nn.glob.global_max_pool(x=rtn_fcg_batch.x, batch=rtn_fcg_batch.batch)
+        batch_final = x_fcg_pool
+        # last step: project down to the number of classes (multiclass)
+        bt_final_embed = self.pj3(self.pj2(self.pj1(batch_final)))
+        bt_pred = self.last_activation(bt_final_embed)
+        return bt_pred
+
+    def forward_cfg_gnn(self, local_batch: Batch):
+        in_x, edge_index = local_batch.x, local_batch.edge_index
+        for i in range(self.cfg_filter_length - 1):
+            out_x = getattr(self, 'CFG_gnn_{}'.format(i + 1))(x=in_x, edge_index=edge_index)
+            out_x = torch.nn.functional.relu(out_x, inplace=True)
+            out_x = self.dropout(out_x)
+            in_x = out_x
+        local_batch.x = in_x
+        return local_batch
+
+    def forward_fcg_gnn(self, function_batch: Batch):
+        in_x, edge_index = function_batch.x, function_batch.edge_index
+        for i in range(self.fcg_filter_length - 1):
+            out_x = getattr(self, 'FCG_gnn_{}'.format(i + 1))(x=in_x, edge_index=edge_index)
+            out_x = torch.nn.functional.relu(out_x, inplace=True)
+            out_x = self.dropout(out_x)
+            in_x = out_x
+        function_batch.x = in_x
+        return function_batch
\ No newline at end of file
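Review note: a minimal smoke-test sketch of how this module is driven. The dummy vocab, graph sizes, and edge lists below are hypothetical, and it assumes the PyG version pinned by this repo (one where torch_geometric.nn.glob still exists, as the module itself relies on):

    import torch
    from torch_geometric.data import Batch, Data

    from HierarchicalGraphModel_mine import HierarchicalGraphNeuralNetwork


    class DummyVocab(object):
        # only the two attributes the model actually reads
        max_vocab_size = 100
        pad_idx = 0


    model = HierarchicalGraphNeuralNetwork(external_vocab=DummyVocab())

    # one binary with two internal functions of 3 and 2 basic blocks (11 features per block)
    cfg_a = Data(x=torch.randn(3, 11), edge_index=torch.tensor([[0, 1], [1, 2]]))
    cfg_b = Data(x=torch.randn(2, 11), edge_index=torch.tensor([[0], [1]]))

    pred = model(
        real_local_batch=Batch.from_data_list([cfg_a, cfg_b]),
        real_bt_positions=[0, 2],                        # CFG-pool rows 0..2 belong to this one sample
        bt_external_names=[[1, 2]],                      # vocab ids of two external functions (nodes 2 and 3)
        bt_all_function_edges=[[[0, 1, 2], [2, 3, 1]]],  # FCG edges in the two-row [2, E] layout
    )
    print(pred.shape)  # torch.Size([1, 6]), a softmax over the 6 classes
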
diff --git a/Genius3/raw-feature-extractor/convert_pkl_to_json.py b/Genius3/raw-feature-extractor/convert_pkl_to_json.py
index 81afb45..56bae4c 100644
--- a/Genius3/raw-feature-extractor/convert_pkl_to_json.py
+++ b/Genius3/raw-feature-extractor/convert_pkl_to_json.py
@@ -6,7 +6,7 @@ import os
 from tqdm import tqdm
 
 
-def convert(start, end):
+def convert(start, end, overhaul):
     for workflow in range(start, end):
         # workflow = 0
         cfg_dir = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_cfg".format(workflow)
@@ -16,6 +16,12 @@
         log_path = "D:\\hkn\\infected\\datasets\\logging\\convert_log{}.log".format(workflow)
         process_log_path = "D:\\hkn\\infected\\datasets\\logging\\convert_process_log{}.log".format(workflow)
 
+        if overhaul:
+            if os.path.exists(log_path):
+                os.remove(log_path)
+            if os.path.exists(process_log_path):
+                os.remove(process_log_path)
+
         with open(log_path, 'a+') as log, open(process_log_path, 'a+') as process_log:
             logged = log.readline()
             if logged == '':
@@ -112,5 +118,116 @@
             process_log.write("index {}, {} process done.\n".format(index, cfg))
 
 
+def convert_benign(overhaul):
+    cfg_dir = "D:\\hkn\\infected\\datasets\\benign_cfg\\new"
+    output_dir = "D:\\hkn\\infected\\datasets\\benign_json\\new"
+    dot_dir = "D:\\hkn\\infected\\datasets\\benign_dot\\new"
+
+    log_path = "D:\\hkn\\infected\\datasets\\logging\\convert_benign_log.log"
+    process_log_path = "D:\\hkn\\infected\\datasets\\logging\\convert_benign_process_log.log"
+
+    if overhaul:
+        if os.path.exists(log_path):
+            os.remove(log_path)
+        if os.path.exists(process_log_path):
+            os.remove(process_log_path)
+
+    with open(log_path, 'a+') as log, open(process_log_path, 'a+') as process_log:
+        logged = log.readline()
+        if logged == '':
+            log_index = 0
+        else:
+            log_index = int(logged)
+
+        for index, cfg in enumerate(tqdm(os.listdir(cfg_dir))):
+            if index < log_index:
+                continue
+
+            name = cfg[:-4]  # bare file name without the extension
+            cfg_file = open(os.path.join(cfg_dir, name + '.ida'), 'r')
+            try:
+                data = pk.load(cfg_file)
+            except EOFError:
+                process_log.write("index {}, {} process failed. EOFError occurred.\n".format(index, cfg))
+                continue
+            except ValueError:
+                process_log.write("index {}, {} process failed. ValueError occurred.\n".format(index, cfg))
+                continue
+            finally:
+                cfg_file.close()
+
+            dot_file_path = os.path.join(dot_dir, name + '.dot')
+            if not os.path.exists(dot_file_path):
+                process_log.write("index {}, {} process failed. dot file does not exist.\n".format(index, cfg))
+            else:
+                # open the .dot file to recover the FCG
+                raw_function_edges = []
+                # 2023.8.12 bug fix: the FCG (.dot) generated by IDA contains all functions,
+                # while data.raw_graph_list contains only the internal ones
+                functions_list = []
+                with open(dot_file_path, 'r') as dot:
+                    for line in dot:
+                        if '->' in line:
+                            raw_function_edges.append(re.findall(r'\b\d+\b', line))
+                        elif 'label' in line:
+                            functions_list.append(line[line.find('= "') + 3:line.find('",')])
+
+                # no internal function detected; this should not normally happen,
+                # so drop the sample to be safe
+                if len(raw_function_edges) == 0:
+                    continue
+
+                # build the JSON object for the current PE file
+                json_obj = {
+                    'hash': data.binary_name[11:],  # strips the 11-char 'VirusShare_' prefix
+                    # 2023.8.12 bug fix: this counted internal functions only
+                    # 'function_number': len(data.raw_graph_list),
+                    'function_number': len(functions_list),
+                    'function_edges': [[int(d[0]) for d in raw_function_edges],
+                                       [int(d[1]) for d in raw_function_edges]],
+                    'acfg_list': [],
+                    'function_names': functions_list
+                }
+
+                # 2023.8.12 bug fix: data.raw_graph_list holds the internal functions detected by IDA,
+                # external functions excluded, so neither the function list nor the count may come from it.
+                # Read the pkl file; each ACFG is decomposed from one function.
+                for acfg in data.raw_graph_list:
+                    # skip external functions; no CFG needs to be built for them
+                    if acfg.funcname != 'start' and acfg.funcname != 'start_0' and 'sub_' not in acfg.funcname:
+                        continue
+
+                    # index 2 because the Genius framework stores the offspring count at position 2
+                    offspring = [d.get('v')[2] for d in acfg.g.node.values()]
+                    # for unknown reasons the two arrays can differ in length, although they should match;
+                    # trust the framework and trim bb_features down to the length of g.node
+                    diff = len(acfg.g) - len(acfg.bb_features)
+                    if diff < 0:
+                        del acfg.bb_features[diff:]
+
+                    # append the offspring count to each block's feature vector
+                    for i, offs in enumerate(offspring):
+                        acfg.bb_features[i].append(offs)
+
+                    acfg_item = {
+                        'block_number': len(acfg.g),
+                        'block_edges': [[d[0] for d in acfg.g.edges], [d[1] for d in acfg.g.edges]],
+                        'block_features': acfg.bb_features
+                    }
+
+                    json_obj['acfg_list'].append(acfg_item)
+                    # json_obj['function_names'].append(acfg.funcname)
+
+                # write the result to a local JSON file
+                result = json.dumps(json_obj, ensure_ascii=False)
+
+                with open(os.path.join(output_dir, name + '.jsonl'), 'w') as out:
+                    out.write(result)
+
+            log.truncate(0)
+            log.seek(0)
+            log.write(str(index))
+            log.flush()
+            process_log.write("index {}, {} process done.\n".format(index, cfg))
+
+
 if __name__ == '__main__':
-    convert(35, 69)
+    # convert(35, 69)
+    convert_benign(True)
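Review note: for orientation, one emitted .jsonl record has the shape below, shown as the Python dict that json.dumps receives (all values are illustrative, not from a real sample):

    {
        'hash': '0a1b2c3d',                    # binary_name with the 11-char prefix stripped
        'function_number': 3,                  # every function named in the .dot FCG, internal and external
        'function_edges': [[0, 0, 1],          # caller function ids
                           [1, 2, 2]],         # callee function ids
        'acfg_list': [                         # one entry per internal function only
            {
                'block_number': 2,
                'block_edges': [[0], [1]],     # basic-block edges, same two-row layout
                'block_features': [[0] * 11,   # per-block features, offspring count appended last
                                   [0] * 11]
            }
        ],
        'function_names': ['start', 'sub_401000', 'CreateFileA']
    }
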
-S"D:\hkn\project_folder\Gencoding3\Genius3\raw-feature-extractor\preprocessing_ida.py -1" -oF:\iout {}'.format( + os.path.join(benign_pe_dir, pe)) + + p = multiprocessing.Process(target=call_preprocess, args=[cmd_line]) + p.start() + flag_kill = True + start = time.time() + while time.time() - start <= TIMEOUT: + if not p.is_alive(): + flag_kill = False + break + else: + time.sleep(1) + + if flag_kill: + subprocess.call('taskkill /im idaq64.exe /f') + process_log.write( + "index {}, {} stuck, process terminated.\n".format(index, pe)) + + total_failed += 1 + else: + # 正常运行结束 + log.truncate(0) + log.seek(0) + log.write(str(index)) + log.flush() + process_log.write("index {}, {} process done.\n".format(index, pe)) + # 所有副产物删除 + delete_output() + + print('总失败数{}'.format(total_failed)) + + +def mal_batch_mode(start, end): # 只选其中这些类的pe进行分析,其他的就直接跳过 families_need_to_analyze = {'wacatac': 0, 'glupteba': 0, 'ulpm': 0, 'fugrafa': 0, 'tiggre': 0, 'redcap': 0, 'generickdz': 0, 'berbew': 0, 'agenttesla': 0, 'lazy': 0} + # 记录ida处理报错的数据来自哪些家族 + failed_family = {'wacatac': 0, 'glupteba': 0, 'ulpm': 0, 'fugrafa': 0, 'tiggre': 0, + 'redcap': 0, 'generickdz': 0, 'berbew': 0, 'agenttesla': 0, 'lazy': 0} + # 总失败数据数量 + total_failed = 0 + for workflow in range(start, end): # pe_dir = 'D:\\hkn\\infected\\datasets\\virusshare_test' pe_dir = 'D:\\hkn\\infected\\datasets\\virusshare_infected{}'.format(workflow) @@ -73,6 +138,9 @@ def batch_mode(start, end): subprocess.call('taskkill /im idaq64.exe /f') process_log.write( "index {}, {} in workflow {} stuck, process terminated.\n".format(index, pe, workflow)) + + failed_family[pe_family] += 1 + total_failed += 1 else: # 正常运行结束 log.truncate(0) @@ -85,6 +153,10 @@ def batch_mode(start, end): # 一次workflow结束后将所有副产物删除 delete_output() + print(families_need_to_analyze) + print('\n') + print(failed_family, '总失败数{}'.format(total_failed)) + def delete_output(): out_dir = 'F:\\iout' @@ -96,4 +168,5 @@ def delete_output(): # 注意:该py文件必须放在IDA的根目录下,且必须使用cmd命令执行,否则无法链接到python库 # F:\\kkk\\IDA_6.6 if __name__ == '__main__': - batch_mode(36, 69) + benign_batch_mode(True) + # mal_batch_mode(35, 69) diff --git a/Genius3/raw-feature-extractor/preprocessing_ida.py b/Genius3/raw-feature-extractor/preprocessing_ida.py index fd24ec7..507d83c 100644 --- a/Genius3/raw-feature-extractor/preprocessing_ida.py +++ b/Genius3/raw-feature-extractor/preprocessing_ida.py @@ -1,11 +1,8 @@ # -*- coding: UTF-8 -*- import pickle from func import * -from raw_graphs import * from idc import * -import idautils import os -import sys def preprocess(): @@ -18,9 +15,13 @@ def preprocess(): binary_name = idc.GetInputFile() workflow = idc.ARGV[1] - # workflow = 0 - cfg_path = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_cfg".format(workflow) - gdl_path = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_dot\\{}.dot".format(workflow, binary_name) + # workflow为特定值时分析良性软件,否则分析恶意软件 + if workflow == '-1': + cfg_path = "D:\\hkn\\infected\\datasets\\benign_cfg\\new" + gdl_path = "D:\\hkn\\infected\\datasets\\benign_dot\\new\\{}.dot".format(binary_name) + else: + cfg_path = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_cfg".format(workflow) + gdl_path = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_dot\\{}.dot".format(workflow, binary_name) analysis_flags = idc.GetShortPrm(idc.INF_START_AF) analysis_flags &= ~idc.AF_IMMOFF diff --git a/Genius3/raw-feature-extractor/test.py b/Genius3/raw-feature-extractor/test.py index 6e3c460..722739c 100644 --- a/Genius3/raw-feature-extractor/test.py +++ 
diff --git a/Genius3/raw-feature-extractor/preprocessing_ida.py b/Genius3/raw-feature-extractor/preprocessing_ida.py
index fd24ec7..507d83c 100644
--- a/Genius3/raw-feature-extractor/preprocessing_ida.py
+++ b/Genius3/raw-feature-extractor/preprocessing_ida.py
@@ -1,11 +1,8 @@
 # -*- coding: UTF-8 -*-
 import pickle
 from func import *
-from raw_graphs import *
 from idc import *
-import idautils
 import os
-import sys
 
 
 def preprocess():
@@ -18,9 +15,13 @@
     binary_name = idc.GetInputFile()
     workflow = idc.ARGV[1]
-    # workflow = 0
-    cfg_path = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_cfg".format(workflow)
-    gdl_path = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_dot\\{}.dot".format(workflow, binary_name)
+    # a sentinel workflow value selects benign-software analysis; any other value means malware
+    if workflow == '-1':
+        cfg_path = "D:\\hkn\\infected\\datasets\\benign_cfg\\new"
+        gdl_path = "D:\\hkn\\infected\\datasets\\benign_dot\\new\\{}.dot".format(binary_name)
+    else:
+        cfg_path = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_cfg".format(workflow)
+        gdl_path = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_dot\\{}.dot".format(workflow, binary_name)
 
     analysis_flags = idc.GetShortPrm(idc.INF_START_AF)
     analysis_flags &= ~idc.AF_IMMOFF
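Review note: the '-1' sentinel reaches this script through IDAPython's script arguments, assuming the usual IDA convention that idc.ARGV[0] holds the script path:

    # with the batch switch -S"...preprocessing_ida.py -1", inside IDA:
    #   idc.ARGV == ['...preprocessing_ida.py', '-1']   (path shortened here for illustration)
    #   so idc.ARGV[1] == '-1' routes output to the benign_cfg / benign_dot directories above
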
diff --git a/Genius3/raw-feature-extractor/test.py b/Genius3/raw-feature-extractor/test.py
index 6e3c460..722739c 100644
--- a/Genius3/raw-feature-extractor/test.py
+++ b/Genius3/raw-feature-extractor/test.py
@@ -46,7 +46,6 @@
 def create(parent_dir, folder):
     os.mkdir(os.path.join(parent_dir, folder))
 
 
-
 def change_max_item_lines():
     f = open("F:\\kkk\\IDA_6.6\\cfg\\ida.cfg", 'rb')
     s = f.read()
@@ -89,6 +88,7 @@
 def check_json():
+    print('start checking json')
     for workflow in tqdm(range(0, 69)):
         json_dir = 'D:\\hkn\\infected\\datasets\\virusshare_infected{}_json'.format(workflow)
         for json_file in os.listdir(json_dir):
@@ -99,9 +99,13 @@
                 continue
             finally:
                 f.close()
-            for acfg in data['acfg_list']:
-                if acfg['block_number'] != len(acfg['block_features']):
-                    print("{} {}\n".format(workflow, json_file))
+
+            if len(data['function_edges'][0]) == 0:
+                print("{} {} function_edges null\n".format(workflow, json_file))
+            # continue
+            # for acfg in data['acfg_list']:
+            #     if acfg['block_number'] != len(acfg['block_features']):
+            #         print("{} {}\n".format(workflow, json_file))
 
 
 # temporary helper: delete all .jsonl files
@@ -112,21 +116,44 @@
         for f in os.listdir(json_dir):
             os.remove(os.path.join(json_dir, f))
 
 
-# temporary helper: rename the .pt files so they match what the code expects
-def rename():
+def delete_all_local():
+    src = 'D:\\hkn\\infected\\datasets\\proprecessed_pt'
+    dirs = ['train_malware', 'test_malware', 'valid_malware', 'train_benign', 'test_benign', 'valid_benign',
+            'train_malware_backup', 'test_malware_backup', 'valid_malware_backup']
+    for d in dirs:
+        path = os.path.join(src, d)
+        for f in os.listdir(path):
+            os.remove(os.path.join(path, f))
+
+
+# rename the .pt files so they match what the code expects
+def rename(mal_or_be, postfix):
     tag_set = ['train', 'test', 'valid']
     for tag in tag_set:
-        data_dir = 'D:/hkn/infected/datasets/proprecessed_pt/{}_malware/'.format(tag)
+        data_dir = 'D:/hkn/infected/datasets/proprecessed_pt/{}_{}{}/'.format(tag, mal_or_be, postfix)
         for index, f in enumerate(os.listdir(data_dir)):
            os.rename(os.path.join(data_dir, f), os.path.join(data_dir, 'm' + f))
 
     for tag in tag_set:
-        data_dir = 'D:/hkn/infected/datasets/proprecessed_pt/{}_malware/'.format(tag)
+        data_dir = 'D:/hkn/infected/datasets/proprecessed_pt/{}_{}{}/'.format(tag, mal_or_be, postfix)
         for index, f in enumerate(os.listdir(data_dir)):
-            os.rename(os.path.join(data_dir, f), os.path.join(data_dir, 'malware_{}.pt'.format(index)))
+            os.rename(os.path.join(data_dir, f), os.path.join(data_dir, '{}_{}.pt'.format(mal_or_be, index)))
 
 
-def split_samples():
-    path = 'D:\\hkn\\infected\\datasets\\proprecessed_pt\\all'
+def split_samples(flag):
+    postfix = ''
+    if flag == 'one_family':
+        path = 'D:\\hkn\\infected\\datasets\\proprecessed_pt\\one_family_malware'
+        tag = 'malware'
+    elif flag == 'standard':
+        path = 'D:\\hkn\\infected\\datasets\\proprecessed_pt\\all'
+        postfix = '_backup'
+        tag = 'malware'
+    elif flag == 'benign':
+        path = 'D:\\hkn\\infected\\datasets\\proprecessed_pt\\all_benign'
+        tag = 'benign'
+    else:
+        return
+
     out = 'D:\\hkn\\infected\\datasets\\proprecessed_pt'
     os_list = os.listdir(path)
     random.shuffle(os_list)
@@ -135,11 +162,12 @@
     test_len = int(train_len / 8)
     for index, f in enumerate(os_list):
         if index < train_len:
-            shutil.copy(os.path.join(path, f), os.path.join(out, 'train_malware'))
+            shutil.copy(os.path.join(path, f), os.path.join(out, 'train_{}'.format(tag) + postfix))
         elif train_len <= index < train_len + test_len:
-            shutil.copy(os.path.join(path, f), os.path.join(out, 'test_malware'))
+            shutil.copy(os.path.join(path, f), os.path.join(out, 'test_{}'.format(tag) + postfix))
         else:
-            shutil.copy(os.path.join(path, f), os.path.join(out, 'valid_malware'))
+            shutil.copy(os.path.join(path, f), os.path.join(out, 'valid_{}'.format(tag) + postfix))
+    rename(tag, postfix)
 
 
 def half_divide():
@@ -206,6 +234,19 @@
         os.remove(os.path.join(pe_dir, name))
 
 
+def delete_pe():
+    dot_dir = 'D:\\hkn\\infected\\datasets\\benign_dot'
+    cfg_dir = 'D:\\hkn\\infected\\datasets\\benign_cfg'
+    dot_list = os.listdir(dot_dir)
+    for cfg in os.listdir(cfg_dir):
+        name = cfg[:-4] + ".dot"
+        if name in dot_list:
+            continue
+        else:
+            print(os.path.join(dot_dir, name))
+            # os.remove(os.path.join(dot_dir, cfg))
+
+
 if __name__ == '__main__':
     # create_dir()
     # change_max_item_lines()
@@ -213,11 +254,22 @@
     # delete_error()
     # test()
     # delete_jsonl()
+    delete_all_local()
     # check_json()
-    split_samples()
-    # rename()
+    # delete_pe()
+
+    # rename('malware', '_backup')
+
+    # pass 'standard', 'benign' or 'one_family':
+    # 'standard' processes all malware samples
+    # split_samples('standard')
+    # 'one_family' processes a single family only, used to test the original model's binary classification
+    # split_samples('one_family')
+    # 'benign' processes the benign samples
+    # split_samples('benign')
+
     # half_divide()
     # copy_train_data()
     # clear_dot()
     # read_test()
-    # del_redundant()
+    # del_redundant()
\ No newline at end of file
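Review note: a sketch of the proprecessed_pt layout that split_samples('benign') plus the rename('benign', '') it now triggers should leave behind ('proprecessed' is the repo's own spelling; file names illustrative):

    D:\hkn\infected\datasets\proprecessed_pt\
        all_benign\       source .pt files, copied but left untouched
        train_benign\     benign_0.pt, benign_1.pt, ...   (first train_len files of the shuffled list)
        test_benign\      benign_0.pt, ...                (the next train_len / 8 files)
        valid_benign\     benign_0.pt, ...                (the remainder)
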