进度信息改进

This commit is contained in:
huihun 2024-04-20 13:39:14 +08:00
parent 7f1d7de95d
commit d6edd34697

View File

@ -10,7 +10,7 @@ import concurrent.futures
warnings.filterwarnings("ignore") warnings.filterwarnings("ignore")
def addr2vec(base_file_path): def addr2vec(base_file_path, index):
# 从路径拆分文件名与路径 # 从路径拆分文件名与路径
file_name = str(os.path.basename(base_file_path)) file_name = str(os.path.basename(base_file_path))
file_path = str(os.path.dirname(base_file_path)) file_path = str(os.path.dirname(base_file_path))
@ -19,7 +19,6 @@ def addr2vec(base_file_path):
if file_name: if file_name:
# 忽略已生成的文件 # 忽略已生成的文件
if os.path.exists(os.path.join(file_path, 'final', file_name)): if os.path.exists(os.path.join(file_path, 'final', file_name)):
process_bar.update(1)
return return
file_json = load_json(base_file_path) file_json = load_json(base_file_path)
# 确保存在基础文件而不存在特征文件的情况 # 确保存在基础文件而不存在特征文件的情况
@ -42,7 +41,11 @@ def addr2vec(base_file_path):
with multiprocessing.Pool(processes=os.cpu_count()) as pool: with multiprocessing.Pool(processes=os.cpu_count()) as pool:
try: try:
results = pool.imap_unordered(bb2vec, [item for item in feature_json]) results = list(tqdm(pool.imap_unordered(bb2vec, [item for item in feature_json]),
total=len(feature_json),
desc=f'{file_name} Progress:{index}/{json_files_len} ',
leave=True,
dynamic_ncols=True))
for result in results: for result in results:
feature_set[result[0]] = result[1] feature_set[result[0]] = result[1]
except Exception as e: except Exception as e:
@ -58,7 +61,6 @@ def addr2vec(base_file_path):
else: else:
logger.error(f'文件{file_name}不存在特征文件') logger.error(f'文件{file_name}不存在特征文件')
process_bar.update(1)
return return
@ -68,6 +70,7 @@ if __name__ == '__main__':
# json_path = os.path.join(f'./out/json/{sample_type}') # json_path = os.path.join(f'./out/json/{sample_type}')
json_path = os.path.join(f'./out/json/{sample_type}') json_path = os.path.join(f'./out/json/{sample_type}')
json_files = os.listdir(json_path) json_files = os.listdir(json_path)
json_files_len = len(json_files)
now = datetime.now() now = datetime.now()
formatted_now = now.strftime("%Y-%m-%d %H:%M:%S") formatted_now = now.strftime("%Y-%m-%d %H:%M:%S")
print("start time:", formatted_now) print("start time:", formatted_now)
@ -76,7 +79,6 @@ if __name__ == '__main__':
# total=len(json_files))) # total=len(json_files)))
# multi_thread_order(addr2vec, [os.path.join(json_path, file) for file in json_files if # multi_thread_order(addr2vec, [os.path.join(json_path, file) for file in json_files if
# os.path.isfile(os.path.join(json_path, file))], thread_num=THREAD_FULL) # os.path.isfile(os.path.join(json_path, file))], thread_num=THREAD_FULL)
process_bar = tqdm(total=len(json_files)) for index, json_file in enumerate(json_files):
for json_file in json_files:
if os.path.isfile(os.path.join(json_path, json_file)): if os.path.isfile(os.path.join(json_path, json_file)):
addr2vec(os.path.join(json_path, json_file)) addr2vec(os.path.join(json_path, json_file), index)