增加架构检查和文件序列排序

This commit is contained in:
huihun 2024-04-23 10:50:11 +08:00
parent 1e51a9cf2a
commit 880dd2dc65

View File

@@ -45,7 +45,7 @@ def addr2vec(base_file_path, index):
# 多线程预测bert # 多线程预测bert
feature_set = {} feature_set = {}
with multiprocessing.Pool(processes=4) as pool: with multiprocessing.Pool(processes=2) as pool:
try: try:
results = list(tqdm(pool.imap_unordered(bb2vec, [item for item in feature_json]), results = list(tqdm(pool.imap_unordered(bb2vec, [item for item in feature_json]),
total=len(feature_json), total=len(feature_json),
@@ -94,6 +94,7 @@ if __name__ == '__main__':
json_path = os.path.join(f'./out/json/{sample_type}') json_path = os.path.join(f'./out/json/{sample_type}')
json_files = os.listdir(json_path) json_files = os.listdir(json_path)
# json_files = ['1710ae16c54ac149f353ba58e752ba7069f88326e6b71107598283bd0fffcbd6.jsonl'] # json_files = ['1710ae16c54ac149f353ba58e752ba7069f88326e6b71107598283bd0fffcbd6.jsonl']
json_files = sorted(json_files, key=lambda x: x[0])
json_files_len = len(json_files) json_files_len = len(json_files)
now = datetime.now() now = datetime.now()
formatted_now = now.strftime("%Y-%m-%d %H:%M:%S") formatted_now = now.strftime("%Y-%m-%d %H:%M:%S")
@@ -119,10 +120,11 @@ if __name__ == '__main__':
# except Exception as e: # except Exception as e:
# print(e) # print(e)
for index, json_file in tqdm(enumerate(json_files), for index, json_file in tqdm(enumerate(json_files[::-1]),
total=len(json_files), total=len(json_files),
ascii=True, ascii=True,
desc='Total:', desc='Total:',
position=0): position=0,
maxinterval=1):
if os.path.isfile(os.path.join(json_path, json_file)): if os.path.isfile(os.path.join(json_path, json_file)):
addr2vec(os.path.join(json_path, json_file), index) addr2vec(os.path.join(json_path, json_file), index)