45 lines
1.3 KiB
Python
45 lines
1.3 KiB
Python
|
import os
|
||
|
import json
|
||
|
def count_tokens(lines, vocab=None):
    """Count opcode and operand tokens found in assembly text lines.

    Each line is split on the first run of whitespace into an opcode and
    an optional operand string; the operand string is then split on
    commas.  Any token containing the substring 'LABEL' is not counted.

    Args:
        lines: Iterable of text lines (a trailing newline is allowed).
        vocab: Optional dict mapping token -> count to accumulate into.
            A new dict is created when omitted.

    Returns:
        The dict of token -> occurrence count (the same object as
        ``vocab`` when one was passed in).
    """
    if vocab is None:
        vocab = {}

    for line in lines:
        fields = line.strip("\n").split(maxsplit=1)
        if not fields:
            # Blank or whitespace-only line: split() yields [], so there
            # is no opcode to index.
            continue

        opcode = fields[0]
        if 'LABEL' not in opcode:
            vocab[opcode] = vocab.get(opcode, 0) + 1

        if len(fields) > 1:
            for oper in fields[1].split(","):
                if not oper:
                    # Empty operand (e.g. "a,,b" or a trailing comma):
                    # there is no first character to inspect.
                    continue
                # Leading whitespace is only removed when the operand
                # starts with a space character.
                if oper.startswith(" "):
                    oper = oper.lstrip()
                if 'LABEL' not in oper:
                    vocab[oper] = vocab.get(oper, 0) + 1

    return vocab


def rank_tokens(vocab):
    """Map each token to its 1-based rank by descending count.

    The sort is stable, so tokens with equal counts keep the order in
    which they were inserted into ``vocab``.

    Args:
        vocab: Dict mapping token -> occurrence count.

    Returns:
        Dict mapping token -> rank, where rank 1 is the most frequent.
    """
    ordered = sorted(vocab.items(), key=lambda item: item[1], reverse=True)
    return {token: rank for rank, (token, _count) in enumerate(ordered, start=1)}


if __name__ == '__main__':
    vocab = {}
    # NOTE(review): os.listdir returns entries in arbitrary order, so the
    # relative rank of equally frequent tokens can differ between machines.
    for file_name in os.listdir("./asm_base"):
        # The context manager closes each file even if parsing fails.
        with open(os.path.join("./asm_base", file_name), mode='r') as asm_file:
            # The first three lines of every file are skipped.
            count_tokens(asm_file.readlines()[3:], vocab)

    print(len(vocab))
    print(vocab)

    res_json = rank_tokens(vocab)
    print(res_json)