2021-06-06 20:50:36 +08:00
|
|
|
import os
|
|
|
|
|
|
|
|
from utils import ORIGINAL_DATA_BASE, read_file
|
|
|
|
|
|
|
|
|
|
|
|
def write_file(data, filename):
    """Write every item of ``data`` to ``filename``, replacing ``<space>`` with ``SPACE``.

    Args:
        data: Iterable of strings. Each item is written as-is apart from the
            token replacement; no separator or newline is added, so items
            must carry their own line endings if line-oriented output is
            wanted.
        filename: Path of the output file. It is opened in text mode with
            UTF-8 encoding and truncated if it already exists.
    """
    print("Writing data into {}...".format(filename))
    with open(filename, "w", encoding="utf-8") as fout:
        # A single writelines() call over a lazy generator replaces one
        # write() call per item; the bytes written are the same.
        fout.writelines(sent.replace("<space>", "SPACE") for sent in data)
|
|
|
|
|
|
|
|
|
|
|
|
def convert(fin, fout):
    """Load ``fin`` with ``read_file`` and hand the result to ``write_file``.

    Args:
        fin: Input path passed to ``read_file``.
        fout: Output path passed to ``write_file``.
    """
    print("Start the replacement task for {}...".format(fin))
    write_file(read_file(fin), fout)
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Convert the combined ``win.all`` dataset into ``inst.pos.txt``.

    Both paths are relative, so they are resolved against the current
    working directory of the process.
    """
    # NOTE: an earlier version looped over shard indices (range(6), later
    # range(10)) and converted ORIGINAL_DATA_BASE/"win32_0{i}xxxx.all" into
    # ORIGINAL_DATA_BASE/"inst.{i}.pos.txt"; the single combined file below
    # replaces that loop.
    #
    # os.path.join() with a single argument returns it unchanged, so the
    # paths are passed directly.
    source_path = '../dataset/all/win.all'
    target_path = '../dataset/all/inst.pos.txt'
    convert(source_path, target_path)
|
2021-06-06 20:50:36 +08:00
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the conversion only when this file is executed as a script,
    # not when it is imported as a module.
    main()
|