# Inst2Vec/process_data/create_negative_examples.py
#
# Repository viewer metadata: 35 lines, 993 B, Python

import os
from random import randint
from tqdm import tqdm
from utils import ORIGINAL_DATA_BASE, read_file
def create(pos, neg, tgt):
    """Write one randomly paired negative example per line of ``pos``.

    For every line read from ``pos`` the first tab-separated field is kept and
    paired with a field taken from a randomly chosen line of ``neg``. The
    partner field is picked at random between the first and second
    tab-separated fields of that line (or is the only field when the line has
    just one). Each pair is written to ``tgt`` as ``first<TAB>pair`` followed
    by a newline.

    Args:
        pos: Path handed to ``read_file``; its lines supply the first element
            of every pair.
        neg: Path handed to ``read_file``; its lines are the pool the random
            partner is sampled from.
        tgt: Output path. It is opened with mode ``"w"`` (UTF-8), so any
            existing content is overwritten.

    Raises:
        ValueError: If ``pos`` produced lines but ``neg`` produced none, so
            there is nothing to sample from.
    """
    # NOTE(review): read_file is assumed to return a list of text lines
    # (it is indexed and passed to len() below) -- confirm in utils.
    pos_sents = read_file(pos)
    neg_sents = read_file(neg)
    neg_length = len(neg_sents)

    # Fail before touching the target file: randint(0, -1) would otherwise
    # raise an opaque "empty range" error after tgt was already truncated.
    if neg_length == 0 and pos_sents:
        raise ValueError(
            "No negative candidates were read from {}".format(neg)
        )

    print("Start writing negative examples to {}...".format(tgt))
    with open(tgt, "w", encoding="utf-8") as fout:
        for sent in tqdm(pos_sents):
            # Drop newlines before splitting so a line without any tab does
            # not carry its line terminator into the output record.
            first = sent.replace("\n", "").split("\t")[0]
            index = randint(0, neg_length - 1)
            fields = neg_sents[index].replace("\n", "").split("\t")
            # Choose between the first two fields; clamp the upper bound so a
            # single-field line does not raise IndexError.
            pair = fields[randint(0, min(1, len(fields) - 1))]
            fout.write(first + "\t" + pair + "\n")
def main():
    """Generate the negative-example file for each selected data shard.

    Shard ``i`` is used as the positive source and shard ``(i + 1) % 6`` as
    the negative pool; the result goes to ``inst.<i>.neg.txt``. All three
    paths live under ``ORIGINAL_DATA_BASE``.
    """
    shard_name = "linux32_0{}xxxx.all"
    # Only shard 1 is processed at the moment; the previously used loop was
    # ``for i in range(6)``.
    for shard in (1,):
        neighbour = (shard + 1) % 6
        create(
            os.path.join(ORIGINAL_DATA_BASE, shard_name.format(shard)),
            os.path.join(ORIGINAL_DATA_BASE, shard_name.format(neighbour)),
            os.path.join(ORIGINAL_DATA_BASE, "inst.{}.neg.txt".format(shard)),
        )
# Run the generation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()