35 lines
993 B
Python
35 lines
993 B
Python
|
import os
|
||
|
from random import randint
|
||
|
|
||
|
from tqdm import tqdm
|
||
|
|
||
|
from utils import ORIGINAL_DATA_BASE, read_file
|
||
|
|
||
|
|
||
|
def create(pos, neg, tgt):
    """Write randomly paired negative examples to ``tgt``.

    For every entry read from ``pos``, its first tab-separated field is paired
    with a field taken from a randomly chosen entry of ``neg`` (one of that
    entry's first two tab-separated fields, picked at random). Each pair is
    written to ``tgt`` as a single ``first<TAB>pair`` line.

    Args:
        pos: Path passed to ``read_file`` for the entries that supply the
            first element of each pair.
        neg: Path passed to ``read_file`` for the entries the second element
            is sampled from.
        tgt: Path of the output file; it is opened in ``"w"`` mode with UTF-8
            encoding, so existing content is overwritten.

    Raises:
        ValueError: If ``pos`` yields entries but ``neg`` yields none, since
            there is nothing to sample from.
    """
    # NOTE(review): read_file is assumed to return a sequence of text lines
    # (possibly still ending in "\n") -- confirm against utils.read_file.
    pos_sents = read_file(pos)
    neg_sents = read_file(neg)
    neg_length = len(neg_sents)

    # Fail early with a clear message instead of letting randint(0, -1) raise
    # an opaque "empty range" error after the target file was truncated.
    if neg_length == 0 and len(pos_sents) > 0:
        raise ValueError("no negative sentences found in {}".format(neg))

    print("Start writing negative examples to {}...".format(tgt))
    with open(tgt, "w", encoding="utf-8") as fout:
        for sent in tqdm(pos_sents):
            # A line without any tab keeps its trailing newline in field 0;
            # drop it so each output record stays on exactly one line.
            first = sent.split("\t")[0].replace("\n", "")

            fields = neg_sents[randint(0, neg_length - 1)].split("\t")
            # Choose between the first two columns, but never index past the
            # end when the sampled line has only a single column.
            column = randint(0, min(1, len(fields) - 1))
            pair = fields[column].replace("\n", "")

            fout.write(first + "\t" + pair + "\n")
|
||
|
|
||
|
|
||
|
def main():
    """Generate the negative-example file for each selected data split.

    For split ``i`` the positives come from ``linux32_0<i>xxxx.all`` and the
    negatives from the next split, ``(i + 1) % 6``; the result is written to
    ``inst.<i>.neg.txt``. All paths live under ``ORIGINAL_DATA_BASE``.
    """
    source_name = "linux32_0{}xxxx.all"
    target_name = "inst.{}.neg.txt"

    # Only split 1 is processed here; a commented-out variant of this loop
    # iterated over range(6) instead.
    for i in (1,):
        j = (i + 1) % 6
        create(
            os.path.join(ORIGINAL_DATA_BASE, source_name.format(i)),
            os.path.join(ORIGINAL_DATA_BASE, source_name.format(j)),
            os.path.join(ORIGINAL_DATA_BASE, target_name.format(i)),
        )
|
||
|
|
||
|
|
||
|
# Run the generation only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|