2021-06-06 20:50:36 +08:00
|
|
|
import os
|
2021-06-30 19:20:12 +08:00
|
|
|
import pdb
|
2021-06-06 20:50:36 +08:00
|
|
|
from utils import ORIGINAL_DATA_BASE, read_file
|
|
|
|
|
|
|
|
|
|
|
|
def check(filename):
    """Print and return the word count of the longest sentence in a file.

    Each sentence obtained from ``read_file`` is split on whitespace and the
    largest resulting token count is reported.

    Args:
        filename: Path passed straight to ``read_file``.

    Returns:
        The largest number of whitespace-separated tokens found in any
        sentence, or 0 when ``read_file`` yields no sentences.
    """
    # NOTE(review): assumes read_file yields one str per sentence/line --
    # confirm against utils.read_file.
    sents = read_file(filename)

    # str.split() with no separator splits on any run of whitespace and
    # ignores leading/trailing whitespace, so tabs and a trailing newline are
    # handled without slicing. Unconditionally dropping the last character
    # would lose a real character (and possibly a one-character word) on a
    # line that has no trailing newline.
    result = max((len(sent.split()) for sent in sents), default=0)

    print("The longest sentence in {} has {} words".format(filename, result))
    return result
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Scan every cleaned instance file and print the overall longest sentence.

    For each index in ``range(32)`` and each group in ``("pos", "neg")`` the
    file ``<ORIGINAL_DATA_BASE>/<group>_clean/inst.<i>.<group>.txt.clean`` is
    passed to ``check``; the largest value returned is printed at the end.
    """
    # Build the full list of paths first; order matches the nested
    # index-then-group iteration so per-file output appears in that order.
    paths = [
        os.path.join(
            ORIGINAL_DATA_BASE, f'{group}_clean', f"inst.{i}.{group}.txt.clean"
        )
        for i in range(32)
        for group in ("pos", "neg")
    ]

    longest = 0
    for path in paths:
        longest = max(check(path), longest)

    print("The longest sentence in all files has {} words.".format(longest))
|
|
|
|
|
|
|
|
|
|
|
|
# Run the scan only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
|
|
|