22 lines
562 B
Python
22 lines
562 B
Python
import pandas as pd
|
||
|
||
|
||
def extract_features(file_path, min_count=10000):
    """Return the rows of a CSV file whose ``count`` exceeds ``min_count``.

    The file is read as comma-separated text, its ``count`` column is
    converted to numbers, and the surviving rows are returned ordered from
    the highest count to the lowest.

    Args:
        file_path: Path of the CSV file to read. It must contain a column
            named ``count``.
        min_count: Exclusive lower bound for ``count``; only rows with a
            strictly greater value are kept. Defaults to 10000.

    Returns:
        A pandas DataFrame with all original columns, restricted to the
        rows above the threshold and sorted by ``count`` in descending
        order.

    Raises:
        KeyError: If the file has no ``count`` column.
    """
    # Read the CSV file.
    df = pd.read_csv(file_path, delimiter=',')

    # Values that cannot be parsed as numbers become NaN; NaN never
    # compares greater than the threshold, so such rows are dropped below.
    df["count"] = pd.to_numeric(df["count"], errors='coerce')

    # Keep only the rows above the threshold (filtering before sorting
    # means only the surviving rows need to be ordered).
    frequent = df[df["count"] > min_count]

    # Highest counts first.
    return frequent.sort_values(by="count", ascending=False)
|
||
|
||
if __name__ == '__main__':
    # Script entry point: run the extraction on the CSV file at this path
    # and print whatever the function returns.
    print(extract_features('./out/3gram.csv'))
|