22 lines
562 B
Python
22 lines
562 B
Python
|
import pandas as pd
|
|||
|
|
|||
|
|
|||
|
def extract_features(file_path, *, min_count=10000):
    """Return the rows of a CSV whose ``count`` exceeds ``min_count``.

    The file is loaded with ``pandas.read_csv`` and must contain a column
    named ``count``. Values in that column that cannot be parsed as numbers
    are coerced to NaN and therefore never pass the threshold filter.

    Args:
        file_path: Path (or file-like object) accepted by ``pandas.read_csv``.
        min_count: Exclusive lower bound on ``count``; only rows with
            ``count > min_count`` are kept. Defaults to 10000.

    Returns:
        A DataFrame holding every column of the matching rows, ordered by
        ``count`` from largest to smallest.

    Raises:
        KeyError: If the CSV has no ``count`` column.
    """
    # Read the CSV file.
    df = pd.read_csv(file_path)

    # Non-numeric entries become NaN instead of raising.
    df["count"] = pd.to_numeric(df["count"], errors="coerce")

    # Sort by count in descending order (largest counts first).
    df_sorted = df.sort_values(by="count", ascending=False)

    # Keep only rows above the threshold; NaN > x is False, so rows with
    # unparsable counts are dropped here as well.
    features = df_sorted[df_sorted["count"] > min_count]

    return features
|
|||
|
|
|||
|
if __name__ == "__main__":
    # Run the extraction on the CSV file at this path and print the result.
    csv_path = './out/3gram.csv'
    features = extract_features(csv_path)
    print(features)
|