"""Pre-treat CFG summary CSV files: filter samples by size and drop duplicates."""
import csv


def csv_read(malware_csv='../cfg_data/malware_msg.csv'):
    """Read a CFG summary CSV and return its data rows.

    The first row of the file is treated as a header and discarded.  Only
    the first three columns of each data row are kept; any further columns
    are ignored.

    Args:
        malware_csv: Path of the CSV file to read (UTF-8 encoded).

    Returns:
        A list of ``[file_name, nodes_num, edgs_num]`` lists, all strings,
        in file order.  An empty file yields an empty list.
    """
    malware_cfg_list = []
    # newline='' lets the csv module handle line endings itself, as its
    # documentation recommends for files passed to csv.reader.
    with open(malware_csv, 'r', encoding='utf-8', newline='') as f:
        # reader iterates over the rows of the file.
        reader = csv.reader(f)
        # Consume the header row (usually the column names); the default
        # keeps an empty file from raising StopIteration.
        next(reader, None)
        for row in reader:
            # Blank lines come back as [] and truncated rows are too short
            # to index; skip them instead of failing with IndexError.
            if len(row) < 3:
                continue
            file_name, nodes_num, edgs_num = row[:3]
            malware_cfg_list.append([file_name, str(nodes_num), str(edgs_num)])
    return malware_cfg_list


def remove_small_big_cfg(csv_data, min_nodes=15, max_nodes=10000):
    """Drop CFGs whose node count lies outside ``[min_nodes, max_nodes]``.

    Args:
        csv_data: Iterable of rows whose element at index 1 is the node
            count (anything ``int()`` accepts).
        min_nodes: Smallest node count to keep (inclusive).
        max_nodes: Largest node count to keep (inclusive).

    Returns:
        A new list with the rows that pass the filter, in input order.

    Raises:
        ValueError: If a node count cannot be converted with ``int()``.
    """
    return [item for item in csv_data
            if min_nodes <= int(item[1]) <= max_nodes]


def remove_repeat_sample(csv_data):
    """Remove duplicate samples, keeping the first occurrence of each.

    Two rows count as duplicates when both their node count (index 1) and
    edge count (index 2) are equal; the sample name is not compared.

    Args:
        csv_data: Iterable of rows with hashable values at indices 1 and 2.

    Returns:
        A new list of the unique rows, in input order.
    """
    pretreat_data = []
    # Set of (nodes, edges) pairs already kept: one O(1) lookup per row
    # instead of rescanning every kept row.
    seen = set()
    for origin in csv_data:
        key = (origin[1], origin[2])
        if key in seen:
            continue
        seen.add(key)
        pretreat_data.append(origin)
    return pretreat_data


def write_csv(pretreat_data,
              csv_save_path="../cfg_data/malware_pretreat_msg.csv",
              header=('malware_name', 'nodes_num', 'edgs_num')):
    """Write a header row followed by the pre-treated rows to a CSV file.

    Args:
        pretreat_data: Iterable of rows to write.
        csv_save_path: Destination path; an existing file is overwritten.
        header: Column names written as the first row.  The default is an
            immutable tuple so it cannot be modified between calls.
    """
    with open(csv_save_path, 'w', encoding='utf-8', newline='') as fp:
        writer = csv.writer(fp)
        writer.writerow(header)
        writer.writerows(pretreat_data)


def _pretreat_file(label, csv_path, csv_save_path):
    """Read, filter, de-duplicate and save one CSV, then print its row count."""
    csv_data = csv_read(csv_path)
    pretreat_data = remove_small_big_cfg(csv_data)
    pretreat_data = remove_repeat_sample(pretreat_data)
    write_csv(pretreat_data, csv_save_path=csv_save_path)
    print(label, len(pretreat_data))


if __name__ == '__main__':
    _pretreat_file("malware_len",
                   '../cfg_data_with_feature/malware_msg1.csv',
                   "../cfg_data_with_feature/malware_pretreat_msg1.csv")
    _pretreat_file("benign_len",
                   '../cfg_data_with_feature/benign_msg1.csv',
                   "../cfg_data_with_feature/benign_pretreat_msg1.csv")
    print("good")