66 lines
2.3 KiB
Python
66 lines
2.3 KiB
Python
|
import csv
|
|||
|
|
|||
|
def csv_read(malware_csv='../cfg_data/malware_msg.csv'):
    """Load per-sample CFG statistics from a CSV file.

    The first row of the file is treated as a header and discarded. From
    every following non-blank row only the first three columns are used:
    file name, node count and edge count.

    Args:
        malware_csv: Path of the UTF-8 encoded CSV file to read.

    Returns:
        A list of ``[file_name, nodes_num, edgs_num]`` lists in file order.
        The values are kept as the strings produced by ``csv.reader``.
        A completely empty file yields an empty list.

    Raises:
        IndexError: If a non-blank row has fewer than three columns.
    """
    malware_cfg_list = []
    # newline='' lets the csv module do its own newline handling, which is
    # what the csv documentation asks for when passing a file object.
    with open(malware_csv, 'r', encoding='utf-8', newline='') as f:
        # reader iterates over the file one parsed row at a time.
        reader = csv.reader(f)
        # Consume the header row; the default keeps an empty file from
        # raising StopIteration.
        next(reader, None)
        for row in reader:
            # csv.reader yields [] for blank lines (e.g. a trailing newline);
            # skip them instead of failing on row[0].
            if not row:
                continue
            # NOTE(review): a fourth column (insert point count) may exist in
            # some inputs; it is intentionally not read here.
            malware_cfg_list.append([row[0], row[1], row[2]])
    return malware_cfg_list
|
|||
|
|
|||
|
|
|||
|
# Drop CFGs whose node count is below 15 or above 10000 (default bounds).
def remove_small_big_cfg(csv_data, min_nodes=15, max_nodes=10000):
    """Keep only the samples whose node count lies within the given bounds.

    Args:
        csv_data: Iterable of rows; element ``[1]`` of each row is the node
            count, as an int or a string accepted by ``int()``.
        min_nodes: Smallest node count to keep (inclusive). Defaults to 15.
        max_nodes: Largest node count to keep (inclusive). Defaults to 10000.

    Returns:
        A new list with the rows that satisfy
        ``min_nodes <= int(row[1]) <= max_nodes``, in their original order.
        The row objects themselves are not copied.

    Raises:
        ValueError: If a node count cannot be converted by ``int()``.
    """
    return [
        item for item in csv_data
        if min_nodes <= int(item[1]) <= max_nodes
    ]
|
|||
|
|
|||
|
# Remove duplicate samples.
def remove_repeat_sample(csv_data):
    """Drop samples that repeat an earlier (node count, edge count) pair.

    Two rows are considered duplicates when elements ``[1]`` and ``[2]`` are
    both equal; the file name in element ``[0]`` is not compared. The first
    row seen for each pair is kept.

    Args:
        csv_data: Iterable of rows with at least three elements each.
            Elements ``[1]`` and ``[2]`` must be hashable (they are strings
            when the rows come from ``csv_read``).

    Returns:
        A new list holding the first occurrence of every distinct pair, in
        the original order.
    """
    seen_pairs = set()
    pretreat_data = []
    for sample in csv_data:
        pair = (sample[1], sample[2])
        # Set membership replaces a linear scan of everything kept so far.
        if pair in seen_pairs:
            continue
        seen_pairs.add(pair)
        pretreat_data.append(sample)
    return pretreat_data
|
|||
|
|
|||
|
def write_csv(pretreat_data, csv_save_path="../cfg_data/malware_pretreat_msg.csv", header=('malware_name', 'nodes_num', 'edgs_num')):
    """Write the preprocessed samples to a CSV file, preceded by a header row.

    An existing file at ``csv_save_path`` is overwritten.

    Args:
        pretreat_data: Iterable of rows to write, one row per sample.
        csv_save_path: Destination path of the UTF-8 encoded CSV file.
        header: Column names written as the first row. The default is an
            immutable tuple so it cannot be altered between calls.
    """
    # newline='' stops the text layer from translating the line endings that
    # csv.writer emits itself.
    with open(csv_save_path, 'w', encoding='utf-8', newline='') as fp:
        writer = csv.writer(fp)
        writer.writerow(header)
        writer.writerows(pretreat_data)
|
|||
|
|
|||
|
if __name__ == '__main__':
    # Each job is (label printed with the surviving row count, input CSV,
    # output CSV). Malware is processed first, then benign.
    jobs = (
        (
            "malware_len",
            '../cfg_data_with_feature/malware_msg1.csv',
            "../cfg_data_with_feature/malware_pretreat_msg1.csv",
        ),
        (
            "benign_len",
            '../cfg_data_with_feature/benign_msg1.csv',
            "../cfg_data_with_feature/benign_pretreat_msg1.csv",
        ),
    )
    for label, source_path, target_path in jobs:
        # Pipeline: load -> node-count filter -> de-duplicate -> save.
        loaded = csv_read(source_path)
        in_range = remove_small_big_cfg(loaded)
        unique_rows = remove_repeat_sample(in_range)
        write_csv(unique_rows, csv_save_path=target_path)
        print(label, len(unique_rows))
    print("good")
|
|||
|
|