66 lines
2.3 KiB
Python
66 lines
2.3 KiB
Python
import csv
|
||
|
||
def csv_read(malware_csv='../cfg_data/malware_msg.csv'):
    """Load the per-sample CFG summary rows from a CSV file.

    The first line of the file is treated as a header and discarded. From
    every remaining row only the first three columns are kept: the sample
    file name, its node count and its edge count. Values are returned as
    strings, exactly as they appear in the file.

    Args:
        malware_csv: Path of the UTF-8 encoded CSV file to read.

    Returns:
        A list of ``[file_name, nodes_num, edgs_num]`` lists in file order.
        An empty or header-only file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank row has fewer than three columns.
    """
    malware_cfg_list = []
    # newline='' is what the csv module asks for, so that quoted fields
    # containing line breaks are parsed correctly.
    with open(malware_csv, 'r', encoding='utf-8', newline='') as f:
        # csv.reader turns the file into an iterator over parsed rows.
        reader = csv.reader(f)
        # Consume the header line; the default keeps an empty file from
        # raising StopIteration.
        next(reader, None)
        for row in reader:
            # Blank lines are parsed as empty lists; skip them instead of
            # failing on row[0].
            if not row:
                continue
            # Any columns beyond the third are ignored.
            file_name, nodes_num, edgs_num = row[0], row[1], row[2]
            malware_cfg_list.append([file_name, nodes_num, edgs_num])
    return malware_cfg_list
|
||
|
||
|
||
# Drop CFGs with fewer than 15 or more than 10000 nodes
|
||
def remove_small_big_cfg(csv_data, min_nodes=15, max_nodes=10000):
    """Keep only the samples whose node count lies inside a size window.

    Args:
        csv_data: Iterable of records whose element at index 1 is the node
            count, given as anything ``int()`` accepts (e.g. ``"42"``).
        min_nodes: Smallest node count to keep (inclusive). Defaults to 15.
        max_nodes: Largest node count to keep (inclusive). Defaults to 10000.

    Returns:
        A new list holding the original record objects that pass the
        filter, in their original order.

    Raises:
        ValueError: If a node count cannot be converted with ``int()``.
        IndexError: If a record has fewer than two elements.
    """
    # Chained comparison parses each node count once and keeps both bounds
    # inclusive.
    return [
        item for item in csv_data
        if min_nodes <= int(item[1]) <= max_nodes
    ]
|
||
|
||
# Drop duplicate samples
|
||
def remove_repeat_sample(csv_data):
    """Drop records that repeat an earlier (node count, edge count) pair.

    Two records count as duplicates when their elements at index 1 and
    index 2 are both equal; the element at index 0 is not compared. The
    first record seen for each pair is kept and later ones are discarded.

    Args:
        csv_data: Iterable of records with at least three elements. The
            elements at index 1 and 2 must be hashable (e.g. strings).

    Returns:
        A new list with the surviving records in their original order.
    """
    seen_pairs = set()
    pretreat_data = []
    for sample in csv_data:
        pair = (sample[1], sample[2])
        # Set membership replaces a rescan of everything kept so far,
        # turning the quadratic search into a single pass.
        if pair in seen_pairs:
            continue
        seen_pairs.add(pair)
        pretreat_data.append(sample)
    return pretreat_data
|
||
|
||
def write_csv(pretreat_data, csv_save_path="../cfg_data/malware_pretreat_msg.csv",
              header=('malware_name', 'nodes_num', 'edgs_num')):
    """Write a header row followed by the given records to a CSV file.

    An existing file at ``csv_save_path`` is overwritten.

    Args:
        pretreat_data: Iterable of rows, each an iterable of field values.
        csv_save_path: Destination path; written as UTF-8.
        header: Column names written as the first row. The default is an
            immutable tuple so it cannot be altered between calls.

    Raises:
        OSError: If the destination cannot be opened for writing.
    """
    # newline='' lets the csv writer control line endings itself.
    with open(csv_save_path, 'w', encoding='utf-8', newline='') as fp:
        writer = csv.writer(fp)
        writer.writerow(header)
        writer.writerows(pretreat_data)
|
||
|
||
if __name__ == '__main__':
    # One entry per data set: (label printed with the final count,
    # input CSV path, output CSV path). Processed in the order listed.
    jobs = (
        ("malware_len",
         '../cfg_data_with_feature/malware_msg1.csv',
         "../cfg_data_with_feature/malware_pretreat_msg1.csv"),
        ("benign_len",
         '../cfg_data_with_feature/benign_msg1.csv',
         "../cfg_data_with_feature/benign_pretreat_msg1.csv"),
    )

    for label, source_path, target_path in jobs:
        # Read, filter by node count, de-duplicate, then save.
        csv_data = csv_read(source_path)
        pretreat_data = remove_repeat_sample(remove_small_big_cfg(csv_data))
        write_csv(pretreat_data, csv_save_path=target_path)
        print(label, len(pretreat_data))

    print("good")
|
||
|