46 lines
2.0 KiB
Python
46 lines
2.0 KiB
Python
|
import os
|
|||
|
import sys
|
|||
|
import shutil
|
|||
|
|
|||
|
from tqdm import tqdm
|
|||
|
|
|||
|
class de_oversizefile(object):
    """Move sample files whose size is outside a [min, max] MB window out of a tree.

    Files larger than ``max_file_size`` or smaller than ``min_file_size`` are
    copied into ``save_dir`` and then deleted from the sample tree; files
    inside the window are left in place.
    """

    # Number of bytes in one megabyte, as used by the size thresholds.
    _BYTES_PER_MB = 1024 * 1024

    def __init__(self, sampleFile_dir, max_file_size, min_file_size, save_dir):
        """Store the filter configuration.

        :param sampleFile_dir: directory tree that contains the sample files
        :param max_file_size: largest allowed sample size, in MB
        :param min_file_size: smallest allowed sample size, in MB
        :param save_dir: directory that receives the files removed from the tree
        """
        self.sampleFile_dir = sampleFile_dir
        self.max_file_size = max_file_size
        self.min_file_size = min_file_size
        self.save_dir = save_dir

    def _walk_sample_dirs(self):
        """Yield ``(root, files)`` for each directory of the sample tree, skipping ``save_dir``."""
        save_abs = os.path.abspath(self.save_dir)
        for root, dirs, files in os.walk(self.sampleFile_dir):
            # Prune in place so os.walk never descends into the directory that
            # receives removed files; otherwise already-moved files would be
            # examined again and copied onto themselves (shutil.SameFileError).
            dirs[:] = [
                d for d in dirs
                if os.path.abspath(os.path.join(root, d)) != save_abs
            ]
            if os.path.abspath(root) != save_abs:
                yield root, files

    def _quarantine(self, file_path):
        """Copy ``file_path`` into ``save_dir`` and delete the original."""
        # Without an existing directory shutil.copy would create a regular
        # file named ``save_dir`` and every later copy would overwrite it.
        os.makedirs(self.save_dir, exist_ok=True)
        # NOTE: files that share a base name overwrite each other in save_dir.
        shutil.copy(file_path, self.save_dir)
        os.remove(file_path)

    def filesizeFilter(self):
        """Remove every sample file that is too large or too small.

        Walks ``sampleFile_dir`` recursively. A file strictly larger than
        ``max_file_size`` MB or strictly smaller than ``min_file_size`` MB is
        copied to ``save_dir`` and deleted from its original location; a
        message is printed for every file examined.

        :return: None
        """
        for root, files in self._walk_sample_dirs():
            for file_name in tqdm(files):
                file_path = os.path.join(root, file_name)
                size_mb = os.stat(file_path).st_size / self._BYTES_PER_MB
                if size_mb > self.max_file_size:
                    self._quarantine(file_path)
                    print('删除文件{}其大小为{}MB大于{}MB,已将其保存在{}文件中,请查看'.format(
                        file_name, size_mb, self.max_file_size, self.save_dir))
                elif size_mb < self.min_file_size:
                    self._quarantine(file_path)
                    print('删除文件{}其大小为{}MB小于{}MB,已将其保存在{}文件中,请查看'.format(
                        file_name, size_mb, self.min_file_size, self.save_dir))
                else:
                    print('{}其大小为{}MB为所需要的样本无需清除'.format(file_name, size_mb))
        print("已留下所需文件")
|