sample_cleaning_demo/Sample_cleaning/de_oversize.py
2023-05-12 11:20:02 +08:00

46 lines
2.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import sys
import shutil
from tqdm import tqdm
class de_oversizefile(object):
    """Remove sample files whose size falls outside a configured range.

    Every file found under ``sampleFile_dir`` whose size in MB
    (bytes / 1024 / 1024) is greater than ``max_file_size`` or smaller than
    ``min_file_size`` is copied into ``save_dir`` and then deleted from its
    original location. Files inside the range are left untouched.
    """

    def __init__(self, sampleFile_dir, max_file_size, min_file_size, save_dir):
        """
        :param sampleFile_dir: directory that contains the sample files
        :param max_file_size: largest file size to keep, in MB
        :param min_file_size: smallest file size to keep, in MB
        :param save_dir: directory that receives a copy of every removed file
        """
        self.sampleFile_dir = sampleFile_dir
        self.max_file_size = max_file_size
        self.min_file_size = min_file_size
        self.save_dir = save_dir

    def _quarantine(self, file_path):
        """Copy ``file_path`` into ``save_dir`` and delete the original."""
        # NOTE: shutil.copy keeps only the base name, so two rejected files
        # with the same name in different sub-directories end up as a single
        # file in save_dir (the later copy replaces the earlier one).
        shutil.copy(file_path, self.save_dir)
        os.remove(file_path)

    def filesizeFilter(self):
        """
        Walk the sample directory and remove files that are too large or
        too small, keeping a copy of each removed file in ``save_dir``.

        ``save_dir`` is created if it does not exist yet. If ``save_dir`` is
        located inside the sample directory it is skipped during the walk so
        that already-removed files are not processed a second time.

        :return: None
        """
        bytes_per_mb = 1024 * 1024

        # Without an existing directory, shutil.copy would treat save_dir as a
        # plain file name and every rejected file would overwrite the last one.
        os.makedirs(self.save_dir, exist_ok=True)

        for root, dirs, files in os.walk(self.sampleFile_dir):
            # Do not filter the quarantine directory itself: copying a file
            # onto itself raises shutil.SameFileError.
            if os.path.samefile(root, self.save_dir):
                dirs[:] = []  # in-place edit stops os.walk from descending
                continue

            for name in tqdm(files):
                file_path = os.path.join(root, name)
                size_mb = os.stat(file_path).st_size / bytes_per_mb

                if size_mb > self.max_file_size:
                    self._quarantine(file_path)
                    print(f'删除文件{name}其大小为{size_mb}MB大于'
                          f'{self.max_file_size}MB,已将其保存在{self.save_dir}文件中,请查看')
                elif size_mb < self.min_file_size:
                    self._quarantine(file_path)
                    print(f'删除文件{name}其大小为{size_mb}MB小于'
                          f'{self.min_file_size}MB,已将其保存在{self.save_dir}文件中,请查看')
                else:
                    print(f'{name}其大小为{size_mb}MB为所需要的样本无需清除')

        print("已留下所需文件")