复制文件
import zipfile
def zipDir(dirpath, outFullName):
    """
    Compress the contents of a directory into a zip archive.

    Entries are stored relative to ``dirpath``, so the archive holds the files
    and sub-folders found below ``dirpath`` but not ``dirpath`` itself. Only
    files are written; empty directories are not recorded.

    :param dirpath: path of the directory to compress
    :param outFullName: destination path of the archive, e.g. ``.../xxxx.zip``
    :return: None
    """
    # Imported locally so the function works even when it is called before
    # the module-level ``import os`` has run.
    import os

    out_abs = os.path.abspath(outFullName)
    # The context manager closes the archive even if a write fails part-way.
    with zipfile.ZipFile(outFullName, "w", zipfile.ZIP_DEFLATED) as archive:
        for path, dirnames, filenames in os.walk(dirpath):
            for filename in filenames:
                full_path = os.path.join(path, filename)
                # Skip the archive itself when it is created inside dirpath.
                if os.path.abspath(full_path) == out_abs:
                    continue
                # relpath strips only the leading root; a plain str.replace
                # would also delete later occurrences of the root string.
                archive.write(full_path, os.path.relpath(full_path, dirpath))
# Zip /home/aistudio/exp_1/out into /home/aistudio/submission.zip.
zipDir(
    dirpath="/home/aistudio/exp_1/out",
    outFullName="/home/aistudio/submission.zip",
)
将一个文件夹内的图片复制到另一个文件夹,目标文件夹可以不为空。
src_path:源文件夹;target_path:目标文件夹
import os
def copy_dir(src_path, target_path):
    """
    Recursively copy the contents of ``src_path`` into ``target_path``.

    Both directories must already exist. Files already present in the target
    are kept unless a source file with the same name overwrites them, and
    missing sub-directories are created on demand.

    :param src_path: existing source directory
    :param target_path: existing destination directory (may be non-empty)
    :return: True when everything was copied; False when either argument is
        not a directory or when a nested directory could not be copied
        (for example, the target already has a regular file with that name)
    """
    import shutil

    if not (os.path.isdir(src_path) and os.path.isdir(target_path)):
        return False

    copied_all = True
    for entry in os.listdir(src_path):
        src_entry = os.path.join(src_path, entry)
        dst_entry = os.path.join(target_path, entry)
        if os.path.isdir(src_entry):
            if not os.path.exists(dst_entry):
                os.mkdir(dst_entry)
            # Report a failed nested copy instead of silently dropping it.
            if not copy_dir(src_entry, dst_entry):
                copied_all = False
        else:
            try:
                # Streams the data in chunks rather than loading the whole
                # file into memory.
                shutil.copyfile(src_entry, dst_entry)
            except shutil.SameFileError:
                # Source and target are the same file; nothing to copy.
                pass
    return copied_all
# Copy everything under exp_1/out into work/train/label.
copy_dir(
    src_path='exp_1/out',
    target_path='work/train/label',
)
对应图像分割划分数据集 不让预测图片进验证集(只可运行一次)
# 划分训练集/验证集,并生成文件名列表
import random
import os.path as osp
from glob import glob
# Seed for the random number generator
RNG_SEED = 114514
# Adjust this value to control the share of data assigned to the training set
TRAIN_RATIO = 0.8
# Root directory of the dataset
DATA_DIR = '/home/aistudio/work'
def write_rel_paths(phase, name, out_dir, prefix=''):
    """Append one sample record to ``<out_dir>/<phase>.txt``.

    The record is a single line holding the space-separated relative paths
    of ``name`` under the ``A``, ``B`` and ``label`` sub-folders of ``prefix``.
    """
    list_file = osp.join(out_dir, phase+'.txt')
    record = ' '.join(osp.join(prefix, folder, name) for folder in ('A', 'B', 'label'))
    # Append mode: earlier records in the list file are kept.
    with open(list_file, 'a') as f:
        f.write(record + '\n')
def write_rel_paths_1(phase, names, out_dir, prefix=''):
    """Write one record per entry of ``names`` to ``<out_dir>/<phase>.txt``.

    The list file is overwritten. Each line holds the space-separated
    relative paths of one name under the ``A``, ``B`` and ``label``
    sub-folders of ``prefix``.
    """
    folders = ('A', 'B', 'label')
    list_file = osp.join(out_dir, phase+'.txt')
    with open(list_file, 'w') as f:
        f.writelines(
            ' '.join(osp.join(prefix, folder, name) for folder in folders) + '\n'
            for name in names
        )
random.seed(RNG_SEED)
# Randomly split the labelled samples into training and validation lists.
names = list(map(osp.basename, glob(osp.join(DATA_DIR, 'train', 'label', '*.png'))))
# Sort before shuffling so repeated runs give the same split.
names.sort()
random.shuffle(names)
len_train = int(len(names)*TRAIN_RATIO)  # rounded down

# write_rel_paths appends to the list files, so empty them first; otherwise
# running this step again would duplicate every entry.
for phase in ('train', 'val'):
    with open(osp.join(DATA_DIR, phase+'.txt'), 'w'):
        pass

for num, name in enumerate(names, start=1):
    # Only names with the 'train' prefix may go to the validation list, and
    # only once the position in the shuffled list exceeds len_train. Every
    # other name is always written to the training list.
    if name.split('_')[0] == 'train' and num > len_train:
        write_rel_paths('val', name, DATA_DIR, prefix='train')
    else:
        write_rel_paths('train', name, DATA_DIR, prefix='train')
# List every PNG found under <DATA_DIR>/test/A in test.txt.
write_rel_paths_1(
    'test',
    (osp.basename(png_path) for png_path in glob(osp.join(DATA_DIR, 'test', 'A', '*.png'))),
    DATA_DIR,
    prefix='test',
)

print("数据集划分已完成。")
删除全黑标签
import cv2
import numpy as np
import os
# IPython/Jupyter shell escape (not plain Python): deletes the notebook
# checkpoint folder inside the label directory.
# NOTE(review): this path is under data/data134796/dataset, while the loop
# below scans 'work/train/label' - confirm which directory is intended.
!rm -rf data/data134796/dataset/train/label/.ipynb_checkpoints
# Delete every label image whose pixels are all zero (completely black).
filenames = 'work/train/label'
for filename in os.listdir(filenames):
    file = filenames + '/' + filename
    print(file)
    image = cv2.imread(file)
    # cv2.imread returns None for entries it cannot decode (sub-folders,
    # non-image files); skip them instead of crashing on image.any().
    if image is None:
        print('无法读取,已跳过')
        continue
    # any() is False only when every pixel value is zero.
    if not image.any():
        os.remove(file)
        print('文件删除')
    else:
        print('不全为0')
print("完成")
删除不对应的图片
# !cd data/data134796/dataset/train
import os
#根据标签删除图片 标签少图片多
# Remove 'train'-prefixed images from images_dir that have no file with the
# same base name in xml_dir.
images_dir = 'work/train/A'
xml_dir = 'work/train/B'
# Base names (extension removed) of every file in xml_dir; a set gives
# constant-time membership tests. splitext drops only the final extension,
# so names containing extra dots do not collide.
xmls = {os.path.splitext(xml)[0] for xml in os.listdir(xml_dir)}
for image_name in os.listdir(images_dir):
    # Only files whose name starts with the 'train' prefix are candidates.
    if image_name.split('_')[0] != 'train':
        continue
    if os.path.splitext(image_name)[0] not in xmls:
        print(image_name)
        # Remove by the real file name rather than assuming a '.png' suffix.
        os.remove(os.path.join(images_dir, image_name))
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)