常用python程序

常用python程序,第1张

复制文件

import zipfile
def zipDir(dirpath,outFullName):
    """
    Compress every file under a directory into a zip archive.

    Member names inside the archive are relative to ``dirpath``, so the
    top-level folder itself is not part of the stored paths. Empty
    directories are not stored.

    :param dirpath: path of the directory to compress
    :param outFullName: path of the archive to create, including the
        ``xxxx.zip`` file name
    :return: None
    """
    # Imported locally so the function does not rely on ``os`` having been
    # imported somewhere else before it is called.
    import os

    out_abs = os.path.abspath(outFullName)
    # The context manager closes the archive even when a write fails.
    with zipfile.ZipFile(outFullName, "w", zipfile.ZIP_DEFLATED) as archive:
        for path, dirnames, filenames in os.walk(dirpath):
            for filename in filenames:
                full_path = os.path.join(path, filename)
                # Do not add the archive to itself when it lives inside dirpath.
                if os.path.abspath(full_path) == out_abs:
                    continue
                # relpath strips only the leading root; str.replace would also
                # remove later occurrences of the same text inside the path.
                archive.write(full_path, os.path.relpath(full_path, dirpath))
# Example: compress the experiment output folder into submission.zip.
zipDir("/home/aistudio/exp_1/out","/home/aistudio/submission.zip")

将一个文件夹内的图片(包括子文件夹)复制到另一个文件夹,目标文件夹可以不为空。

src_path:源文件夹;target_path:目标文件夹(两者都必须已存在)

import os
 
def copy_dir(src_path, target_path):
    """
    Recursively copy the contents of one directory into another.

    Both directories must already exist. The target may be non-empty;
    files with the same name are overwritten and sub-directories missing
    from the target are created.

    :param src_path: directory to copy from
    :param target_path: existing directory to copy into
    :return: True when both paths are directories and the copy ran,
        False otherwise (nothing is copied in that case)
    """
    # Imported locally: the surrounding file only imports ``os``.
    import shutil

    if not (os.path.isdir(src_path) and os.path.isdir(target_path)):
        return False

    src_root = os.path.abspath(src_path)
    for entry in os.listdir(src_path):
        src_entry = os.path.join(src_root, entry)
        if os.path.isdir(src_entry):
            dst_dir = os.path.join(os.path.abspath(target_path), entry)
            if not os.path.exists(dst_dir):
                os.mkdir(dst_dir)
            copy_dir(src_entry, dst_dir)
        else:
            dst_file = os.path.join(target_path, entry)
            # Stream in chunks instead of loading the whole file into memory.
            with open(src_entry, 'rb') as read_stream, \
                    open(dst_file, 'wb') as write_stream:
                shutil.copyfileobj(read_stream, write_stream)
    return True
# Example: copy everything under exp_1/out into work/train/label.
copy_dir('exp_1/out', 'work/train/label')

为图像分割任务划分数据集,不让预测图片进入验证集(只可运行一次:train.txt 和 val.txt 以追加方式写入,重复运行会产生重复行)

# 划分训练集/验证集,并生成文件名列表

import random
import os.path as osp
from glob import glob


# Seed for the random number generator
RNG_SEED = 114514
# Adjust this value to control the fraction of data used for training
TRAIN_RATIO = 0.8
# Root directory of the dataset
DATA_DIR = '/home/aistudio/work'


def write_rel_paths(phase, name, out_dir, prefix=''):
    """Append one line with the A/B/label relative paths of ``name`` to ``<phase>.txt`` in ``out_dir``."""
    list_file = osp.join(out_dir, phase+'.txt')
    # One space-separated column per sub-folder, in the order A, B, label.
    columns = [osp.join(prefix, sub_dir, name) for sub_dir in ('A', 'B', 'label')]
    with open(list_file, 'a') as f:
        f.write(' '.join(columns) + '\n')

def write_rel_paths_1(phase, names, out_dir, prefix=''):
    """Write one A/B/label relative-path line per entry of ``names`` to ``<phase>.txt`` in ``out_dir``, replacing any existing file."""
    sub_dirs = ('A', 'B', 'label')
    with open(osp.join(out_dir, phase+'.txt'), 'w') as f:
        # Lines are produced lazily, so ``names`` may be any iterable.
        f.writelines(
            ' '.join(osp.join(prefix, sub_dir, name) for sub_dir in sub_dirs) + '\n'
            for name in names
        )



random.seed(RNG_SEED)


# Randomly split the labelled samples into training / validation lists.
# Sorting before shuffling keeps the result identical across runs.
names = sorted(
    osp.basename(label_path)
    for label_path in glob(osp.join(DATA_DIR, 'train', 'label', '*.png'))
)
random.shuffle(names)
len_train = int(len(names)*TRAIN_RATIO) # rounds down

for num, name in enumerate(names, start=1):
    # Only names with the 'train' prefix that fall past the training quota go
    # to the validation list; every other name is always listed for training.
    has_train_prefix = name.split('_')[0] == 'train'
    phase = 'val' if has_train_prefix and num > len_train else 'train'
    write_rel_paths(phase, name, DATA_DIR, prefix='train')

# The test list is written in a single pass from the images under test/A.
write_rel_paths_1(
    'test',
    map(osp.basename, glob(osp.join(DATA_DIR, 'test', 'A', '*.png'))),
    DATA_DIR,
    prefix='test'
)


print("数据集划分已完成。")

删除全黑标签

import cv2 
import numpy as np 
import os

!rm -rf data/data134796/dataset/train/label/.ipynb_checkpoints

# Directory whose label images are checked; all-black labels are deleted.
filenames = 'work/train/label'
for filename in os.listdir(filenames):
    file = filenames + '/' +  filename
    print(file)
    image = cv2.imread(file)

    # cv2.imread returns None instead of raising when the entry cannot be
    # decoded as an image (e.g. a sub-directory or a non-image file); skip it
    # rather than crashing on ``None.any()``.
    if image is None:
        print('无法读取,跳过')
        continue

    # A label is all black exactly when it has no non-zero pixel. This works
    # for any image size and avoids allocating a zero array per file.
    if not image.any():
        os.remove(file)
        print('文件删除')
    else:
        print('不全为0')

print("完成")

 删除不对应的图片

# !cd data/data134796/dataset/train
import os
# Delete images in images_dir that have no same-named file in xml_dir
# (for the case where the reference folder holds fewer files).
images_dir = 'work/train/A'
xml_dir = 'work/train/B'
# Base names (extension stripped) of every file in the reference folder.
# splitext removes only the final extension; a set keeps lookups O(1).
xmls = {os.path.splitext(xml)[0] for xml in os.listdir(xml_dir)}
# Check every file in the image folder.
for image_name in os.listdir(images_dir):
    # Only files whose name starts with the 'train' prefix are candidates.
    if image_name.split('_')[0] != 'train':
        continue
    if os.path.splitext(image_name)[0] not in xmls:
        print(image_name)
        # Remove the file under its real name instead of assuming '.png'.
        os.remove(os.path.join(images_dir, image_name))
    
    

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/langs/788416.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-05-05
下一篇 2022-05-05

发表评论

登录后才能评论

评论列表(0条)

保存