1 VOC标签格式转yolo格式并划分训练集和验证集


         我们经常从网上获取一些目标检测的数据集资源,其标签格式大多是VOC(xml格式)的,而yolov5训练所需要的标签格式是yolo(txt格式)的,因此需要把xml格式的标签文件转换为txt文件。同时,训练自己的yolov5检测模型时,数据集还需要划分为训练集和验证集。这里提供一份代码,将xml格式的标注文件转换为txt格式的标注文件,并按比例划分为训练集和验证集。先上代码,再讲解代码的注意事项。


import xml.etree.ElementTree as ET


import pickle


import os


from os import listdir, getcwd


from os.path import join


import random


from shutil import copyfile




# Class names exactly as they appear in the <name> tags of the VOC XML files.
# The position of a name in this list is the class id written to the YOLO
# label files; for a single-class dataset use e.g. ["ball"].
classes = ["hat", "person"]

# Threshold (on a 1-100 scale) that a per-image random draw is compared
# against to decide whether the image goes to the training split.
TRAIN_RATIO = 80




def clear_hidden_files(path):
    """Recursively delete files whose names start with "._" below *path*.

    Args:
        path: Directory to scan. Sub-directories are scanned recursively.

    Raises:
        OSError: If *path* cannot be listed or a matching file cannot be
            removed.
    """
    root = os.path.abspath(path)
    for entry in os.listdir(path):
        abspath = os.path.join(root, entry)
        if os.path.isfile(abspath):
            if entry.startswith("._"):
                os.remove(abspath)
        elif os.path.isdir(abspath):
            # Only descend into real directories; anything else (for example a
            # broken symlink) cannot be listed and is left untouched.
            clear_hidden_files(abspath)




def convert(size, box):
    """Convert a pixel bounding box into a normalized centre/size box.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` of the box. Note the order: both x
            values come first, then both y values.

    Returns:
        A tuple ``(x, y, w, h)``: the box centre and the box width/height,
        with x and w divided by the image width and y and h divided by the
        image height.

    Raises:
        ZeroDivisionError: If either entry of *size* is zero.
    """
    dw = 1. / size[0]
    dh = 1. / size[1]
    x = (box[0] + box[1]) / 2.0
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    return (x * dw, y * dh, w * dw, h * dh)




def convert_annotation(image_id):
    """Write the YOLO label file for one VOC XML annotation.

    Reads ``VOCdevkit/VOC2007/Annotations/<image_id>.xml`` and writes
    ``VOCdevkit/VOC2007/YOLOLabels/<image_id>.txt`` (both relative to the
    current working directory). Each kept object becomes one line:
    ``<class id> <x> <y> <w> <h>``, where the class id is the index of the
    object's name in the module-level ``classes`` list and the four numbers
    come from ``convert``.

    Objects whose name is not in ``classes`` or whose ``<difficult>`` flag is
    1 are skipped. A missing ``<difficult>`` tag is treated as 0.

    Args:
        image_id: File name of the annotation without its extension.
    """
    # Parse before opening the output so that an unreadable annotation does
    # not leave an empty label file behind.
    tree = ET.parse('VOCdevkit/VOC2007/Annotations/%s.xml' % image_id)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    with open('VOCdevkit/VOC2007/YOLOLabels/%s.txt' % image_id, 'w') as out_file:
        for obj in root.iter('object'):
            difficult_node = obj.find('difficult')
            difficult = 0 if difficult_node is None else int(difficult_node.text)
            cls = obj.find('name').text
            if cls not in classes or difficult == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects the order (xmin, xmax, ymin, ymax).
            b = (float(xmlbox.find('xmin').text),
                 float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text),
                 float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')




def _ensure_dir(path, clean=True):
    """Create *path* if it is missing and optionally purge "._*" files in it.

    Returns *path* unchanged so the call can be used in an assignment.
    """
    if not os.path.isdir(path):
        os.mkdir(path)
    if clean:
        clear_hidden_files(path)
    return path


# ---------------------------------------------------------------------------
# Directory layout (all below the current working directory):
#   VOCdevkit/VOC2007/Annotations  input VOC XML files
#   VOCdevkit/VOC2007/JPEGImages   input images
#   VOCdevkit/VOC2007/YOLOLabels   every converted YOLO txt label
#   VOCdevkit/images/{train,val}   images copied per split
#   VOCdevkit/labels/{train,val}   labels copied per split
# The trailing "/" is kept on every directory name because the values are
# also used as plain string prefixes.
# ---------------------------------------------------------------------------
wd = os.getcwd()
data_base_dir = _ensure_dir(os.path.join(wd, "VOCdevkit/"), clean=False)
work_sapce_dir = _ensure_dir(os.path.join(data_base_dir, "VOC2007/"), clean=False)
annotation_dir = _ensure_dir(os.path.join(work_sapce_dir, "Annotations/"))
image_dir = _ensure_dir(os.path.join(work_sapce_dir, "JPEGImages/"))
yolo_labels_dir = _ensure_dir(os.path.join(work_sapce_dir, "YOLOLabels/"))
yolov5_images_dir = _ensure_dir(os.path.join(data_base_dir, "images/"))
yolov5_labels_dir = _ensure_dir(os.path.join(data_base_dir, "labels/"))
yolov5_images_train_dir = _ensure_dir(os.path.join(yolov5_images_dir, "train/"))
yolov5_images_test_dir = _ensure_dir(os.path.join(yolov5_images_dir, "val/"))
yolov5_labels_train_dir = _ensure_dir(os.path.join(yolov5_labels_dir, "train/"))
yolov5_labels_test_dir = _ensure_dir(os.path.join(yolov5_labels_dir, "val/"))

list_imgs = os.listdir(image_dir)  # candidate image files

# Opening with 'w' truncates any list left over from a previous run.
with open(os.path.join(wd, "yolov5_train.txt"), 'w') as train_file, \
        open(os.path.join(wd, "yolov5_val.txt"), 'w') as test_file:
    for image_name in list_imgs:
        image_path = os.path.join(image_dir, image_name)
        if not os.path.isfile(image_path):
            continue

        nameWithoutExtention = os.path.splitext(image_name)[0]
        annotation_path = os.path.join(annotation_dir, nameWithoutExtention + '.xml')
        label_name = nameWithoutExtention + '.txt'
        label_path = os.path.join(yolo_labels_dir, label_name)

        prob = random.randint(1, 100)
        print("Probability: %d" % prob)

        # Images without a matching XML annotation are left out of both splits.
        if not os.path.exists(annotation_path):
            continue

        # randint(1, 100) is inclusive on both ends, so "<=" sends exactly
        # TRAIN_RATIO out of 100 possible draws to the training split.
        if prob <= TRAIN_RATIO:
            list_file = train_file
            images_out_dir = yolov5_images_train_dir
            labels_out_dir = yolov5_labels_train_dir
        else:
            list_file = test_file
            images_out_dir = yolov5_images_test_dir
            labels_out_dir = yolov5_labels_test_dir

        list_file.write(image_path + '\n')
        convert_annotation(nameWithoutExtention)  # writes label_path
        copyfile(image_path, os.path.join(images_out_dir, image_name))
        copyfile(label_path, os.path.join(labels_out_dir, label_name))


        首先,数据集的目录结构必须严格按照下图的样式来,因为代码已经将目录名和文件路径写死了。其实这样也好,因为统一就会规范。


20210831173834541.png


Annotations里面存放着xml格式的标签文件


JPEGImages里面存放着照片数据文件


        特别要注意的是,classes里面必须正确填写xml里面已经标注好的类别名称,否则生成的txt文件是不对的。TRAIN_RATIO是划入训练集的百分比(不是训练集与验证集之比);代码对每张图片独立随机抽样,因此当它等于80时,大约80%的图片划给训练集、约20%划给验证集,实际比例会有小幅波动。


watermark,type_ZHJvaWRzYW5zZmFsbGJhY2s,shadow_50,text_Q1NETiBA5q-P5aSp5YaZYnVn,size_16,color_FFFFFF,t_70,g_se,x_16


         将代码和数据在同一目录下运行,得到如下的结果


watermark,type_ZHJvaWRzYW5zZmFsbGJhY2s,shadow_50,text_Q1NETiBA5q-P5aSp5YaZYnVn,size_6,color_FFFFFF,t_70,g_se,x_16


         在VOCdevkit目录下生成images和labels文件夹,文件夹下分别生成了train文件夹和val文件夹,里面分别保存着训练集的照片和txt格式的标签,还有验证集的照片和txt格式的标签。images文件夹和labels文件夹就是训练yolov5模型所需的训练集和验证集。在VOCdevkit/VOC2007目录下还生成了一个YOLOLabels文件夹,里面存放着所有的txt格式的标签文件。


        至此,xml格式的标签文件转换为txt格式的标签文件并划分为训练集和验证集就讲完了。


2  标签为yolo格式数据集划分训练集和验证集


        由于yolov5训练需要的标签格式为txt格式,很多人在用labelimg标注时会直接选择yolo格式(生成的标签为txt文件)。标注好的数据集在训练前同样要划分为训练集和验证集,因此需要相应的划分代码。需要说明的是,我自己写的脚本虽然可以成功把这种数据集划分为训练集和验证集,但在训练模型时加载数据集一直会出现问题。因此我想到:先把txt格式的标签转换成xml格式,再按上面第1节的方法将xml格式标签转换为txt格式并划分训练集和验证集。这里也建议大家以后标注时直接标注为voc格式(xml格式),因为该格式的标签里保存了标注的具体内容,例如标注类别、图片大小、标注坐标;而yolo格式(txt格式)用数字代表类别,很不直观,标注坐标也经过了归一化,更不直观。下面先给出yolo转voc的代码。

 



from xml.dom.minidom import Document


import os


import cv2






# def makexml(txtPath, xmlPath, picPath): # txt所在文件夹路径,xml文件保存路径,图片所在文件夹路径


def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def makexml(picPath, txtPath, xmlPath, class_names=None):
    """Convert YOLO txt label files into VOC-style XML annotation files.

    For every ``<stem>.txt`` in *txtPath* the image ``<stem>.jpg`` is read
    from *picPath* to obtain its size, and ``<stem>.xml`` is written to
    *xmlPath*. Each non-blank label line is read as
    ``<class id> <x centre> <y centre> <width> <height>``; the last four
    values are scaled by the image size to produce integer
    xmin/ymin/xmax/ymax values.

    Files that are not ``.txt`` files, and the file ``classes.txt``, are
    ignored. A label whose image cannot be read is reported on stdout and
    skipped.

    Args:
        picPath: Folder containing the ``.jpg`` images.
        txtPath: Folder containing the YOLO ``.txt`` label files.
        xmlPath: Folder that receives the generated ``.xml`` files.
        class_names: Optional mapping from the class-id string found in the
            label file (``'0'``, ``'1'``, ...) to the class name written to
            the XML. Defaults to ``{'0': "hat", '1': "person"}``; it must
            match the class order the labels were created with.

    Raises:
        KeyError: If a label line uses a class id missing from the mapping.
    """
    if class_names is None:
        class_names = {'0': "hat", '1': "person"}

    for name in os.listdir(txtPath):
        stem, ext = os.path.splitext(name)
        if ext.lower() != ".txt" or name == "classes.txt":
            continue

        image_file = os.path.join(picPath, stem + ".jpg")
        img = cv2.imread(image_file)
        if img is None:
            # cv2.imread signals a missing/unreadable image by returning None.
            print("makexml: cannot read image %s, skipping %s" % (image_file, name))
            continue
        Pheight, Pwidth, Pdepth = img.shape

        with open(os.path.join(txtPath, name)) as txtFile:
            txtList = txtFile.readlines()

        xmlBuilder = Document()
        annotation = xmlBuilder.createElement("annotation")
        xmlBuilder.appendChild(annotation)

        _append_text_element(xmlBuilder, annotation, "folder", "driving_annotation_dataset")
        _append_text_element(xmlBuilder, annotation, "filename", stem + ".jpg")

        size = xmlBuilder.createElement("size")
        _append_text_element(xmlBuilder, size, "width", str(Pwidth))
        _append_text_element(xmlBuilder, size, "height", str(Pheight))
        _append_text_element(xmlBuilder, size, "depth", str(Pdepth))
        annotation.appendChild(size)

        for line in txtList:
            oneline = line.split()
            if not oneline:
                continue  # tolerate blank lines in the label file
            x_center = float(oneline[1])
            y_center = float(oneline[2])
            box_w = float(oneline[3])
            box_h = float(oneline[4])

            obj = xmlBuilder.createElement("object")
            _append_text_element(xmlBuilder, obj, "name", class_names[oneline[0]])
            _append_text_element(xmlBuilder, obj, "pose", "Unspecified")
            _append_text_element(xmlBuilder, obj, "truncated", "0")
            _append_text_element(xmlBuilder, obj, "difficult", "0")

            # Centre/size fractions -> pixel corners; the centre is shifted by
            # one pixel before the half extent is subtracted/added.
            bndbox = xmlBuilder.createElement("bndbox")
            _append_text_element(
                xmlBuilder, bndbox, "xmin",
                str(int((x_center * Pwidth + 1) - box_w * 0.5 * Pwidth)))
            _append_text_element(
                xmlBuilder, bndbox, "ymin",
                str(int((y_center * Pheight + 1) - box_h * 0.5 * Pheight)))
            _append_text_element(
                xmlBuilder, bndbox, "xmax",
                str(int((x_center * Pwidth + 1) + box_w * 0.5 * Pwidth)))
            _append_text_element(
                xmlBuilder, bndbox, "ymax",
                str(int((y_center * Pheight + 1) + box_h * 0.5 * Pheight)))
            obj.appendChild(bndbox)

            annotation.appendChild(obj)

        # The declaration written by writexml says utf-8, so the file itself
        # must be encoded as utf-8 as well.
        with open(os.path.join(xmlPath, stem + ".xml"), 'w', encoding='utf-8') as f:
            xmlBuilder.writexml(f, indent='\t', newl='\n', addindent='\t', encoding='utf-8')




if __name__ == "__main__":
    # Folder locations are relative to the current working directory. The
    # trailing "/" is kept so the values also work as plain string prefixes.
    picPath = "VOCdevkit/VOC2007/JPEGImages/"  # folder with the images
    txtPath = "VOCdevkit/VOC2007/YOLO/"  # folder with the YOLO txt labels
    xmlPath = "VOCdevkit/VOC2007/Annotations/"  # folder receiving the XML files
    makexml(picPath, txtPath, xmlPath)



 

        首先讲一下数据的目录结构,要严格按照下图来组织,这是为了给后续的数据集划分提供统一的目录结构。数据目录还要和代码放在同一目录下,这样就可以一键运行了。


20210901103753466.jpg


    JPEGImages为图片数据所在的目录
    YOLO为yolo格式的标签数据所在目录
    Annotations为生成的voc格式数据标签目录(程序运行前这是一个空目录)


如下图这里要对应好,且顺序要一致,例如我这里是0对应hat,1对应person。


watermark,type_ZHJvaWRzYW5zZmFsbGJhY2s,shadow_50,text_Q1NETiBA5q-P5aSp5YaZYnVn,size_20,color_FFFFFF,t_70,g_se,x_16


        运行如上的代码,就可以将yolo格式的标签转化为voc格式,并保存在Annotations目录中,最后按照上述第1节的方法,将voc转为yolo再划分数据集就可以了。


        至此,yolo格式数据集划分训练集和验证集就讲完了。