widerperson数据集转化为YOLOv5训练格式，并加入到crowdhuman中

一休哥※

发布时间 2023.06.09阅读数 1624 评论数 0

widerperson数据集转化为YOLOv5训练格式，并加入到crowdhuman中

数据可视化

import os
import cv2

if __name__ == '__main__':
    # Visualize WiderPerson annotations: draw the kept person boxes on every
    # image listed in train.txt and show them one at a time.
    #
    # Raw string: the original non-raw literal only worked because "\s", "\D"
    # and "\W" are unrecognized escapes, which newer Python versions warn about.
    path = r'D:\songjiahao\DATA\WiderPerson/train.txt'
    with open(path, 'r') as f:
        img_ids = f.read().splitlines()

    for img_id in img_ids:  # e.g. '000040'
        img_id = img_id.strip()
        if not img_id:
            # Skip blank lines in the id list instead of building a bogus path.
            continue

        img_path = 'D:/songjiahao/DATA/WiderPerson/Images/' + img_id + '.jpg'
        print(img_path)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None.
            print('skip unreadable image: ' + img_path)
            continue

        # The annotation file name is the image file name plus '.txt'.
        label_path = img_path.replace('Images', 'Annotations') + '.txt'
        print(label_path)
        with open(label_path) as file:
            # The first line holds the number of annotated boxes; it is not
            # needed because the remaining lines are simply iterated.
            next(file, None)
            for line in file:
                fields = line.split()
                if not fields:
                    # Tolerate blank / trailing empty lines.
                    continue
                cls = int(fields[0])
                print(cls)
                # class_label = 1: pedestrians
                # class_label = 2: riders
                # class_label = 3: partially-visible persons
                # class_label = 4: ignore regions (fake people, e.g. in pictures)
                # class_label = 5: crowd (one big box covering a dense group)
                if cls in (1, 3):
                    xmin, ymin, xmax, ymax = (int(float(v)) for v in fields[1:5])
                    img = cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
        cv2.imshow('result', img)
        cv2.waitKey(0)

WiderPerson 的标注包含 5 个类别：1 代表行人，2 代表骑车的人，3 代表被部分遮挡的行人，4 代表假人（例如图画上的人），5 代表密集人群。其中类别 5 直接用一个大框覆盖整片人群，不太实用，因此去除第 4、5 类，并将剩下的 1、2、3 类统一归为 1 类，与 crowdhuman 数据对齐。

数据去除

import os
from pathlib import Path
from PIL import Image
import csv
import shutil
import math

# coding=utf-8
def check_charset(file_path, sample_size=4096):
    """Guess the text encoding of a file with chardet.

    The original implementation sampled only the first 4 bytes, which is
    enough to spot a byte-order mark but not to classify the content: a file
    whose first non-ASCII byte appears later would be reported as ASCII and
    then fail to decode. A larger leading sample is inspected instead.

    Args:
        file_path: Path of the file to inspect.
        sample_size: Number of leading bytes handed to chardet.

    Returns:
        The value of the 'encoding' entry of chardet's detection result.
        NOTE(review): chardet is expected to report None when it cannot
        decide (e.g. an empty file); open(..., encoding=None) then uses the
        platform default encoding - confirm against the installed version.
    """
    import chardet
    with open(file_path, "rb") as f:
        data = f.read(sample_size)
    return chardet.detect(data)['encoding']


def convert(size, box0, box1, box2, box3):
    """Turn a corner-format box into normalized center/size strings.

    Args:
        size: Sequence whose first two items divide the horizontal and the
            vertical coordinates respectively.
        box0: Left coordinate (x min).
        box1: Top coordinate (y min).
        box2: Right coordinate (x max).
        box3: Bottom coordinate (y max).

    Returns:
        A 4-tuple of strings (x_center, y_center, width, height), each
        formatted with seven decimal places.
    """
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]
    normalized = (
        (box0 + box2) / 2 * scale_x,  # box center, horizontal
        (box1 + box3) / 2 * scale_y,  # box center, vertical
        (box2 - box0) * scale_x,      # box width
        (box3 - box1) * scale_y,      # box height
    )
    # Echo the unrounded values before they are formatted.
    print(*normalized)
    return tuple('%.7f' % value for value in normalized)


if __name__ == '__main__':
    # Convert WiderPerson annotations to YOLO label files: keep classes 1-3,
    # write them all as class '1' with normalized center/size coordinates,
    # and copy each processed image next to the results.
    path = r'D:\songjiahao\DATA\WiderPerson/val.txt'
    with open(path, 'r') as f:
        img_ids = f.read().splitlines()

    for img_id in img_ids:  # e.g. '000040'
        img_id = img_id.strip()
        if not img_id:
            # Skip blank lines in the id list instead of building a bogus path.
            continue

        img_path = r'D:\songjiahao\DATA\WiderPerson\widerperson\train/' + img_id + '.jpg'

        with Image.open(img_path) as Img:
            img_size = Img.size  # PIL reports (width, height)

        # The annotation file name is the image file name plus '.txt'.
        label_path = img_path.replace('train', 'Annotations') + '.txt'
        outpath = r'D:\songjiahao\DATA\WiderPerson\widerperson\labels/' + img_id + '.txt'
        print(outpath)

        # Collect output rows in a list and write them once, instead of
        # growing a string with repeated concatenation.
        rows = []
        with open(label_path, encoding=check_charset(label_path)) as file:
            print(label_path)
            # The first line holds the number of annotated boxes; it is not
            # needed because the remaining lines are simply iterated.
            next(file, None)
            for line in file:
                fields = line.split()
                if not fields:
                    # Tolerate blank / trailing empty lines.
                    continue
                if int(fields[0]) in (1, 2, 3):
                    xmin, ymin, xmax, ymax = (float(v) for v in fields[1:5])
                    bb = convert(img_size, xmin, ymin, xmax, ymax)
                    rows.append('1' + ' ' + ' '.join(str(a) for a in bb) + '\n')

        with open(outpath, 'w') as outfile:
            outfile.writelines(rows)
        shutil.copy(img_path, r'D:\songjiahao\DATA\WiderPerson\widerperson\annotation/' + img_id + '.jpg')

因为 test.txt 对应的 labels 并未给出，所以只对 train.txt 和 val.txt 中共 9000 张带有可用 labels 的图片进行数据清除和归一化处理。

数据移动，将与labels对应的图片移动到同一文件夹

import shutil
import os
if __name__ == '__main__':
    # Copy the image that belongs to every label file into one folder.
    label_path = r"D:\songjiahao\DATA\WiderPerson\widerperson\labels\train/"
    To_imgpath = r'D:\songjiahao\DATA\WiderPerson\widerperson\trainimg/'
    # The images live in a sibling tree where 'labels' is replaced by 'images'.
    image_dir = label_path.replace('labels', 'images')

    imgids = os.listdir(label_path)
    print(len(imgids))
    for n, label_name in enumerate(imgids, start=1):
        # The first six characters of the label file name are used as the image id.
        img_ids_path = image_dir + label_name[0:6] + '.jpg'
        print(img_ids_path, To_imgpath, n)
        shutil.copy(img_ids_path, To_imgpath)

根据 train.txt 和 val.txt 分离照片和 txt

#第一天学习
#第一天学习
# 根据train.txt和val.txt划分数据集
import os
import shutil

# Source paths: the two id lists plus the folders holding the images and labels.
# Note that `testtxt` points at val.txt.
traintxt=r'D:\songjiahao\DATA\数据集\WiderPerson\train.txt'
testtxt=r'D:\songjiahao\DATA\数据集\WiderPerson\val.txt'
srcima=r'D:\songjiahao\DATA\数据集\widerhuman\images\train/'
srctxt=r'D:\songjiahao\DATA\数据集\widerhuman\labels\train/'
# Destination paths for the training split
train_image_path = r'D:\songjiahao\DATA\数据集\WiderPerson\widerPerson划分\images\train/'
train_label_path = r'D:\songjiahao\DATA\数据集\WiderPerson\widerPerson划分\labels\train/'
# Destination paths for the validation split
val_image_path = r'D:\songjiahao\DATA\数据集\WiderPerson\widerPerson划分\images\val/'
val_label_path = r'D:\songjiahao\DATA\数据集\WiderPerson\widerPerson划分\labels\val/'
# Destination paths for the test split
test_image_path = r'D:\songjiahao\DATA\数据集\WiderPerson\widerPerson划分\images\test/'
test_label_path = r'D:\songjiahao\DATA\数据集\WiderPerson\widerPerson划分\labels\test/'
# Dataset split ratios.
# NOTE(review): the original comment claimed train 75% / val 15% / test 15%,
# but the values below are 0.7 / 0.1 / 0.1 (sum 0.9) - confirm the intended
# ratios. They do not appear to be referenced by the visible code.
train_percent = 0.7
val_percent = 0.1
test_percent = 0.1
# 检查文件夹是否存在
def mkdir():
    """Create every split output directory that does not exist yet.

    Covers the image and label folders of the train, val and test splits
    (module-level path constants). `exist_ok=True` replaces the original
    check-then-create pairs, so a directory appearing between the check and
    the creation can no longer raise.

    Raises:
        OSError: If a directory cannot be created (for example
            FileExistsError when a path exists but is not a directory).
    """
    output_dirs = (
        train_image_path,
        train_label_path,
        val_image_path,
        val_label_path,
        test_image_path,
        test_label_path,
    )
    for directory in output_dirs:
        os.makedirs(directory, exist_ok=True)

def main():
    """Copy the images listed in the train id list into the train split folder.

    Creates the output folders, reads both id lists (the second one only to
    report its size) and copies `<srcima><id>.jpg` to
    `<train_image_path><id>.jpg` for every id in the train list.

    NOTE: the original code also looped over the second list and prepared
    label paths, but every copy call on those paths was commented out, so the
    effective behavior - copying train images only - is kept here. Re-enable
    label / validation copying deliberately if it is needed.

    Raises:
        OSError: If an id list cannot be read or an image cannot be copied.
    """
    mkdir()
    # `with` closes both files; the original leaked the first handle by
    # rebinding `f` before closing it.
    with open(traintxt, 'r') as f:
        trainlist = f.readlines()
    with open(testtxt, 'r') as f:
        testlist = f.readlines()
    print("训练集数目：{}，测试集数目：{}".format(len(trainlist),len(testlist)))

    for entry in trainlist:
        name = entry.strip()
        if not name:
            # A blank line would otherwise turn into the path '<srcima>.jpg'.
            continue
        srcImage = srcima + name + '.jpg'
        dst_train_Image = train_image_path + name + '.jpg'

        print(srcImage)
        shutil.copyfile(srcImage, dst_train_Image)
        # Echoed a second time once the copy has succeeded (as in the original).
        print(srcImage)


if __name__ == '__main__':
    main()

人工智能机器学习深度学习目标识别 yolov5

打赏 0

上一篇：基于opencv的检测人脸，并截取人脸

下一篇：尝试解决YOLOv5推理rtsp有延迟的一些方法

widerperson数据集转化为YOLOv5训练格式，并加入到crowdhuman中

一休哥※

widerperson数据集转化为YOLOv5训练格式，并加入到crowdhuman中

数据可视化

数据去除

数据移动，将与labels对应的图片移动到同一文件夹

根据train.txt 和val。txt分离照片和txt

为你推荐

目标检测算法---将数据集为划分训练集和验证集

经典机器学习系列(四)【神经网络详解】

win10中jupyter notebook设置conda虚拟环境全流程及问题汇总

行人重识别(13)——代码实践之随机采样器（sampler.py）

精选COLMAP多视角视图数据可视化

精选知识图谱基本工具Neo4j使用笔记四：使用csv文件批量导入图谱数据

关于作者

一休哥※

10

0

0

2

win10+libtorch+yolov5-6.0部署

win10系统下yolov5-V6.1版本的tensorrt部署细节教程及bug修改

widerperson数据集转化为YOLOv5训练格式，并加入到crowdhuman中

相关推荐

【tensorflow2.0】fashion mnist 数据集训练

「RL篇伍」一文读懂 TRPO 原理与实现

单阶段实例分割综述

AD2. 模仿学习(Imitation Learning)

（十三）通俗易懂理解——Adaboost算法原理

为什么两层33卷积核效果比1层55卷积核效果要好？

热门泡泡

30积分失眠，聊聊自己搞ROS的心得体会吧

ros学习路线

30积分 TF_REPEATED_DATA ignoring data错误

各位大佬，有什么ROS定位算法推荐吗

5积分想买能用ROS2的开发套件。或者开发板

5积分 ros中启动gazebo时报错

给作者打赏

widerperson数据集转化为YOLOv5训练格式，并加入到crowdhuman中

一休哥※

widerperson数据集转化为YOLOv5训练格式，并加入到crowdhuman中

数据可视化

数据去除

数据移动，将与labels对应的图片移动到同一文件夹

根据train.txt 和val。txt分离照片和txt

为你推荐

目标检测算法---将数据集为划分训练集和验证集

经典机器学习系列(四)【神经网络详解】

win10中jupyter notebook设置conda虚拟环境全流程及问题汇总

行人重识别(13)——代码实践之随机采样器（sampler.py）

精选COLMAP多视角视图数据可视化

精选知识图谱基本工具Neo4j使用笔记 四 ：使用csv文件批量导入图谱数据

评论（0）

关于作者

一休哥※

10

0

0

2

win10+libtorch+yolov5-6.0部署

win10系统下yolov5-V6.1版本的tensorrt部署细节教程及bug修改

widerperson数据集转化为YOLOv5训练格式，并加入到crowdhuman中

相关推荐

【tensorflow2.0】fashion mnist 数据集训练

「RL篇 伍」一文读懂 TRPO 原理与实现

单阶段实例分割综述

AD2. 模仿学习(Imitation Learning)

（十三）通俗易懂理解——Adaboost算法原理

为什么两层3*3卷积核效果比1层5*5卷积核效果要好？

热门泡泡

30积分 失眠，聊聊自己搞ROS的心得体会吧

ros学习路线

30积分 TF_REPEATED_DATA ignoring data错误

各位大佬，有什么ROS定位算法推荐吗

5积分 想买能用ROS2的开发套件。或者开发板

5积分 ros中启动gazebo时报错

给作者打赏

忘记密码

修改头像

添加你感兴趣的标签

举报类型（必选）

举报详情（选填）

精选知识图谱基本工具Neo4j使用笔记四：使用csv文件批量导入图谱数据

「RL篇伍」一文读懂 TRPO 原理与实现

为什么两层33卷积核效果比1层55卷积核效果要好？

30积分失眠，聊聊自己搞ROS的心得体会吧

5积分想买能用ROS2的开发套件。或者开发板