Exporting Edge Impulse data in a different format

Question/Issue:
Hi guys! I’m trying to export my Edge Impulse dataset in a format that can be recognized by YOLOv5. I have tried various methods of converting the json to the yolo format but for whatever reason, none of them seem to be working. Is something wrong with the way the dataset was exported or the way I tried to convert it?

As for the ways I tried to convert it I tried putting the edge impulse dataset into other platforms like roboflow or labelbox and others and tried to export the dataset in the format I want but none of these other platforms can read this dataset format.

Hello @kmath99d,

I have written that script a long time ago. I suspect it still works.

It should convert your Edge Impulse object detection format to COCO JSON format.
This should be easily ingested by Roboflow or other platforms.

Just copy paste the following code in a file named convert.py

python3 convert.py --input path-to-your-dataset
#!/usr/bin/env python
import os
import argparse
import json

def convert_to_coco_format(input):

    f = open(input+'/bounding_boxes.labels')
    edge_impulse_data = json.load(f)

    coco_data = {
        "info": {},
        "licenses": [],
        "categories": [],
        "images": [],
        "annotations": []
    }

    # Initialize category IDs and image IDs
    category_id = 1
    image_id = 1

    # Process each image in the Edge Impulse data
    for image_filename, bounding_boxes in edge_impulse_data['boundingBoxes'].items():
        # Add image information to the COCO data
        image_info = {
            "id": image_id,
            "file_name": image_filename
            # Add any other relevant image information here
        }
        coco_data['images'].append(image_info)

        # Process each bounding box annotation for the current image
        for bounding_box in bounding_boxes:
            # Add category information to the COCO data if not already present
            if bounding_box['label'] not in coco_data['categories']:
                category_info = {
                    "id": category_id,
                    "name": bounding_box['label'],
                    # Add any other relevant category information here
                }
                coco_data['categories'].append(category_info)
                category_id += 1

            # Add annotation information to the COCO data
            annotation_info = {
                "id": len(coco_data['annotations']) + 1,
                "image_id": image_id,
                "category_id": category_id,
                "bbox": [bounding_box['x'], bounding_box['y'], bounding_box['width'], bounding_box['height']],
                "area": bounding_box['width'] * bounding_box['height'],
                "iscrowd": 0
            }
            coco_data['annotations'].append(annotation_info)

        image_id += 1

    coco_json = json.dumps(coco_data)

    # Save the COCO JSON data to a file
    with open(input+'/coco_data.json', 'w') as f:
        f.write(coco_json)

if __name__=="__main__":
    print("convert Edge Impulse bounding_boxes.labels to COCO JSON format") 
    a = argparse.ArgumentParser()
    a.add_argument("--input", help="input folder")

    args = a.parse_args()
    # print(args)
    convert_to_coco_format(args.input)


#exemple: python3 convert.py --input input

Let me know if that works, I think I have another one for Pascal VOC format as well but I need to look for it in my archives :smiley:

That being said, let me create an internal feature request to support the different export format.

Best,

Louis

Here is the one to convert Edge Impulse Object Detection format to Pascal VOC format:

convert.py:

import os
import json
import cv2
import argparse
from xml.etree.ElementTree import Element, SubElement, tostring
from xml.dom import minidom

def create_xml_annotation(dataset, image_name, bounding_boxes, h, w, c):
    root = Element("annotation")
    folder = SubElement(root, "folder")
    folder.text = dataset  # Update with your folder name
    
    filename = SubElement(root, "filename")
    filename.text = image_name
    
    size = SubElement(root, "size")
    width = SubElement(size, "width")
    width.text = str(w)  # Update with actual image width
    
    height = SubElement(size, "height")
    height.text = str(h)  # Update with actual image height
    
    depth = SubElement(size, "depth")
    depth.text = str(c)  # Update with actual image depth (e.g., 3 for RGB)
    
    for box in bounding_boxes:
        obj = SubElement(root, "object")
        name = SubElement(obj, "name")
        name.text = box["label"]
        
        bndbox = SubElement(obj, "bndbox")
        xmin = SubElement(bndbox, "xmin")
        xmin.text = str(box["x"])
        
        ymin = SubElement(bndbox, "ymin")
        ymin.text = str(box["y"])
        
        xmax = SubElement(bndbox, "xmax")
        xmax.text = str(box["x"] + box["width"])
        
        ymax = SubElement(bndbox, "ymax")
        ymax.text = str(box["y"] + box["height"])
    
    xml_str = minidom.parseString(tostring(root)).toprettyxml(indent="  ")
    return xml_str

def convert_to_voc_format(input):

    f = open(input+'/bounding_boxes.labels')
    edge_impulse_data = json.load(f)
    bounding_boxes = edge_impulse_data["boundingBoxes"]
    
    for image_name, boxes in bounding_boxes.items():
        im = cv2.imread(input+"/"+image_name)
        h, w, c = im.shape
        xml_annotation = create_xml_annotation(input, image_name, boxes, h, w, c)
        xml_filename = os.path.splitext(image_name)[0] + ".xml"
        xml_path = os.path.join(input, xml_filename)
        
        with open(xml_path, "w") as xml_file:
            xml_file.write(xml_annotation)

if __name__=="__main__":
    print("Convert Edge Impulse bounding_boxes.labels to Pascal VOC format") 
    a = argparse.ArgumentParser()
    a.add_argument("--input", help="input folder")

    args = a.parse_args()
    # print(args)
    convert_to_voc_format(args.input)


#exemple: python3 convert.py --input input