-
-
Notifications
You must be signed in to change notification settings - Fork 238
Description
Hi all, I am trying to understand the contents of the VG-SGG-with-attri.h5 file to construct my own .h5 file for testing models on a custom dataset. This file has various keys such as active_object_mask, attributes, boxes_1024, boxes_512, img_to_first_box, etc. As maskrcnn_benchmark/data/datasets/visual_genome150.py I wrote the following code to visualize a particular bounding box for the first image in the dataset:
# Import necessary libraries
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
from PIL import Image,ImageDraw
import h5py
# Function to draw a single bounding box on an image
def draw_single_box(pic, box, color='red'):
draw = ImageDraw.Draw(pic)
x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
draw.rectangle(((x1, y1), (x2, y2)), outline=color,width=3)
# Import necessary libraries
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
from PIL import Image,ImageDraw
import h5py
VG150_OBJ_CATEGORIES = ['__background__', 'airplane', 'animal', 'arm', 'bag', 'banana', 'basket', 'beach', 'bear', 'bed', 'bench', 'bike', 'bird', 'board', 'boat', 'book', 'boot', 'bottle', 'bowl', 'box', 'boy', 'branch', 'building', 'bus', 'cabinet', 'cap', 'car', 'cat', 'chair', 'child', 'clock', 'coat', 'counter', 'cow', 'cup', 'curtain', 'desk', 'dog', 'door', 'drawer', 'ear', 'elephant', 'engine', 'eye', 'face', 'fence', 'finger', 'flag', 'flower', 'food', 'fork', 'fruit', 'giraffe', 'girl', 'glass', 'glove', 'guy', 'hair', 'hand', 'handle', 'hat', 'head', 'helmet', 'hill', 'horse', 'house', 'jacket', 'jean', 'kid', 'kite', 'lady', 'lamp', 'laptop', 'leaf', 'leg', 'letter', 'light', 'logo', 'man', 'men', 'motorcycle', 'mountain', 'mouth', 'neck', 'nose', 'number', 'orange', 'pant', 'paper', 'paw', 'people', 'person', 'phone', 'pillow', 'pizza', 'plane', 'plant', 'plate', 'player', 'pole', 'post', 'pot', 'racket', 'railing', 'rock', 'roof', 'room', 'screen', 'seat', 'sheep', 'shelf', 'shirt', 'shoe', 'short', 'sidewalk', 'sign', 'sink', 'skateboard', 'ski', 'skier', 'sneaker', 'snow', 'sock', 'stand', 'street', 'surfboard', 'table', 'tail', 'tie', 'tile', 'tire', 'toilet', 'towel', 'tower', 'track', 'train', 'tree', 'truck', 'trunk', 'umbrella', 'vase', 'vegetable', 'vehicle', 'wave', 'wheel', 'window', 'windshield', 'wing', 'wire', 'woman', 'zebra']
# Load the H5 file to get bounding boxes
file_path = './DATASET/VG150/VG-SGG-with-attri.h5'
with h5py.File(file_path, 'r') as h5_file:
img_to_first_box = h5_file['img_to_first_box'][:]
img_to_last_box = h5_file['img_to_last_box'][:]
boxes_1024 = h5_file['boxes_1024'][:]
labels = h5_file['labels'][:]
# Specify the image path and index
image_path = './DATASET/VG150/VG_100K/1.jpg'
image_index = 0
box_index = 10
# Retrieve bounding boxes for the specific image
first_box_index = img_to_first_box[image_index]
last_box_index = img_to_last_box[image_index]
boxes_for_this_image = boxes_1024[first_box_index:last_box_index + 1]
# Load the image
image = Image.open(image_path)
# Scale image to 1024 x 1024
image = image.resize((1024, 1024))
# Plot bounding boxes
for box in boxes_for_this_image[box_index:]:
box[:2] = box[:2] - box[2:] / 2
box[2:] = box[:2] + box[2:]
draw_single_box(image, box)
print("Label: ",VG150_OBJ_CATEGORIES[labels[box_index,0]])
break
# Display the image with bounding boxes using Matplotlib
plt.imshow(image)
#plt.axis('off') # Hide the axes
plt.show()
Sorry for the messy code. But essentially I load the bounding box coordinates from the boxes_1024 database, re-format it to (x1.y1.x2.y2) format from (cx,cy,w,h) and apply the bounding box on the image which is resized to (1024,1024). I also print the annotated object label for the box.
My issue is that the bounding box is plot at an offset and does not correspond accurately to the label. I have attached an example here.
Am I doing something wrong here? I would really appreciate any help since I've been stuck on this problem for the last two days!
