All testset detection analysis. Part 2

In this short post I play a bit more with SSD. My goal was to count, for every class, all the objects that the network was able to detect. I was also interested in how the class distribution changes with the network’s confidence threshold.

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
%matplotlib inline
# Notebook-wide plot defaults: large figures and a large font.
plt.rcParams.update({
    'figure.figsize': (20, 20),
    'font.size': 22,
})

import os
import operator
from collections import Counter

# NOTE(review): caffe_root is not referenced by the code visible in this
# notebook; confirm nothing else needs it before removing.
caffe_root = '/home/veronika/materials/cv/detection/caffe/examples/'
import sys

# Point the interpreter at the pycaffe checkout under materials/cv/detection
# (presumably the SSD build -- confirm) and drop the other checkout so that
# `import caffe` cannot pick it up.
caffe_python_dir = '/home/veronika/materials/cv/detection/caffe/python'
if caffe_python_dir not in sys.path:
    # Guarded so that re-running this cell does not append duplicates.
    sys.path.append(caffe_python_dir)
try:
    sys.path.remove('/home/veronika/caffe/python')
except ValueError:
    # list.remove raises when the entry is absent (e.g. the cell was already
    # run once, or the other checkout was never on the path); that is fine.
    pass

import caffe

from google.protobuf import text_format
from caffe.proto import caffe_pb2

# Load the PASCAL VOC label map (text-format protobuf) into `labelmap`.
labelmap_file = '/home/veronika/materials/cv/detection/git/labelmap_voc.prototxt'
labelmap = caffe_pb2.LabelMap()
# The context manager closes the handle as soon as the text has been parsed.
with open(labelmap_file, 'r') as labelmap_fh:
    text_format.Merge(labelmap_fh.read(), labelmap)

/usr/local/lib/python2.7/dist-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')

<caffe.proto.caffe_pb2.LabelMap at 0x7f1ecb828938>

def get_labelname(labelmap, labels):
    """Translate numeric class labels into their display names.

    Args:
        labelmap: object whose ``item`` sequence holds entries with a
            ``label`` (numeric id) and a ``display_name`` attribute.
        labels: a single label or a list of labels. Anything that is not a
            list is treated as one label.

    Returns:
        A list of display names, one per requested label, in input order.

    Raises:
        AssertionError: if a requested label has no entry in ``labelmap``.
    """
    if not isinstance(labels, list):
        labels = [labels]

    # Build the id -> name table once instead of rescanning the label map for
    # every requested label. setdefault keeps the first entry when an id is
    # listed more than once, which is what a front-to-back scan would return.
    name_by_label = {}
    for entry in labelmap.item:
        name_by_label.setdefault(entry.label, entry.display_name)

    labelnames = []
    for label in labels:
        # Float ids such as 15.0 hash and compare equal to the int 15, so
        # labels taken from a float array still match integer ids.
        assert label in name_by_label, \
            "label %r not found in labelmap" % (label,)
        labelnames.append(name_by_label[label])
    return labelnames

# Select device 0 and switch Caffe to GPU mode before the net is created.
caffe.set_device(0)
caffe.set_mode_gpu()
# Deploy definition and trained weights; going by the file name this is an
# SSD 300x300 model on VGGNet trained on VOC0712 (iteration 60000).
model_def = '/home/veronika/materials/cv/detection/models_trained/VGGNet/deploy.prototxt'
model_weights = '/home/veronika/materials/cv/detection/models_trained/VGGNet/VGG_VOC0712_SSD_300x300_iter_60000.caffemodel'

net = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)     # use test mode (e.g., don't perform dropout)
# Preprocessor sized after the net's 'data' blob; it is applied to every image
# before the forward pass in get_object_descr.
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))  # presumably HxWxC -> CxHxW; confirm against caffe.io.Transformer
transformer.set_mean('data', np.array([104,117,123])) # mean pixel
transformer.set_raw_scale('data', 255)  # the reference model operates on images in [0,255] range instead of [0,1]
transformer.set_channel_swap('data', (2,1,0))  # the reference model has channels in BGR order instead of RGB

# Test-set listing: space-separated, no header row. Column 0 is used later as
# the image file name relative to the test directory.
labels = pd.read_csv("/home/veronika/materials/cv/cv_organizer/mydata/test_labels.csv",
                    sep = " ", header = None)

The following function detects, for every image in the test set, all objects whose confidence is at least conf_level, and counts them per class.

def get_object_descr(path_to_test, conf_level):
    """Count detected objects per class over every image of the test set.

    Relies on notebook-level state defined in earlier cells: ``labels``
    (column 0 holds the image file names), ``net``, ``transformer`` and
    ``labelmap``.

    Args:
        path_to_test: directory that the file names in ``labels[0]`` are
            joined onto.
        conf_level: minimum confidence (inclusive) a detection needs in order
            to be counted.

    Returns:
        A ``Counter`` mapping class display name to the number of detections
        at or above ``conf_level``, summed over all images. It is empty when
        there are no images or nothing passes the threshold.
    """
    descr_total = Counter()
    for image_name in labels[0]:
        image = caffe.io.load_image(os.path.join(path_to_test, image_name))
        net.blobs['data'].data[...] = transformer.preprocess('data', image)

        # Forward pass.
        detections = net.forward()['detection_out']

        # Per detection row, column 1 is read as the class label and column 2
        # as the confidence.
        det_label = detections[0, 0, :, 1]
        det_conf = detections[0, 0, :, 2]

        # Keep only the detections at or above the requested confidence.
        top_label_indices = det_label[det_conf >= conf_level].tolist()

        # Accumulate in place instead of building a new Counter per image.
        descr_total.update(get_labelname(labelmap, top_label_indices))
        print("Done!")
    return descr_total

path_to_test = "/home/veronika/materials/cv/cv_organizer/mydata/test/"
# One full pass over the test set per confidence threshold, lowest first.
object_desc_02, object_desc_04, object_desc_06, object_desc_08 = (
    get_object_descr(path_to_test, threshold)
    for threshold in (0.2, 0.4, 0.6, 0.8)
)

This is the per-class distribution of detections at a model confidence of at least 80%.

object_desc_08

Counter({u'aeroplane': 8,
         u'bicycle': 54,
         u'bird': 6,
         u'boat': 31,
         u'bottle': 3,
         u'bus': 5,
         u'car': 62,
         u'cat': 1,
         u'chair': 71,
         u'diningtable': 19,
         u'dog': 16,
         u'horse': 49,
         u'motorbike': 9,
         u'person': 2158,
         u'pottedplant': 30,
         u'sheep': 1,
         u'sofa': 14,
         u'train': 6,
         u'tvmonitor': 18})

# Turn each per-threshold Counter into a two-column frame: the class name and
# the number of detections at that threshold.
object_desc_02_df, object_desc_04_df, object_desc_06_df = (
    pd.DataFrame(counts.items(), columns=['ClassType', prob_column])
    for counts, prob_column in (
        (object_desc_02, 'Prob02'),
        (object_desc_04, 'Prob04'),
        (object_desc_06, 'Prob06'),
    )
)
# The 0.8 threshold is not tabulated:
#object_desc_08_df = pd.DataFrame(object_desc_08.items(), columns=['ClassType', 'Prob08'])

The following table was my actual goal: it shows how many objects the CNN found per class at different confidence thresholds.

# Outer-join the per-threshold frames into one wide table. ClassType is the
# only column the frames share, so it is the join key.
totaldata = (
    object_desc_02_df
    .merge(object_desc_04_df, how='outer')
    .merge(object_desc_06_df, how="outer")
)
#totaldata = pd.merge(totaldata, object_desc_08_df, how = "outer")
totaldata

	ClassType	Prob02	Prob04	Prob06
0	sheep	17	7	4
1	bottle	42	9	3
2	horse	101	76	64
3	bicycle	168	92	61
4	motorbike	50	25	14
5	cow	17	5	1
6	bus	21	13	11
7	dog	74	46	29
8	cat	4	1	1
9	person	4688	3367	2699
10	train	41	21	11
11	diningtable	70	41	26
12	aeroplane	32	18	13
13	sofa	54	39	24
14	pottedplant	146	73	45
15	tvmonitor	85	48	36
16	chair	357	186	112
17	bird	42	22	14
18	boat	102	65	45
19	car	192	119	89

The next step was to transform the dataframe into the long format that is needed for plotting bar plots with the ggplot library. More details are in the next post; the visualization of this dataframe is in Part 2.2.

# Reshape wide -> long: one row per (class, threshold) pair, with the
# threshold column name in 'Probs' and the detection count in 'Amount'.
totaldata_melted = pd.melt(
    totaldata,
    id_vars=['ClassType'],
    value_vars=['Prob02', 'Prob04', 'Prob06'],
    var_name='Probs',
    value_name='Amount',
)
totaldata_melted

	ClassType	Probs	Amount
0	sheep	Prob02	17
1	bottle	Prob02	42
2	horse	Prob02	101
3	bicycle	Prob02	168
4	motorbike	Prob02	50
5	cow	Prob02	17
6	bus	Prob02	21
7	dog	Prob02	74
8	cat	Prob02	4
9	person	Prob02	4688
10	train	Prob02	41
11	diningtable	Prob02	70
12	aeroplane	Prob02	32
13	sofa	Prob02	54
14	pottedplant	Prob02	146
15	tvmonitor	Prob02	85
16	chair	Prob02	357
17	bird	Prob02	42
18	boat	Prob02	102
19	car	Prob02	192
20	sheep	Prob04	7
21	bottle	Prob04	9
22	horse	Prob04	76
23	bicycle	Prob04	92
24	motorbike	Prob04	25
25	cow	Prob04	5
26	bus	Prob04	13
27	dog	Prob04	46
28	cat	Prob04	1
29	person	Prob04	3367
30	train	Prob04	21
31	diningtable	Prob04	41
32	aeroplane	Prob04	18
33	sofa	Prob04	39
34	pottedplant	Prob04	73
35	tvmonitor	Prob04	48
36	chair	Prob04	186
37	bird	Prob04	22
38	boat	Prob04	65
39	car	Prob04	119
40	sheep	Prob06	4
41	bottle	Prob06	3
42	horse	Prob06	64
43	bicycle	Prob06	61
44	motorbike	Prob06	14
45	cow	Prob06	1
46	bus	Prob06	11
47	dog	Prob06	29
48	cat	Prob06	1
49	person	Prob06	2699
50	train	Prob06	11
51	diningtable	Prob06	26
52	aeroplane	Prob06	13
53	sofa	Prob06	24
54	pottedplant	Prob06	45
55	tvmonitor	Prob06	36
56	chair	Prob06	112
57	bird	Prob06	14
58	boat	Prob06	45
59	car	Prob06	89

# Save the long-format table as CSV at a fixed absolute path.
totaldata_melted.to_csv("/home/veronika/materials/cv/detection/total_classes.csv")

All details about SSD are here: https://github.com/weiliu89/caffe/tree/ssd/examples

All testset detection analysis. Part 2

by Veronika Yurchuk

All testset detection analysis. Part 2

by Veronika Yurchuk

In this short post I play a bit more with SSD. My goal was to count, for every class, all the objects that the network was able to detect. I was also interested in how the class distribution changes with the network’s confidence threshold.