OpenShot Library | libopenshot  0.3.2
ObjectDetection.cpp
Go to the documentation of this file.
1 
10 // Copyright (c) 2008-2019 OpenShot Studios, LLC
11 //
12 // SPDX-License-Identifier: LGPL-3.0-or-later
13 
14 #include <fstream>
15 #include <iostream>
16 
18 #include "effects/Tracker.h"
19 #include "Exceptions.h"
20 #include "Timeline.h"
21 #include "objdetectdata.pb.h"
22 
23 #include <QImage>
24 #include <QPainter>
25 #include <QRectF>
26 using namespace std;
27 using namespace openshot;
28 
29 
31 ObjectDetection::ObjectDetection(std::string clipObDetectDataPath)
32 {
33  // Init effect properties
34  init_effect_details();
35 
36  // Tries to load the tracker data from protobuf
37  LoadObjDetectdData(clipObDetectDataPath);
38 
39  // Initialize the selected object index as the first object index
40  selectedObjectIndex = trackedObjects.begin()->first;
41 }
42 
43 // Default constructor
44 ObjectDetection::ObjectDetection()
45 {
46  // Init effect properties
47  init_effect_details();
48 
49  // Initialize the selected object index as the first object index
50  selectedObjectIndex = trackedObjects.begin()->first;
51 }
52 
53 // Init effect settings
54 void ObjectDetection::init_effect_details()
55 {
57  InitEffectInfo();
58 
60  info.class_name = "ObjectDetection";
61  info.name = "Object Detector";
62  info.description = "Detect objects through the video.";
63  info.has_audio = false;
64  info.has_video = true;
65  info.has_tracked_object = true;
66 }
67 
68 // This method is required for all derived classes of EffectBase, and returns a
69 // modified openshot::Frame object
70 std::shared_ptr<Frame> ObjectDetection::GetFrame(std::shared_ptr<Frame> frame, int64_t frame_number)
71 {
72  // Get the frame's image
73  cv::Mat cv_image = frame->GetImageCV();
74 
75  // Check if frame isn't NULL
76  if(cv_image.empty()){
77  return frame;
78  }
79 
80  // Initialize the Qt rectangle that will hold the positions of the bounding-box
81  std::vector<QRectF> boxRects;
82  // Initialize the image of the TrackedObject child clip
83  std::vector<std::shared_ptr<QImage>> childClipImages;
84 
85  // Check if track data exists for the requested frame
86  if (detectionsData.find(frame_number) != detectionsData.end()) {
87  float fw = cv_image.size().width;
88  float fh = cv_image.size().height;
89 
90  DetectionData detections = detectionsData[frame_number];
91  for(int i = 0; i<detections.boxes.size(); i++){
92 
93  // Does not show boxes with confidence below the threshold
94  if(detections.confidences.at(i) < confidence_threshold){
95  continue;
96  }
97  // Just display selected classes
98  if( display_classes.size() > 0 &&
99  std::find(display_classes.begin(), display_classes.end(), classNames[detections.classIds.at(i)]) == display_classes.end()){
100  continue;
101  }
102 
103  // Get the object id
104  int objectId = detections.objectIds.at(i);
105 
106  // Search for the object in the trackedObjects map
107  auto trackedObject_it = trackedObjects.find(objectId);
108 
109  // Cast the object as TrackedObjectBBox
110  std::shared_ptr<TrackedObjectBBox> trackedObject = std::static_pointer_cast<TrackedObjectBBox>(trackedObject_it->second);
111 
112  // Check if the tracked object has data for this frame
113  if (trackedObject->Contains(frame_number) &&
114  trackedObject->visible.GetValue(frame_number) == 1)
115  {
116  // Get the bounding-box of given frame
117  BBox trackedBox = trackedObject->GetBox(frame_number);
118  bool draw_text = !display_box_text.GetValue(frame_number);
119  std::vector<int> stroke_rgba = trackedObject->stroke.GetColorRGBA(frame_number);
120  int stroke_width = trackedObject->stroke_width.GetValue(frame_number);
121  float stroke_alpha = trackedObject->stroke_alpha.GetValue(frame_number);
122  std::vector<int> bg_rgba = trackedObject->background.GetColorRGBA(frame_number);
123  float bg_alpha = trackedObject->background_alpha.GetValue(frame_number);
124 
125  cv::Rect2d box(
126  (int)( (trackedBox.cx-trackedBox.width/2)*fw),
127  (int)( (trackedBox.cy-trackedBox.height/2)*fh),
128  (int)( trackedBox.width*fw),
129  (int)( trackedBox.height*fh)
130  );
131 
132  // If the Draw Box property is off, then make the box invisible
133  if (trackedObject->draw_box.GetValue(frame_number) == 0)
134  {
135  bg_alpha = 1.0;
136  stroke_alpha = 1.0;
137  }
138 
139  drawPred(detections.classIds.at(i), detections.confidences.at(i),
140  box, cv_image, detections.objectIds.at(i), bg_rgba, bg_alpha, 1, true, draw_text);
141  drawPred(detections.classIds.at(i), detections.confidences.at(i),
142  box, cv_image, detections.objectIds.at(i), stroke_rgba, stroke_alpha, stroke_width, false, draw_text);
143 
144 
145  // Get the Detected Object's child clip
146  if (trackedObject->ChildClipId() != ""){
147  // Cast the parent timeline of this effect
148  Timeline* parentTimeline = static_cast<Timeline *>(ParentTimeline());
149  if (parentTimeline){
150  // Get the Tracked Object's child clip
151  Clip* childClip = parentTimeline->GetClip(trackedObject->ChildClipId());
152 
153  if (childClip){
154  // Get the image of the child clip for this frame
155  std::shared_ptr<Frame> childClipFrame = childClip->GetFrame(frame_number);
156  childClipImages.push_back(childClipFrame->GetImage());
157 
158  // Set the Qt rectangle with the bounding-box properties
159  QRectF boxRect;
160  boxRect.setRect((int)((trackedBox.cx-trackedBox.width/2)*fw),
161  (int)((trackedBox.cy - trackedBox.height/2)*fh),
162  (int)(trackedBox.width*fw),
163  (int)(trackedBox.height*fh));
164  boxRects.push_back(boxRect);
165  }
166  }
167  }
168  }
169  }
170  }
171 
172  // Update Qt image with new Opencv frame
173  frame->SetImageCV(cv_image);
174 
175  // Set the bounding-box image with the Tracked Object's child clip image
176  if(boxRects.size() > 0){
177  // Get the frame image
178  QImage frameImage = *(frame->GetImage());
179  for(int i; i < boxRects.size();i++){
180  // Set a Qt painter to the frame image
181  QPainter painter(&frameImage);
182  // Draw the child clip image inside the bounding-box
183  painter.drawImage(boxRects[i], *childClipImages[i]);
184  }
185  // Set the frame image as the composed image
186  frame->AddImage(std::make_shared<QImage>(frameImage));
187  }
188 
189  return frame;
190 }
191 
192 void ObjectDetection::DrawRectangleRGBA(cv::Mat &frame_image, cv::RotatedRect box, std::vector<int> color, float alpha,
193  int thickness, bool is_background){
194  // Get the bouding box vertices
195  cv::Point2f vertices2f[4];
196  box.points(vertices2f);
197 
198  // TODO: take a rectangle of frame_image by refencence and draw on top of that to improve speed
199  // select min enclosing rectangle to draw on a small portion of the image
200  // cv::Rect rect = box.boundingRect();
201  // cv::Mat image = frame_image(rect)
202 
203  if(is_background){
204  cv::Mat overlayFrame;
205  frame_image.copyTo(overlayFrame);
206 
207  // draw bounding box background
208  cv::Point vertices[4];
209  for(int i = 0; i < 4; ++i){
210  vertices[i] = vertices2f[i];}
211 
212  cv::Rect rect = box.boundingRect();
213  cv::fillConvexPoly(overlayFrame, vertices, 4, cv::Scalar(color[2],color[1],color[0]), cv::LINE_AA);
214  // add opacity
215  cv::addWeighted(overlayFrame, 1-alpha, frame_image, alpha, 0, frame_image);
216  }
217  else{
218  cv::Mat overlayFrame;
219  frame_image.copyTo(overlayFrame);
220 
221  // Draw bounding box
222  for (int i = 0; i < 4; i++)
223  {
224  cv::line(overlayFrame, vertices2f[i], vertices2f[(i+1)%4], cv::Scalar(color[2],color[1],color[0]),
225  thickness, cv::LINE_AA);
226  }
227 
228  // add opacity
229  cv::addWeighted(overlayFrame, 1-alpha, frame_image, alpha, 0, frame_image);
230  }
231 }
232 
233 void ObjectDetection::drawPred(int classId, float conf, cv::Rect2d box, cv::Mat& frame, int objectNumber, std::vector<int> color,
234  float alpha, int thickness, bool is_background, bool display_text)
235 {
236 
237  if(is_background){
238  cv::Mat overlayFrame;
239  frame.copyTo(overlayFrame);
240 
241  //Draw a rectangle displaying the bounding box
242  cv::rectangle(overlayFrame, box, cv::Scalar(color[2],color[1],color[0]), cv::FILLED);
243 
244  // add opacity
245  cv::addWeighted(overlayFrame, 1-alpha, frame, alpha, 0, frame);
246  }
247  else{
248  cv::Mat overlayFrame;
249  frame.copyTo(overlayFrame);
250 
251  //Draw a rectangle displaying the bounding box
252  cv::rectangle(overlayFrame, box, cv::Scalar(color[2],color[1],color[0]), thickness);
253 
254  if(display_text){
255  //Get the label for the class name and its confidence
256  std::string label = cv::format("%.2f", conf);
257  if (!classNames.empty())
258  {
259  CV_Assert(classId < (int)classNames.size());
260  label = classNames[classId] + ":" + label;
261  }
262 
263  //Display the label at the top of the bounding box
264  int baseLine;
265  cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
266 
267  double left = box.x;
268  double top = std::max((int)box.y, labelSize.height);
269 
270  cv::rectangle(overlayFrame, cv::Point(left, top - round(1.025*labelSize.height)), cv::Point(left + round(1.025*labelSize.width), top + baseLine),
271  cv::Scalar(color[2],color[1],color[0]), cv::FILLED);
272  putText(overlayFrame, label, cv::Point(left+1, top), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0,0,0),1);
273  }
274  // add opacity
275  cv::addWeighted(overlayFrame, 1-alpha, frame, alpha, 0, frame);
276  }
277 }
278 
279 // Load protobuf data file
280 bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){
281  // Create tracker message
282  pb_objdetect::ObjDetect objMessage;
283 
284  // Read the existing tracker message.
285  std::fstream input(inputFilePath, std::ios::in | std::ios::binary);
286  if (!objMessage.ParseFromIstream(&input)) {
287  std::cerr << "Failed to parse protobuf message." << std::endl;
288  return false;
289  }
290 
291  // Make sure classNames, detectionsData and trackedObjects are empty
292  classNames.clear();
293  detectionsData.clear();
294  trackedObjects.clear();
295 
296  // Seed to generate same random numbers
297  std::srand(1);
298  // Get all classes names and assign a color to them
299  for(int i = 0; i < objMessage.classnames_size(); i++)
300  {
301  classNames.push_back(objMessage.classnames(i));
302  classesColor.push_back(cv::Scalar(std::rand()%205 + 50, std::rand()%205 + 50, std::rand()%205 + 50));
303  }
304 
305  // Iterate over all frames of the saved message
306  for (size_t i = 0; i < objMessage.frame_size(); i++)
307  {
308  // Create protobuf message reader
309  const pb_objdetect::Frame& pbFrameData = objMessage.frame(i);
310 
311  // Get frame Id
312  size_t id = pbFrameData.id();
313 
314  // Load bounding box data
315  const google::protobuf::RepeatedPtrField<pb_objdetect::Frame_Box > &pBox = pbFrameData.bounding_box();
316 
317  // Construct data vectors related to detections in the current frame
318  std::vector<int> classIds;
319  std::vector<float> confidences;
320  std::vector<cv::Rect_<float>> boxes;
321  std::vector<int> objectIds;
322 
323  // Iterate through the detected objects
324  for(int i = 0; i < pbFrameData.bounding_box_size(); i++)
325  {
326  // Get bounding box coordinates
327  float x = pBox.Get(i).x();
328  float y = pBox.Get(i).y();
329  float w = pBox.Get(i).w();
330  float h = pBox.Get(i).h();
331  // Get class Id (which will be assign to a class name)
332  int classId = pBox.Get(i).classid();
333  // Get prediction confidence
334  float confidence = pBox.Get(i).confidence();
335 
336  // Get the object Id
337  int objectId = pBox.Get(i).objectid();
338 
339  // Search for the object id on trackedObjects map
340  auto trackedObject = trackedObjects.find(objectId);
341  // Check if object already exists on the map
342  if (trackedObject != trackedObjects.end())
343  {
344  // Add a new BBox to it
345  trackedObject->second->AddBox(id, x+(w/2), y+(h/2), w, h, 0.0);
346  }
347  else
348  {
349  // There is no tracked object with that id, so insert a new one
350  TrackedObjectBBox trackedObj((int)classesColor[classId](0), (int)classesColor[classId](1), (int)classesColor[classId](2), (int)0);
351  trackedObj.AddBox(id, x+(w/2), y+(h/2), w, h, 0.0);
352 
353  std::shared_ptr<TrackedObjectBBox> trackedObjPtr = std::make_shared<TrackedObjectBBox>(trackedObj);
354  ClipBase* parentClip = this->ParentClip();
355  trackedObjPtr->ParentClip(parentClip);
356 
357  // Create a temp ID. This ID is necessary to initialize the object_id Json list
358  // this Id will be replaced by the one created in the UI
359  trackedObjPtr->Id(std::to_string(objectId));
360  trackedObjects.insert({objectId, trackedObjPtr});
361  }
362 
363  // Create OpenCV rectangle with the bouding box info
364  cv::Rect_<float> box(x, y, w, h);
365 
366  // Push back data into vectors
367  boxes.push_back(box);
368  classIds.push_back(classId);
369  confidences.push_back(confidence);
370  objectIds.push_back(objectId);
371  }
372 
373  // Assign data to object detector map
374  detectionsData[id] = DetectionData(classIds, confidences, boxes, id, objectIds);
375  }
376 
377  // Delete all global objects allocated by libprotobuf.
378  google::protobuf::ShutdownProtobufLibrary();
379 
380  return true;
381 }
382 
383 // Get the indexes and IDs of all visible objects in the given frame
384 std::string ObjectDetection::GetVisibleObjects(int64_t frame_number) const{
385 
386  // Initialize the JSON objects
387  Json::Value root;
388  root["visible_objects_index"] = Json::Value(Json::arrayValue);
389  root["visible_objects_id"] = Json::Value(Json::arrayValue);
390 
391  // Check if track data exists for the requested frame
392  if (detectionsData.find(frame_number) == detectionsData.end()){
393  return root.toStyledString();
394  }
395  DetectionData detections = detectionsData.at(frame_number);
396 
397  // Iterate through the tracked objects
398  for(int i = 0; i<detections.boxes.size(); i++){
399  // Does not show boxes with confidence below the threshold
400  if(detections.confidences.at(i) < confidence_threshold){
401  continue;
402  }
403 
404  // Just display selected classes
405  if( display_classes.size() > 0 &&
406  std::find(display_classes.begin(), display_classes.end(), classNames[detections.classIds.at(i)]) == display_classes.end()){
407  continue;
408  }
409 
410  int objectId = detections.objectIds.at(i);
411  // Search for the object in the trackedObjects map
412  auto trackedObject = trackedObjects.find(objectId);
413 
414  // Get the tracked object JSON properties for this frame
415  Json::Value trackedObjectJSON = trackedObject->second->PropertiesJSON(frame_number);
416 
417  if (trackedObjectJSON["visible"]["value"].asBool() &&
418  trackedObject->second->ExactlyContains(frame_number)){
419  // Save the object's index and ID if it's visible in this frame
420  root["visible_objects_index"].append(trackedObject->first);
421  root["visible_objects_id"].append(trackedObject->second->Id());
422  }
423  }
424 
425  return root.toStyledString();
426 }
427 
428 // Generate JSON string of this object
429 std::string ObjectDetection::Json() const {
430 
431  // Return formatted string
432  return JsonValue().toStyledString();
433 }
434 
435 // Generate Json::Value for this object
436 Json::Value ObjectDetection::JsonValue() const {
437 
438  // Create root json object
439  Json::Value root = EffectBase::JsonValue(); // get parent properties
440  root["type"] = info.class_name;
441  root["protobuf_data_path"] = protobuf_data_path;
442  root["selected_object_index"] = selectedObjectIndex;
443  root["confidence_threshold"] = confidence_threshold;
444  root["display_box_text"] = display_box_text.JsonValue();
445 
446  // Add tracked object's IDs to root
447  Json::Value objects;
448  for (auto const& trackedObject : trackedObjects){
449  Json::Value trackedObjectJSON = trackedObject.second->JsonValue();
450  // add object json
451  objects[trackedObject.second->Id()] = trackedObjectJSON;
452  }
453  root["objects"] = objects;
454 
455  // return JsonValue
456  return root;
457 }
458 
459 // Load JSON string into this object
460 void ObjectDetection::SetJson(const std::string value) {
461 
462  // Parse JSON string into JSON objects
463  try
464  {
465  const Json::Value root = openshot::stringToJson(value);
466  // Set all values that match
467  SetJsonValue(root);
468  }
469  catch (const std::exception& e)
470  {
471  // Error parsing JSON (or missing keys)
472  throw InvalidJSON("JSON is invalid (missing keys or invalid data types)");
473  }
474 }
475 
476 // Load Json::Value into this object
477 void ObjectDetection::SetJsonValue(const Json::Value root) {
478  // Set parent data
479  EffectBase::SetJsonValue(root);
480 
481  // Set data from Json (if key is found)
482  if (!root["protobuf_data_path"].isNull() && protobuf_data_path.size() <= 1){
483  protobuf_data_path = root["protobuf_data_path"].asString();
484 
485  if(!LoadObjDetectdData(protobuf_data_path)){
486  throw InvalidFile("Invalid protobuf data path", "");
487  protobuf_data_path = "";
488  }
489  }
490 
491  // Set the selected object index
492  if (!root["selected_object_index"].isNull())
493  selectedObjectIndex = root["selected_object_index"].asInt();
494 
495  if (!root["confidence_threshold"].isNull())
496  confidence_threshold = root["confidence_threshold"].asFloat();
497 
498  if (!root["display_box_text"].isNull())
499  display_box_text.SetJsonValue(root["display_box_text"]);
500 
501  if (!root["class_filter"].isNull()){
502  class_filter = root["class_filter"].asString();
503  std::stringstream ss(class_filter);
504  display_classes.clear();
505  while( ss.good() )
506  {
507  // Parse comma separated string
508  std::string substr;
509  std::getline( ss, substr, ',' );
510  display_classes.push_back( substr );
511  }
512  }
513 
514  if (!root["objects"].isNull()){
515  for (auto const& trackedObject : trackedObjects){
516  std::string obj_id = std::to_string(trackedObject.first);
517  if(!root["objects"][obj_id].isNull()){
518  trackedObject.second->SetJsonValue(root["objects"][obj_id]);
519  }
520  }
521  }
522 
523  // Set the tracked object's ids
524  if (!root["objects_id"].isNull()){
525  for (auto const& trackedObject : trackedObjects){
526  Json::Value trackedObjectJSON;
527  trackedObjectJSON["box_id"] = root["objects_id"][trackedObject.first].asString();
528  trackedObject.second->SetJsonValue(trackedObjectJSON);
529  }
530  }
531 }
532 
533 // Get all properties for a specific frame
534 std::string ObjectDetection::PropertiesJSON(int64_t requested_frame) const {
535 
536  // Generate JSON properties list
537  Json::Value root;
538 
539  Json::Value objects;
540  if(trackedObjects.count(selectedObjectIndex) != 0){
541  auto selectedObject = trackedObjects.at(selectedObjectIndex);
542  if (selectedObject){
543  Json::Value trackedObjectJSON = selectedObject->PropertiesJSON(requested_frame);
544  // add object json
545  objects[selectedObject->Id()] = trackedObjectJSON;
546  }
547  }
548  root["objects"] = objects;
549 
550  root["selected_object_index"] = add_property_json("Selected Object", selectedObjectIndex, "int", "", NULL, 0, 200, false, requested_frame);
551  root["id"] = add_property_json("ID", 0.0, "string", Id(), NULL, -1, -1, true, requested_frame);
552  root["position"] = add_property_json("Position", Position(), "float", "", NULL, 0, 1000 * 60 * 30, false, requested_frame);
553  root["layer"] = add_property_json("Track", Layer(), "int", "", NULL, 0, 20, false, requested_frame);
554  root["start"] = add_property_json("Start", Start(), "float", "", NULL, 0, 1000 * 60 * 30, false, requested_frame);
555  root["end"] = add_property_json("End", End(), "float", "", NULL, 0, 1000 * 60 * 30, false, requested_frame);
556  root["duration"] = add_property_json("Duration", Duration(), "float", "", NULL, 0, 1000 * 60 * 30, true, requested_frame);
557  root["confidence_threshold"] = add_property_json("Confidence Theshold", confidence_threshold, "float", "", NULL, 0, 1, false, requested_frame);
558  root["class_filter"] = add_property_json("Class Filter", 0.0, "string", class_filter, NULL, -1, -1, false, requested_frame);
559 
560  root["display_box_text"] = add_property_json("Draw Box Text", display_box_text.GetValue(requested_frame), "int", "", &display_box_text, 0, 1.0, false, requested_frame);
561  root["display_box_text"]["choices"].append(add_property_choice_json("Off", 1, display_box_text.GetValue(requested_frame)));
562  root["display_box_text"]["choices"].append(add_property_choice_json("On", 0, display_box_text.GetValue(requested_frame)));
563 
564  // Return formatted string
565  return root.toStyledString();
566 }
Header file for all Exception classes.
Header file for Object Detection effect class.
Header file for Timeline class.
Header file for Tracker effect class.
This abstract class is the base class, used by all clips in libopenshot.
Definition: ClipBase.h:33
This class represents a clip (used to arrange readers on the timeline)
Definition: Clip.h:89
std::shared_ptr< openshot::Frame > GetFrame(int64_t clip_frame_number) override
Get an openshot::Frame object for a specific frame number of this clip. The image size and number of ...
Definition: Clip.cpp:389
Exception for files that can not be found or opened.
Definition: Exceptions.h:188
Exception for invalid JSON.
Definition: Exceptions.h:218
This class represents a timeline.
Definition: Timeline.h:150
openshot::Clip * GetClip(const std::string &id)
Look up a single clip by ID.
Definition: Timeline.cpp:408
This class contains the properties of a tracked object and functions to manipulate it.
void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle) override
Add a BBox to the BoxVec map.
This namespace is the default namespace for all code in the openshot library.
Definition: Compressor.h:29
const Json::Value stringToJson(const std::string value)
Definition: Json.cpp:16
std::vector< cv::Rect_< float > > boxes
std::vector< float > confidences
std::vector< int > classIds
std::vector< int > objectIds
This struct holds the information of a bounding-box.
float cy
y-coordinate of the bounding box center
float height
bounding box height
float cx
x-coordinate of the bounding box center
float width
bounding box width