Scene Text Detection, Extraction & Recognition

Separate Channels

  1. vector<Mat> separateChannels(Mat& src)
  2. {
  3. 	vector<Mat> channels;
  4. 	//Grayscale images
  5. 	if (src.type() == CV_8U || src.type() == CV_8UC1) {
  6. 		channels.push_back(src);
  7. 		channels.push_back(255-src);
  8. 		return channels;
  9. 	}
  10.  
  11. 	//Colored images
  12. 	if (src.type() == CV_8UC3) {
  13. 		computeNMChannels(src, channels);
  14. 		int size = static_cast<int>(channels.size())-1;
  15. 		for (int c = 0; c < size; c++)
  16. 			channels.push_back(255-channels[c]);
  17. 		return channels;
  18. 	}
  19.  
  20. 	//Other types
  21. 	cout << "Invalid image format!" << endl;
  22. 	exit(-1);
  23. }

Detect Text Groups & Draw Their Bounding Boxes

  1.     //Convert the input image to grayscale.
  2.     //Just do Mat processed = input; to work with colors.
  3.     Mat processed;
  4.     cvtColor(input, processed, CV_RGB2GRAY);
  5.  
  6.     auto channels = separateChannels(processed);
  7.  
  8.     // Create ERFilter objects with the 1st and 2nd stage classifiers
  9.     auto filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"),15,0.00015f,0.13f,0.2f,true,0.1f);
  10.     auto filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"),0.5);
  11.  
  12.     //Extract text regions using Newmann & Matas algorithm
  13.     cout << "Processing " << channels.size() << " channels..." << endl;
  14.     vector<vector<ERStat> > regions(channels.size());
  15.     for (int c=0; c < channels.size(); c++)
  16.     {
  17.         cout << "    Channel " << (c+1) << endl;
  18.         filter1->run(channels[c], regions[c]);
  19.         filter2->run(channels[c], regions[c]);
  20.     }
  21.     filter1.release();
  22.     filter2.release();
  23.  
  24.     //Separate character groups from regions
  25.     vector< vector<Vec2i> > groups;
  26.     vector<Rect> groupRects;
  27.     erGrouping(input, channels, regions, groups, groupRects, ERGROUPING_ORIENTATION_HORIZ);
  28.     //erGrouping(input, channels, regions, groups, groupRects, ERGROUPING_ORIENTATION_ANY, "trained_classifier_erGrouping.xml", 0.5);
  29.  
  30.     // draw groups boxes
  31.     for (auto rect : groupRects)
  32.         rectangle(input, rect, Scalar(0, 255, 0), 3);

Get Extremal Region

  1. Mat drawER(const vector<Mat> &channels, const vector<vector<ERStat> > &regions, const vector<Vec2i>& group, const Rect& rect)
  2. {
  3.     Mat out = Mat::zeros(channels[0].rows+2, channels[0].cols+2, CV_8UC1);
  4.  
  5.     int flags = 4					//4 neighbors
  6.                 + (255 << 8)				//paint mask in white (255)
  7. 		+ FLOODFILL_FIXED_RANGE		//fixed range
  8. 		+ FLOODFILL_MASK_ONLY;		//Paint just the mask
  9.  
  10.     for (int g=0; g < group.size(); g++)
  11.     {
  12.         int idx = group[g][0];
  13.         ERStat er = regions[idx][group[g][1]];
  14.  
  15.         //Ignore root region
  16.         if (er.parent == NULL)
  17.             continue;
  18.  
  19.     //Transform the linear pixel value to row and col
  20.     int px = er.pixel % channels[idx].cols;
  21.     int py = er.pixel / channels[idx].cols;
  22.  
  23.     //Create the point and adds it to the list.
  24.     Point p(px, py);
  25.  
  26.     //Draw the extremal region
  27.     floodFill(
  28.                 channels[idx], out,				//Image and mask
  29.                 p, Scalar(255),					//Seed and color
  30.                 nullptr,						//No rect
  31.                 Scalar(er.level),Scalar(0),		//LoDiff and upDiff
  32.                 flags							//Flags
  33.             );
  34.     }
  35.  
  36.     //Crop just the text area and find it's points
  37.     out = out(rect);
  38.  
  39.     vector<Point> points;
  40.     findNonZero(out, points);
  41.     //Use deskew and crop to crop it perfectly
  42.     return deskewAndCrop(out, minAreaRect(points));
  43. }

Create ERFilter

  1.     // Create ERFilter objects with the 1st and 2nd stage classifiers
  2.     auto filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"),15,0.00015f,0.13f,0.2f,true,0.1f);
  3.     auto filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"),0.5);
  4.  
  5.     //Extract text regions using Newmann & Matas algorithm
  6.     cout << "Processing " << channels.size() << " channels..." << endl;
  7.     vector<vector<ERStat> > regions(channels.size());
  8.     for (int c=0; c < channels.size(); c++)
  9.     {
  10.         cout << "    Channel " << (c+1) << endl;
  11.         filter1->run(channels[c], regions[c]);
  12.         filter2->run(channels[c], regions[c]);
  13.     }
  14.     filter1.release();
  15.     filter2.release();

Group Characters & Run OCR

  1.     //Separate character groups from regions
  2.     vector< vector<Vec2i> > groups;
  3.     vector<Rect> groupRects;
  4.     erGrouping(input, channels, regions, groups, groupRects, ERGROUPING_ORIENTATION_HORIZ);
  5.     //erGrouping(input, channels, regions, groups, groupRects, ERGROUPING_ORIENTATION_ANY, "trained_classifier_erGrouping.xml", 0.5);
  6.  
  7.     // text detection
  8.     cout << endl << "Detected text:" << endl;
  9.     cout << "-------------" << endl;
  10.     auto ocr = initOCR("tesseract");
  11.     for (int i = 0; i < groups.size(); i++)
  12.     {
  13.         Mat wordImage = drawER(channels, regions, groups[i], groupRects[i]);
  14.  
  15.         string word;
  16.         ocr->run(wordImage, word);
  17.         cout << word << endl;
  18.     }

Leave a Reply

Your email address will not be published. Required fields are marked *