The Scene Text Detection & Extraction & Recognition

//Colored images
if (src.type() == CV_8UC3) {
computeNMChannels(src, channels);
int size = static_cast(channels.size())-1;
for (int c = 0; c < size; c++)
channels.push_back(255-channels[c]);
return channels;
}

//Other types
cout << “Invalid image format!” << endl;
exit(-1);
}

Detection & Draw Groups Boxes

//Convert the input image to grayscale.
//Just do Mat processed = input; to work with colors.
Mat processed;
cvtColor(input, processed, CV\_RGB2GRAY);

auto channels = separateChannels(processed);

// Create ERFilter objects with the 1st and 2nd stage classifiers
auto filter1 = createERFilterNM1(loadClassifierNM1("trained\_classifierNM1.xml"),15,0.00015f,0.13f,0.2f,true,0.1f);
auto filter2 = createERFilterNM2(loadClassifierNM2("trained\_classifierNM2.xml"),0.5);

//Extract text regions using Newmann & Matas algorithm
cout << "Processing " << channels.size() << " channels..." << endl;
vector > regions(channels.size());
for (int c=0; c < channels.size(); c++)
{
    cout << "    Channel " << (c+1) << endl;
    filter1->run(channels\[c\], regions\[c\]);
    filter2->run(channels\[c\], regions\[c\]);
}
filter1.release();
filter2.release();

//Separate character groups from regions
vector< vector > groups;
vector groupRects;
erGrouping(input, channels, regions, groups, groupRects, ERGROUPING\_ORIENTATION\_HORIZ);
//erGrouping(input, channels, regions, groups, groupRects, ERGROUPING\_ORIENTATION\_ANY, "trained\_classifier\_erGrouping.xml", 0.5);

// draw groups boxes
for (auto rect : groupRects)
    rectangle(input, rect, Scalar(0, 255, 0), 3);

Get Extremal Region

/**
 * Paints the extremal regions of one character group into a binary mask and
 * returns the deskewed crop of that mask.
 *
 * @param channels Image channels the regions were extracted from.
 * @param regions  Extremal regions per channel; regions[i] pairs with channels[i].
 * @param group    Group members; each entry is (channel index, region index).
 * @param rect     Bounding box of the group, used to crop the mask.
 * @return The result of deskewAndCrop() applied to the cropped mask and the
 *         minimum-area rectangle enclosing its non-zero pixels.
 */
Mat drawER(const vector<Mat> &channels, const vector<vector<ERStat> > &regions, const vector<Vec2i>& group, const Rect& rect)
{
    // floodFill requires a mask 2 pixels wider and taller than the image.
    Mat out = Mat::zeros(channels[0].rows + 2, channels[0].cols + 2, CV_8UC1);

    const int flags = 4                      // 4 neighbors
                    + (255 << 8)             // paint mask in white (255)
                    + FLOODFILL_FIXED_RANGE  // fixed range
                    + FLOODFILL_MASK_ONLY;   // paint just the mask

    for (const auto& member : group)
    {
        const int idx = member[0];
        const ERStat& er = regions[idx][member[1]];

        // Ignore root region
        if (er.parent == nullptr)
            continue;

        // Transform the linear pixel value to column and row
        const int px = er.pixel % channels[idx].cols;
        const int py = er.pixel / channels[idx].cols;
        const Point seed(px, py);

        // Draw the extremal region into the mask
        floodFill(
            channels[idx], out,           // image and mask
            seed, Scalar(255),            // seed and color
            nullptr,                      // no bounding rect requested
            Scalar(er.level), Scalar(0),  // loDiff and upDiff
            flags
        );
    }

    // Crop just the text area and find its points.
    // NOTE(review): floodFill maps image pixel (x, y) to mask pixel (x+1, y+1),
    // so this crop appears shifted by one pixel relative to rect; confirm
    // whether that offset is intended.
    out = out(rect);

    vector<Point> points;
    findNonZero(out, points);

    // Use deskew and crop to crop it perfectly
    return deskewAndCrop(out, minAreaRect(points));
}

Create ERFilter

// Create ERFilter objects with the 1st and 2nd stage classifiers
auto filter1 = createERFilterNM1(loadClassifierNM1("trained\_classifierNM1.xml"),15,0.00015f,0.13f,0.2f,true,0.1f);
auto filter2 = createERFilterNM2(loadClassifierNM2("trained\_classifierNM2.xml"),0.5);

//Extract text regions using Newmann & Matas algorithm
cout << "Processing " << channels.size() << " channels..." << endl;
vector > regions(channels.size());
for (int c=0; c < channels.size(); c++)
{
    cout << "    Channel " << (c+1) << endl;
    filter1->run(channels\[c\], regions\[c\]);
    filter2->run(channels\[c\], regions\[c\]);
}
filter1.release();
filter2.release();

Separate Characters & OCR->Run

//Separate character groups from regions
vector< vector > groups;
vector groupRects;
erGrouping(input, channels, regions, groups, groupRects, ERGROUPING\_ORIENTATION\_HORIZ);
//erGrouping(input, channels, regions, groups, groupRects, ERGROUPING\_ORIENTATION\_ANY, "trained\_classifier\_erGrouping.xml", 0.5);

// text detection
cout << endl << "Detected text:" << endl;
cout << "-------------" << endl;
auto ocr = initOCR("tesseract");
for (int i = 0; i < groups.size(); i++)
{
    Mat wordImage = drawER(channels, regions, groups\[i\], groupRects\[i\]);

    string word;
    ocr->run(wordImage, word);
    cout << word << endl;
}

Water's Home

The Scene Text Detection & Extraction & Recognition

Separate Channels

Detection & Draw Groups Boxes

Get Extremal Region

Create ERFilter

Separate Characters & OCR->Run