Water's Home

Just another Life Style

0%

The Scene Text Detection & Extraction & Recognition

Separate Channels

/**
 * Splits an image into the single-channel planes that are fed to the ER filters.
 *
 * - 8-bit single-channel input: returns the image followed by its inverse
 *   (255 - src).
 * - 8-bit 3-channel input: returns the channels produced by
 *   computeNMChannels(), followed by the inverse of each of those channels
 *   except the last one.
 * - Any other type: prints an error message and terminates the process
 *   with exit(-1).
 *
 * @param src Input image (CV_8UC1 or CV_8UC3).
 * @return The list of channels to process.
 */
vector<Mat> separateChannels(Mat& src)
{
    vector<Mat> channels;

    // Grayscale images
    if (src.type() == CV_8U || src.type() == CV_8UC1) {
        channels.push_back(src);
        channels.push_back(255 - src);
        return channels;
    }

    // Colored images
    if (src.type() == CV_8UC3) {
        computeNMChannels(src, channels);
        // The last computed channel is intentionally not inverted
        // (presumably the gradient-magnitude channel of the default
        // computeNMChannels mode -- TODO confirm against the OpenCV docs).
        int size = static_cast<int>(channels.size()) - 1;
        for (int c = 0; c < size; c++)
            channels.push_back(255 - channels[c]);
        return channels;
    }

    // Other types
    cout << "Invalid image format!" << endl;
    exit(-1);
}

Detection & Draw Groups Boxes

// Convert the input image to grayscale.
// Just do Mat processed = input; to work with colors.
Mat processed;
cvtColor(input, processed, CV_RGB2GRAY);

auto channels = separateChannels(processed);

// Create ERFilter objects with the 1st and 2nd stage classifiers
auto filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"), 15, 0.00015f, 0.13f, 0.2f, true, 0.1f);
auto filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"), 0.5);

// Extract text regions using the Neumann & Matas algorithm.
// Each channel gets its own list of extremal regions; the second-stage
// filter runs on the list produced by the first stage.
cout << "Processing " << channels.size() << " channels..." << endl;
vector<vector<ERStat>> regions(channels.size());
for (size_t c = 0; c < channels.size(); c++)
{
    cout << "    Channel " << (c+1) << endl;
    filter1->run(channels[c], regions[c]);
    filter2->run(channels[c], regions[c]);
}
// The filters are no longer needed once every channel has been processed.
filter1.release();
filter2.release();

// Separate character groups from regions
vector<vector<Vec2i>> groups;
vector<Rect> groupRects;
erGrouping(input, channels, regions, groups, groupRects, ERGROUPING_ORIENTATION_HORIZ);
//erGrouping(input, channels, regions, groups, groupRects, ERGROUPING_ORIENTATION_ANY, "trained_classifier_erGrouping.xml", 0.5);

// Draw a green, 3-pixel-thick box around every detected group (modifies input)
for (const auto& rect : groupRects)
    rectangle(input, rect, Scalar(0, 255, 0), 3);

Get Extremal Region

/**
 * Renders the extremal regions of one text group into a binary image and
 * returns it cropped and deskewed.
 *
 * Every region of the group is re-grown with a mask-only flood fill seeded at
 * the region's stored pixel, so the channel images themselves are not painted.
 *
 * @param channels Channel images the regions were extracted from.
 * @param regions  Per-channel lists of extremal regions.
 * @param group    Group members; each entry is (channel index, region index).
 * @param rect     Bounding box of the group, in image coordinates.
 * @return Result of deskewAndCrop() applied to the cropped mask.
 */
Mat drawER(const vector<Mat> &channels, const vector<vector<ERStat>> &regions, const vector<Vec2i>& group, const Rect& rect)
{
    // floodFill requires a mask 2 pixels wider and taller than the image.
    Mat out = Mat::zeros(channels[0].rows + 2, channels[0].cols + 2, CV_8UC1);

    int flags = 4                       // 4 neighbors
              + (255 << 8)              // paint mask in white (255)
              + FLOODFILL_FIXED_RANGE   // fixed range
              + FLOODFILL_MASK_ONLY;    // paint just the mask

    for (size_t g = 0; g < group.size(); g++)
    {
        int idx = group[g][0];
        const ERStat& er = regions[idx][group[g][1]];

        // Ignore root region
        if (er.parent == nullptr)
            continue;

        // Transform the linear pixel value to row and col
        int px = er.pixel % channels[idx].cols;
        int py = er.pixel / channels[idx].cols;

        // Seed point of the region
        Point p(px, py);

        // Draw the extremal region
        floodFill(
            channels[idx], out,          // Image and mask
            p, Scalar(255),              // Seed and color
            nullptr,                     // No rect
            Scalar(er.level), Scalar(0), // loDiff and upDiff
            flags                        // Flags
        );
    }

    // Crop just the text area. Image pixel (x, y) maps to mask pixel
    // (x + 1, y + 1), so the image-space rect is shifted by one pixel.
    out = out(rect + Point(1, 1));

    // Collect the painted pixels
    vector<Point> points;
    findNonZero(out, points);

    // Use deskew and crop to crop it perfectly
    return deskewAndCrop(out, minAreaRect(points));
}

Create ERFilter

// Create ERFilter objects with the 1st and 2nd stage classifiers
auto filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"), 15, 0.00015f, 0.13f, 0.2f, true, 0.1f);
auto filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"), 0.5);

// Extract text regions using the Neumann & Matas algorithm.
// Each channel gets its own list of extremal regions; the second-stage
// filter runs on the list produced by the first stage.
cout << "Processing " << channels.size() << " channels..." << endl;
vector<vector<ERStat>> regions(channels.size());
for (size_t c = 0; c < channels.size(); c++)
{
    cout << "    Channel " << (c+1) << endl;
    filter1->run(channels[c], regions[c]);
    filter2->run(channels[c], regions[c]);
}
// The filters are no longer needed once every channel has been processed.
filter1.release();
filter2.release();

Separate Characters & OCR->Run

// Separate character groups from regions
vector<vector<Vec2i>> groups;
vector<Rect> groupRects;
erGrouping(input, channels, regions, groups, groupRects, ERGROUPING_ORIENTATION_HORIZ);
//erGrouping(input, channels, regions, groups, groupRects, ERGROUPING_ORIENTATION_ANY, "trained_classifier_erGrouping.xml", 0.5);

// Text recognition: render each group as its own image and run OCR on it
cout << endl << "Detected text:" << endl;
cout << "-------------" << endl;
auto ocr = initOCR("tesseract");
for (size_t i = 0; i < groups.size(); i++)
{
    // groups[i] and groupRects[i] describe the same detected group
    Mat wordImage = drawER(channels, regions, groups[i], groupRects[i]);

    string word;
    ocr->run(wordImage, word);
    cout << word << endl;
}