themblem/alg/libqr.cpp

#include <iostream>
#include <string>
#include "libqr.h"
#include "opencv2/objdetect.hpp"
#include "opencv2/wechat_qrcode.hpp"
#include "string_format.h"
using namespace std;
using namespace cv;

// Perspective-warps `in` into `out` (same size as `in`) so that the
// quadrilateral given by the first four entries of `qr_points` lands on the
// axis-aligned bounding box of all the points.
//
// Returns the four corners of that bounding box in the order
// (min_x, min_y), (max_x, min_y), (max_x, max_y), (min_x, max_y).
static
vector<Point> transform_image(Mat &in, vector<Point> qr_points, Mat &out)
{
    // Bounding box of every supplied point.
    int left = qr_points[0].x;
    int top = qr_points[0].y;
    int right = left;
    int bottom = top;
    for (const auto &corner : qr_points) {
        left = min(corner.x, left);
        top = min(corner.y, top);
        right = max(corner.x, right);
        bottom = max(corner.y, bottom);
    }

    vector<Point> box_corners;
    box_corners.push_back(Point(left, top));
    box_corners.push_back(Point(right, top));
    box_corners.push_back(Point(right, bottom));
    box_corners.push_back(Point(left, bottom));

    // getPerspectiveTransform wants two 4x2 float matrices: where the corners
    // are now, and where they should end up.
    Mat quad(4, 2, CV_32F);
    Mat box(4, 2, CV_32F);
    for (int i = 0; i < 4; i++) {
        quad.at<float>(i, 0) = qr_points[i].x;
        quad.at<float>(i, 1) = qr_points[i].y;
        box.at<float>(i, 0) = box_corners[i].x;
        box.at<float>(i, 1) = box_corners[i].y;
    }

    Mat homography = getPerspectiveTransform(quad, box);
    warpPerspective(in, out, homography, in.size());
    return box_corners;
}

// Detects the first QR code in ps.preprocessed and crops the regions derived
// from it out of *ps.orig.
//
// Reads:  ps.preprocessed, ps.scale, ps.orig.
// Writes: ps.qrcode, ps.qr_points, ps.qr_straighten, ps.straighten,
//         ps.dot_area, ps.dot_area_gray (resized to 64x64).
//
// margin_ratio  margin required around the QR code, as a fraction of its width.
// warp          when true, *ps.orig is perspective-corrected with
//               transform_image() before cropping; otherwise it is used as is.
// err           receives a short reason when the function returns false.
//
// Returns true on success, false when no QR code is found, when the margin
// around it does not fit in the image, or when the code is too small.
bool detect_qr(ProcessState &ps, float margin_ratio, bool warp, string &err)
{
#if WECHAT_QRCODE_USE_MODEL
    auto wr = wechat_qrcode::WeChatQRCode(
            "wechat_qrcode/detect.prototxt",
            "wechat_qrcode/detect.caffemodel",
            "wechat_qrcode/sr.prototxt",
            "wechat_qrcode/sr.caffemodel");
#else
    auto wr = wechat_qrcode::WeChatQRCode();
#endif
    vector<Mat> qrs;
    auto r = wr.detectAndDecode(ps.preprocessed, qrs);

    // Both outputs are indexed below, so both must be non-empty.
    if (r.empty() || qrs.empty()) {
        err = "qr not detected";
        return false;
    }

    ps.qrcode = r[0];
    const Mat &rect = qrs[0];

    // Corners were found in the scaled-down image; dividing by ps.scale maps
    // them back to coordinates of *ps.orig.
    vector<Point> qr_points;
    for (int i = 0; i < 4; i++) {
        qr_points.push_back(Point(rect.at<float>(i, 0) / ps.scale, rect.at<float>(i, 1) / ps.scale));
    }
    ps.qr_points = qr_points;

    Mat warped;
    vector<Point> warped_qr_points;
    if (warp) {
        warped_qr_points = transform_image(*ps.orig, qr_points, warped);
    } else {
        warped = *ps.orig;
        warped_qr_points = qr_points;
    }

    // Axis-aligned bounding box of the (possibly warped) corners.
    int min_x = warped_qr_points[0].x;
    int min_y = warped_qr_points[0].y;
    int max_x = min_x;
    int max_y = min_y;
    for (const auto &p : warped_qr_points) {
        min_x = min(p.x, min_x);
        min_y = min(p.y, min_y);
        max_x = max(p.x, max_x);
        max_y = max(p.y, max_y);
    }

    int margin = (max_x - min_x) * margin_ratio;
    if (min_y < margin || min_x < margin || max_x + margin >= warped.cols || max_y + margin >= warped.rows) {
        err = "qr margin too small";
        return false;
    }

    int qr_width = max_x - min_x;
    int qr_height = max_y - min_y;
    if (qr_width < 200 && qr_height < 200 && qr_width < ps.orig->cols * 0.5 && qr_height < ps.orig->rows * 0.5) {
        printf("(%d, %d) in (%d, %d)\n", qr_width, qr_height, ps.orig->cols, ps.orig->rows);
        err = "qr too small";
        return false;
    }

    // The dot area is a (margin / 2) square; a zero-sized crop would make
    // resize() below throw, so reject it up front.
    const int dot_size = margin / 2;
    if (dot_size < 1) {
        err = "qr margin too small";
        return false;
    }

    Rect qr_rect(min_x, min_y, qr_width, qr_height);
    ps.qr_straighten = warped(qr_rect);
    Rect qr_with_margin_rect(min_x - margin, min_y - margin,
                             qr_width + margin * 2,
                             qr_height + margin * 2);
    ps.straighten = warped(qr_with_margin_rect);

    Mat g;
    cvtColor(ps.straighten, g, COLOR_BGR2GRAY);
    equalizeHist(g, g);

    // Top-left corner of the margin region.
    Rect dot_rect(0, 0, dot_size, dot_size);
    ps.dot_area = ps.straighten(dot_rect);
    Mat dot_area_gray = g(dot_rect);
    resize(dot_area_gray, ps.dot_area_gray, Size(64, 64));
    return true;
}

// Converts *ps.orig to grayscale and stores a copy in ps.preprocessed, shrunk
// so that its longer side is at most 512 pixels. The resize factor used is
// recorded in ps.scale (1.0 when no shrinking is needed). Always returns true.
bool preprocess(ProcessState &ps)
{
    const float size_cap = 512;

    Mat gray;
    cvtColor(*ps.orig, gray, COLOR_BGR2GRAY);

    const int height = ps.orig->rows;
    const int width = ps.orig->cols;

    ps.scale = 1.0;
    if (width > height) {
        // Landscape: the width is the side that has to fit.
        if (width > size_cap) {
            ps.scale = size_cap / width;
        }
    } else if (height > size_cap) {
        ps.scale = size_cap / height;
    }

    resize(gray, ps.preprocessed, Size(), ps.scale, ps.scale);
    return true;
}

// Result of energy_gradient(): the mean of the squared filter response for
// each of its two kernels.
struct EnergyGradient {
    double x;   // mean squared response of kernel_x
    double y;   // mean squared response of kernel_y
};

// Filters `gray_img` with two 3x3 difference kernels (centre minus the
// element to its right, centre minus the element below), squares each
// response and returns the mean of the first channel of both results.
//
// NOTE(review): the filter output keeps gray_img's depth, so for an 8-bit
// input negative differences and large squares would presumably saturate —
// confirm this is intended.
static
EnergyGradient energy_gradient(Mat &gray_img)
{
  Mat kernel_x = (Mat_<float>(3, 3) <<
                  0, 0,  0,
                  0, 1, -1,
                  0, 0,  0);
  Mat kernel_y = (Mat_<float>(3, 3) <<
                  0,  0, 0,
                  0,  1, 0,
                  0, -1, 0);

  const int depth = gray_img.depth();
  Mat diff_x, diff_y;
  filter2D(gray_img, diff_x, depth, kernel_x);
  filter2D(gray_img, diff_y, depth, kernel_y);

  // Square element-wise, in place.
  multiply(diff_x, diff_x, diff_x);
  multiply(diff_y, diff_y, diff_y);

  EnergyGradient energy = { cv::mean(diff_x)[0], cv::mean(diff_y)[0] };
  return energy;
}

// Blur check based on energy_gradient(), evaluated on `gray` and on a copy
// rotated by 45 degrees around the image centre.
//
// Passes when both components exceed the threshold for at least one of the
// two orientations AND, for each orientation, the two components differ by
// less than 15 percent of the larger one. The four energies are always
// printed to stdout; on failure the same text is stored in `err`.
static
bool check_blur_by_energy_gradient(Mat &gray, string &err)
{
    const int thres = 85;
    const float angle = 45;

    const EnergyGradient upright = energy_gradient(gray);

    Mat rotated;
    const Mat rotation = getRotationMatrix2D(Point2f(gray.cols / 2, gray.rows / 2), angle, 1.0);
    warpAffine(gray, rotated, rotation, gray.size());
    const EnergyGradient tilted = energy_gradient(rotated);

    const double upright_diff_pct = 100 * fabs(upright.x - upright.y) / max(upright.x, upright.y);
    const double tilted_diff_pct = 100 * fabs(tilted.x - tilted.y) / max(tilted.x, tilted.y);

    const bool upright_strong = upright.x > thres && upright.y > thres;
    const bool tilted_strong = tilted.x > thres && tilted.y > thres;
    const bool balanced = upright_diff_pct < 15 && tilted_diff_pct < 15;
    const bool sharp = (upright_strong || tilted_strong) && balanced;

    const string report = "energy: "
                + to_string(upright.x) + " "
                + to_string(upright.y) + " "
                + to_string(tilted.x) + " "
                + to_string(tilted.y);
    cout << report << endl;
    if (!sharp) {
        err = report;
    }

    return sharp;
}

// Sharpness measure: blurs `gray` with a 5x5 Gaussian, takes the absolute
// Laplacian response (aperture 3) and returns the variance of that response
// divided by the number of pixels in `gray`.
//
// Returns -1 and sets `err` if meanStdDev() does not yield a single value.
double laplacian(Mat &gray, string &err)
{
    Mat smoothed;
    GaussianBlur(gray, smoothed, Size(5, 5), 0, 0, BORDER_DEFAULT);

    Mat response;
    Laplacian(smoothed, response, CV_16S, 3);
    convertScaleAbs(response, response);

    Mat mean_out, stddev_out;
    meanStdDev(response, mean_out, stddev_out);
    if (stddev_out.cols * stddev_out.rows != 1) {
        err = "wrong shape of stddev result";
        return -1;
    }

    const double sd = stddev_out.at<double>(0, 0);
    const double area = gray.rows * gray.cols;
    return sd * sd / area;
}

// Blur check based on laplacian(). Stores the measured value in ps.clarity
// and passes only when it is strictly greater than ps.laplacian_thres.
// On failure `err` explains why (either laplacian()'s own error or a
// "too blurry" message with the image size and both values).
static
bool check_blur_by_laplacian(ProcessState &ps, Mat &gray, string &err)
{
    const double clarity = laplacian(gray, err);
    if (clarity < 0) {
        // laplacian() already filled in err.
        return false;
    }

    ps.clarity = clarity;

    const bool too_blurry = clarity <= ps.laplacian_thres;
    if (!too_blurry) {
        return true;
    }

    err = string_format("image (%d x %d) too blurry: %lf <= %lf",
                        gray.cols, gray.rows,
                        clarity, ps.laplacian_thres
                       );
    return false;
}

// Entry point for the blur check. The energy-gradient variant is kept but
// switched off by a local flag, so the Laplacian variant is the one that runs.
static
bool check_blur(ProcessState &ps, Mat &gray, string &err)
{
    const bool use_energy_gradient = false;
    return use_energy_gradient
        ? check_blur_by_energy_gradient(gray, err)
        : check_blur_by_laplacian(ps, gray, err);
}

#define COUNT_COMPONENTS 0
#if COUNT_COMPONENTS
// Runs connectedComponentsWithStats() on `img` and reports whether more than
// 25 of the returned stats rows have an area greater than 5.
// Compiled only when COUNT_COMPONENTS is non-zero.
static bool is_valid_pattern(Mat &img)
{
    const int min_area = 5;
    const int min_components = 25;

    Mat labels, stats, centroids;
    connectedComponentsWithStats(img, labels, stats, centroids);

    int large_components = 0;
    for (int label = 0; label < stats.rows; label++) {
        if (stats.at<int>(label, CC_STAT_AREA) > min_area) {
            large_components++;
        }
    }

    return large_components > min_components;
}
#endif

// Counts the rows of `img` that contain at least one non-zero byte.
//
// Pixels are read as uint8_t: the images passed in hold values such as 255,
// which are not valid `bool` object representations, so reading them through
// at<bool> is not well defined.
static
int find_score(Mat &img)
{
    int ret = 0;
    for (int row = 0; row < img.rows; row++) {
        for (int col = 0; col < img.cols; col++) {
            if (img.at<uint8_t>(row, col)) {
                // One hit is enough for this row; skip the rest of it.
                ret += 1;
                break;
            }
        }
    }
    return ret;
}

// Zeroes the pixel at `p` and every non-zero pixel reachable from it through
// 8-connected non-zero neighbours (iterative flood fill on an 8-bit image).
//
// Pixels are cleared at the moment they are queued, so each pixel enters the
// work list at most once, and they are read as uint8_t rather than bool
// because values such as 255 are not valid `bool` representations.
static void clear_connected(Mat &bin, Point p)
{
    vector<Point> pending;
    bin.at<uint8_t>(p.y, p.x) = 0;
    pending.push_back(p);

    while (!pending.empty()) {
        const Point cur = pending.back();
        pending.pop_back();

        for (int dx = -1; dx <= 1; dx++) {
            for (int dy = -1; dy <= 1; dy++) {
                const int nx = cur.x + dx;
                const int ny = cur.y + dy;
                if (nx < 0 || nx >= bin.cols || ny < 0 || ny >= bin.rows) {
                    continue;
                }
                if (!bin.at<uint8_t>(ny, nx)) {
                    continue;
                }
                bin.at<uint8_t>(ny, nx) = 0;
                pending.push_back(Point(nx, ny));
            }
        }
    }
}

// Returns one representative point per connected group of non-zero pixels in
// `bin`: the first pixel of each group met while scanning column by column,
// top to bottom. Each group is erased with clear_connected() once found, so
// the pixel data `bin` refers to is zeroed as a side effect.
static
vector<Point> find_points(Mat bin)
{
    vector<Point> seeds;
    for (int col = 0; col < bin.cols; col++) {
        for (int row = 0; row < bin.rows; row++) {
            if (bin.at<uint8_t>(row, col)) {
                const Point seed(col, row);
                seeds.push_back(seed);
                clear_connected(bin, seed);
            }
        }
    }
    return seeds;
}

// Erodes a copy of `bin` into `eroded` with a 3x3 elliptical kernel, using at
// most five erosions in total across two phases:
//
//   1. erode until find_points() reports more than 25 components in the
//      eroded image;
//   2. keep eroding while the next erosion would still leave at least 25.
//
// Returns 0 on success. Returns -1 and sets `err` when phase 1 finds no
// components at all.
static
int adaptive_erode(Mat &bin, Mat &eroded, string &err)
{
    auto kernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
    const size_t min_points = 25;
    int max_erodes = 5;

    printf("adaptiveThreshold\n");
    eroded = bin.clone();
    while (max_erodes-- > 0) {
        // In this loop we erode a "full" image in order to get enough detached components.
        // The count must come from the image being eroded: counting `bin`
        // would give the same answer on every pass.
        auto points = find_points(eroded.clone());
        printf("points: %zu\n", points.size());
        if (points.size() == 0) {
            err = "cannot find enough points";
            return -1;
        }
        if (points.size() > min_points) {
            break;
        }
        erode(eroded, eroded, kernel);
    }

    while (max_erodes-- > 0) {
        // In this loop we further erode a "lean" image in order to get clarity until it's too much
        Mat next;
        erode(eroded, next, kernel);
        auto points = find_points(next.clone());
        if (points.size() < min_points) {
            break;
        }
        eroded = next;
    }

    return 0;
}

// Estimates the orientation of the pattern in `gray`.
//
// The image is adaptively thresholded and eroded, then rotated through
// 0..179 degrees; for every angle find_score() counts the rows that still
// contain a set pixel. The angle with the lowest count is taken as the
// pattern direction and folded into the range [0, 45].
//
// check_orthogonal  when true, the result is accepted only if the score at
//                   the angle 90 degrees away is also clearly below the
//                   maximum score.
// Returns the folded angle, or -1 when no angle stands out (or when
// adaptive_erode() fails, in which case `err` holds its message).
static
int emblem_detect_angle(Mat &gray, bool check_orthogonal, string &err)
{
    const int MAX_ROT_ANGLE = 180;
    const int score_diff_thres = 5;

    Mat bin;
    adaptiveThreshold(gray, bin, 255, ADAPTIVE_THRESH_GAUSSIAN_C, THRESH_BINARY_INV, 11, 2);

    Mat eroded;
    if (adaptive_erode(bin, eroded, err) < 0) {
        return -1;
    }

    int scores[MAX_ROT_ANGLE] = { 0 };
    int min_score = gray.cols;
    int max_score = 0;
    int lowest_score_angle = -1;

    const Point2f center(gray.cols / 2, gray.rows / 2);
    for (int angle = 0; angle < MAX_ROT_ANGLE; angle += 1) {
        auto m = getRotationMatrix2D(center, angle, 1.0);
        Mat rotated;
        warpAffine(eroded, rotated, m, gray.size());
        int score = find_score(rotated);
        scores[angle] = score;
        if (score < min_score) {
            lowest_score_angle = angle;
        }
        min_score = min(score, min_score);
        max_score = max(max_score, score);
    }

    // No angle ever scored below the initial bound, or the scores are too
    // flat to single one out.
    if (lowest_score_angle < 0 || max_score - min_score <= score_diff_thres) {
        return -1;
    }

    // Wrap into [0, MAX_ROT_ANGLE): a lowest angle of exactly 90 must map to
    // index 0, not to scores[180], which is past the end of the array.
    int orthogonal_angle = (lowest_score_angle + 90) % MAX_ROT_ANGLE;
    int orthogonal_score = scores[orthogonal_angle];
    printf("lowest_score_angle %d, min score %d, max score %d, orthogonal_angle %d, orthogonal score: %d\n",
           lowest_score_angle, min_score, max_score, orthogonal_angle, orthogonal_score);

    // Fold 0..179 into 0..45.
    lowest_score_angle = lowest_score_angle > 90 ? lowest_score_angle - 90 : lowest_score_angle;
    if (lowest_score_angle > 45)
        lowest_score_angle = 90 - lowest_score_angle;

    if (max_score - orthogonal_score > score_diff_thres || !check_orthogonal) {
        return lowest_score_angle;
    }

    return -1;
}

// Runs the whole pipeline on `in`: preprocess, QR detection (20% margin,
// with perspective correction), blur check on the dot area, and angle
// detection on the dot area.
//
// ps      working state; ps.orig is pointed at the Mat behind `in`, so that
//         Mat must stay alive while `ps` is used.
// angle   receives the detected angle on success.
// qrcode  receives ps.qrcode once detect_qr() has run, whether or not it
//         succeeded.
// err     receives a short reason on failure.
//
// Returns true when an angle was detected. Exceptions thrown along the way
// are caught and reported through `err`.
bool emblem_dot_angle(ProcessState &ps, InputArray in, float &angle, string &qrcode, string &err)
{
    try {

        ps.orig = static_cast<Mat *>(in.getObj());
        if (!ps.orig) {
            // Nothing behind the InputArray; preprocess() would dereference null.
            err = "invalid input image";
            return false;
        }
        preprocess(ps);

        if (!detect_qr(ps, 0.20, true, err)) {
            qrcode = ps.qrcode;
            err = "detect_qr: " + err;
            return false;
        }

        qrcode = ps.qrcode;

        if (!check_blur(ps, ps.dot_area_gray, err)) {
            return false;
        }

        // emblem_detect_angle() signals failure with -1; 0 is a valid angle.
        int a = emblem_detect_angle(ps.dot_area_gray, false, err);
        if (a >= 0) {
            angle = a;
            return true;
        } else {
            err = "cannot detect angle";
            return false;
        }
    } catch (const std::exception &exc) {
        std::cout << exc.what() << std::endl;
        err = "exception";
        return false;
    } catch (...) {
        err = "unknown error";
        return false;
    }
}

// Reduces a BGR image to the single channel with the most contrast.
//
// The per-channel standard deviation is measured for the image as given and
// for its HSV conversion; the channel with the largest value across both is
// returned as a single-channel Mat. Only the first three channels are
// considered.
static
Mat adaptive_gray(Mat &img)
{
    // Picks the channel (0..2) with the largest standard deviation and
    // returns that deviation.
    auto strongest_channel = [](const Scalar &stddev, int &channel) -> double {
        channel = 0;
        for (int i = 1; i < 3; i++) {
            if (stddev[i] > stddev[channel]) {
                channel = i;
            }
        }
        return stddev[channel];
    };

    // meanStdDev() produces doubles, one per channel; Scalar outputs give
    // direct indexed access to them.
    Scalar bgr_mean, bgr_stddev;
    meanStdDev(img, bgr_mean, bgr_stddev);
    int bgr_max_std_channel = 0;
    const double bgr_max_std = strongest_channel(bgr_stddev, bgr_max_std_channel);

    Mat hsv_img;
    cvtColor(img, hsv_img, COLOR_BGR2HSV);
    Scalar hsv_mean, hsv_stddev;
    meanStdDev(hsv_img, hsv_mean, hsv_stddev);
    int hsv_max_std_channel = 0;
    const double hsv_max_std = strongest_channel(hsv_stddev, hsv_max_std_channel);

    vector<Mat> channels;
    if (hsv_max_std > bgr_max_std) {
        split(hsv_img, channels);
        printf("using hsv channel %d\n", hsv_max_std_channel);
        return channels[hsv_max_std_channel];
    }

    split(img, channels);
    printf("using rgb channel %d\n", bgr_max_std_channel);
    return channels[bgr_max_std_channel];
}

// Tells whether a grid cell lies on the one-cell-wide square ring whose
// corners are cells (1, 1) and (5, 5): inside the 1..5 x 1..5 square but on
// one of its four edges.
static
bool cell_in_bg(int cell_x, int cell_y)
{
    const bool inside_square =
        cell_x >= 1 && cell_x <= 5 &&
        cell_y >= 1 && cell_y <= 5;
    if (!inside_square) {
        return false;
    }

    const bool on_vertical_edge = cell_x == 1 || cell_x == 5;
    const bool on_horizontal_edge = cell_y == 1 || cell_y == 5;
    return on_vertical_edge || on_horizontal_edge;
}

// Maps pixel `p` of a w x h image onto a 7x7 grid and reports whether the
// cell it falls in satisfies cell_in_bg().
static
bool roi_in_bg(int w, int h, Point p)
{
    const int grid = 7;
    const int column = p.x * grid / w;
    const int row = p.y * grid / h;
    return cell_in_bg(column, row);
}

// Turns the 8-bit image `img` into a 0/255 mask, in place.
//
// Two histogram cut points are located (at 20% and 80% of the pixel count),
// then pulled towards each other by `margin_pct` percent of their distance.
// A pixel becomes 255 only when roi_in_bg() accepts its position and its
// value lies within the narrowed range (bounds included); everything else
// becomes 0.
static
void roi_mask(Mat &img, int margin_pct)
{
    // Histogram of pixel values.
    int histogram[256] = { 0 };
    for (int y = 0; y < img.rows; y++) {
        for (int x = 0; x < img.cols; x++) {
            histogram[img.at<uint8_t>(y, x)]++;
        }
    }

    const int cut = 20;
    const int total = img.cols * img.rows;

    // Walk the histogram until the running count reaches the target; the
    // index ends up one past the last bin that was added.
    int p05 = 0;
    int seen = 0;
    while (seen < total * cut / 100 && p05 < 256) {
        seen += histogram[p05];
        p05++;
    }

    int p95 = 0;
    seen = 0;
    while (seen < total * (100 - cut) / 100 && p95 < 256) {
        seen += histogram[p95];
        p95++;
    }

    printf("p05: %d, p95: %d\n", p05, p95);
    const int cap = (p95 - p05) * margin_pct / 100;
    const int min_thres = p05 + cap;
    const int max_thres = p95 - cap;

    for (int y = 0; y < img.rows; y++) {
        for (int x = 0; x < img.cols; x++) {
            uint8_t &pixel = img.at<uint8_t>(y, x);
            const bool keep =
                roi_in_bg(img.cols, img.rows, Point(x, y)) &&
                pixel >= min_thres &&
                pixel <= max_thres;
            pixel = keep ? 255 : 0;
        }
    }
}

// Splits the 8-bit image `img` into a 7x7 grid and returns, for every cell
// accepted by cell_in_bg() (in row-major cell order), the fraction of its
// pixels that are non-zero. A cell with no pixels contributes 0.
// The per-cell non-zero counts of all 49 cells are printed to stdout.
static
vector<float> roi_extract_features(Mat &img)
{
    const int grid = 7;
    const int cell_count = grid * grid;

    vector<int> ones(cell_count, 0);
    vector<int> zeroes(cell_count, 0);
    for (int y = 0; y < img.rows; y++) {
        const int cell_y = y * grid / img.rows;
        for (int x = 0; x < img.cols; x++) {
            const int cell_x = x * grid / img.cols;
            const int idx = cell_y * grid + cell_x;
            assert(idx < 49);

            if (img.at<uint8_t>(y, x)) {
                ones[idx]++;
            } else {
                zeroes[idx]++;
            }
        }
    }

    printf("ones:\n");
    for (int idx = 0; idx < cell_count; idx++) {
        printf("%d ", ones[idx]);
    }
    printf("\n");

    vector<float> features;
    for (int cell_y = 0; cell_y < grid; cell_y++) {
        for (int cell_x = 0; cell_x < grid; cell_x++) {
            if (!cell_in_bg(cell_x, cell_y)) {
                continue;
            }
            const int idx = cell_y * grid + cell_x;
            const int pixels = ones[idx] + zeroes[idx];
            features.push_back(pixels ? ones[idx] / (float)pixels : 0.0f);
        }
    }
    return features;
}

// Arithmetic mean of `a`, accumulated in single precision; 0 for an empty
// vector.
static
float mean(vector<float> &a)
{
    if (a.empty()) {
        return 0;
    }

    float total = 0;
    for (size_t i = 0; i < a.size(); i++) {
        total += a[i];
    }
    return total / a.size();
}

// Sum over i of (a[i] - mean(a)) * (b[i] - mean(b)). The sum is not divided
// by the element count. Returns 0 when the two vectors differ in length.
static
float covariance(vector<float> &a, vector<float> &b)
{
    if (a.size() != b.size()) {
        return 0;
    }

    const float center_a = mean(a);
    const float center_b = mean(b);

    float total = 0;
    for (size_t idx = 0; idx < a.size(); idx++) {
        const float dev_a = a[idx] - center_a;
        const float dev_b = b[idx] - center_b;
        total += dev_a * dev_b;
    }
    return total;
}

// True when `p` addresses a pixel inside `a`, i.e. 0 <= x < cols and
// 0 <= y < rows. Row 0 and column 0 are valid positions.
static inline
bool valid_point(Mat &a, Point p)
{
    return p.x >= 0 && p.x < a.cols && p.y >= 0 && p.y < a.rows;
}

// True when both positions pass valid_point() for their image and the 8-bit
// values stored there are equal. The pixels are read only after both
// positions have been validated.
static inline
bool fuzzy_pixel_match(Mat &a, Point pa, Mat &b, Point pb)
{
    const bool both_valid = valid_point(a, pa) && valid_point(b, pb);
    return both_valid && a.at<uint8_t>(pa) == b.at<uint8_t>(pb);
}

// Counts the pixels of `a` that are considered matched in `b`. Both images
// must have the same size.
//
// A pixel whose position is rejected by roi_in_bg() always counts. Any other
// pixel counts when some pixel of `b` in the surrounding 3x3 neighbourhood
// (the same position included) has the same value as the pixel of `a`.
static
int fuzzy_pixel_cmp(Mat &b, Mat &a)
{
    assert(a.cols == b.cols);
    assert(a.rows == b.rows);

    const int w = a.cols;
    const int h = a.rows;
    const int fuzziness = 1;

    int matched = 0;
    for (int x = 0; x < w; x++) {
        for (int y = 0; y < h; y++) {
            const Point here(x, y);

            // Positions outside the region of interest count unconditionally.
            bool counted = !roi_in_bg(w, h, here);

            // Otherwise look for an equal pixel nearby, stopping at the first hit.
            for (int dx = -fuzziness; !counted && dx <= fuzziness; dx++) {
                for (int dy = -fuzziness; !counted && dy <= fuzziness; dy++) {
                    counted = fuzzy_pixel_match(a, here, b, Point(x + dx, y + dy));
                }
            }

            if (counted) {
                matched++;
            }
        }
    }
    return matched;
}

// Scores how similar `frame_roi_in` is to the reference image `std_in`.
//
// Both inputs are reduced to one channel with adaptive_gray(), the frame is
// resized to the reference size, and both are masked with roi_mask() (margin
// 20 for the frame, 30 for the reference). The result is the product of
//   - the fraction of pixels matched by fuzzy_pixel_cmp(), and
//   - covariance() of the two roi_extract_features() vectors,
// with negative products replaced by 0.
//
// `alg` is not used. `err` is reset to an empty string. Intermediate values
// are printed to stdout.
double emblem_roi_similarity(SimilarityAlg alg, InputArray std_in, InputArray frame_roi_in, string &err)
{
    Mat reference_img = *static_cast<Mat *>(std_in.getObj());
    Mat frame_img = *static_cast<Mat *>(frame_roi_in.getObj());
    err = "";

    Mat frame_gray = adaptive_gray(frame_img);
    Mat reference_gray = adaptive_gray(reference_img);

    resize(frame_gray, frame_gray, reference_gray.size());

    // Mask copies so the single-channel images above stay untouched.
    Mat frame_mask = frame_gray.clone();
    Mat reference_mask = reference_gray.clone();
    roi_mask(frame_mask, 20);
    roi_mask(reference_mask, 30);

    const double same = fuzzy_pixel_cmp(frame_mask, reference_mask);
    const double total = frame_mask.rows * frame_mask.cols;
    const double sim = same / total;
    printf("same: %lf, total: %lf, sim: %lf\n", same, total, sim);

    auto reference_feature = roi_extract_features(reference_mask);
    auto frame_feature = roi_extract_features(frame_mask);

    printf("\nstd:");
    for (size_t i = 0; i < reference_feature.size(); i++) {
        printf("%.2lf ", reference_feature[i] * 100);
    }

    printf("\nfrm:");
    for (size_t i = 0; i < frame_feature.size(); i++) {
        printf("%.2lf ", frame_feature[i] * 100);
    }
    printf("\n");

    const double cov = covariance(reference_feature, frame_feature);
    printf("cov: %lf\n", cov);

    const double score = cov * sim;
    return 0.0 > score ? 0.0 : score;
}