themblem/alg/libqr.cpp
2025-03-02 15:58:08 +00:00

747 lines
20 KiB
C++

#include <iostream>
#include <string>
#include "libqr.h"
#include "opencv2/objdetect.hpp"
#include "opencv2/wechat_qrcode.hpp"
#include "string_format.h"
using namespace std;
using namespace cv;
static
vector<Point> transform_image(Mat &in, vector<Point> qr_points, Mat &out)
{
Mat src = (Mat_<float>(4, 2) <<
qr_points[0].x, qr_points[0].y,
qr_points[1].x, qr_points[1].y,
qr_points[2].x, qr_points[2].y,
qr_points[3].x, qr_points[3].y
);
int min_x = qr_points[0].x;
int min_y = qr_points[0].y;
int max_x = qr_points[0].x;
int max_y = qr_points[0].y;
for (auto p: qr_points) {
min_x = min(p.x, min_x);
min_y = min(p.y, min_y);
max_x = max(p.x, max_x);
max_y = max(p.y, max_y);
}
Mat dst = (Mat_<float>(4, 2) <<
min_x, min_y,
max_x, min_y,
max_x, max_y,
min_x, max_y);
Mat m = getPerspectiveTransform(src, dst);
warpPerspective(in, out, m, in.size());
vector<Point> ret;
ret.push_back(Point(min_x, min_y));
ret.push_back(Point(max_x, min_y));
ret.push_back(Point(max_x, max_y));
ret.push_back(Point(min_x, max_y));
return ret;
}
bool detect_qr(ProcessState &ps, float margin_ratio, bool warp, string &err)
{
#if WECHAT_QRCODE_USE_MODEL
auto wr = wechat_qrcode::WeChatQRCode(
"wechat_qrcode/detect.prototxt",
"wechat_qrcode/detect.caffemodel",
"wechat_qrcode/sr.prototxt",
"wechat_qrcode/sr.caffemodel");
#else
auto wr = wechat_qrcode::WeChatQRCode();
#endif
vector<Mat> qrs;
auto r = wr.detectAndDecode(ps.preprocessed, qrs);
if (!r.size()) {
err = "qr not detected";
return false;
}
ps.qrcode = r[0];
auto rect = qrs[0];
vector<Point> qr_points;
qr_points.push_back(Point(rect.at<float>(0, 0) / ps.scale, rect.at<float>(0, 1) / ps.scale));
qr_points.push_back(Point(rect.at<float>(1, 0) / ps.scale, rect.at<float>(1, 1) / ps.scale));
qr_points.push_back(Point(rect.at<float>(2, 0) / ps.scale, rect.at<float>(2, 1) / ps.scale));
qr_points.push_back(Point(rect.at<float>(3, 0) / ps.scale, rect.at<float>(3, 1) / ps.scale));
ps.qr_points = qr_points;
Mat warped;
vector<Point> warped_qr_points;
if (warp) {
warped_qr_points = transform_image(*ps.orig, qr_points, warped);
} else {
warped = *ps.orig;
warped_qr_points = qr_points;
}
int min_x = warped_qr_points[0].x;
int min_y = warped_qr_points[0].y;
int max_x = min_x;
int max_y = min_y;
for (auto p: warped_qr_points) {
min_x = min(p.x, min_x);
min_y = min(p.y, min_y);
max_x = max(p.x, max_x);
max_y = max(p.y, max_y);
}
int margin = (max_x - min_x) * margin_ratio;
if (min_y < margin || min_x < margin || max_x + margin >= warped.cols || max_y + margin >= warped.rows) {
err = "qr margin too small";
return false;
}
int qr_width = max_x - min_x;
int qr_height = max_y - min_y;
if (qr_width < 200 && qr_height < 200 && qr_width < ps.orig->cols * 0.5 && qr_height < ps.orig->rows * 0.5) {
printf("(%d, %d) in (%d, %d)\n", qr_width, qr_height, ps.orig->cols, ps.orig->rows);
err = "qr too small";
return false;
}
Rect qr_rect(min_x, min_y, max_x - min_x, max_y - min_y);
ps.qr_straighten = warped(qr_rect);
Rect qr_with_margin_rect(min_x - margin, min_y - margin,
max_x - min_x + margin * 2,
max_y - min_y + margin * 2);
ps.straighten = warped(qr_with_margin_rect);
Mat g;
cvtColor(ps.straighten, g, COLOR_BGR2GRAY);
equalizeHist(g, g);
Rect dot_rect;
dot_rect.x = 0;
dot_rect.y = 0;
dot_rect.width = margin / 2;
dot_rect.height = margin / 2;
ps.dot_area = ps.straighten(dot_rect);
Mat dot_area_gray = g(dot_rect);
resize(dot_area_gray, ps.dot_area_gray, Size(64, 64));
return true;
}
bool preprocess(ProcessState &ps)
{
Mat gray;
cvtColor(*ps.orig, gray, COLOR_BGR2GRAY);
ps.scale = 1.0;
const float size_cap = 512;
if (ps.orig->rows > size_cap) {
ps.scale = size_cap / ps.orig->rows;
}
if (ps.orig->cols > ps.orig->rows && ps.orig->cols > size_cap) {
ps.scale = size_cap / ps.orig->cols;
}
resize(gray, ps.preprocessed, Size(), ps.scale, ps.scale);
return true;
}
struct EnergyGradient {
double x;
double y;
};
static
EnergyGradient energy_gradient(Mat &gray_img)
{
Mat smd_image_x, smd_image_y, G;
Mat kernel_x(3, 3, CV_32F, Scalar(0));
kernel_x.at<float>(1, 2) = -1.0;
kernel_x.at<float>(1, 1) = 1.0;
Mat kernel_y(3, 3, CV_32F, Scalar(0));
kernel_y.at<float>(1, 1) = 1.0;
kernel_y.at<float>(2, 1) = -1.0;
filter2D(gray_img, smd_image_x, gray_img.depth(), kernel_x);
filter2D(gray_img, smd_image_y, gray_img.depth(), kernel_y);
multiply(smd_image_x, smd_image_x, smd_image_x);
multiply(smd_image_y, smd_image_y, smd_image_y);
EnergyGradient ret = { mean(smd_image_x)[0], mean(smd_image_y)[0], };
return ret;
}
static
bool check_blur_by_energy_gradient(Mat &gray, string &err)
{
const int thres = 85;
auto a = energy_gradient(gray);
float angle = 45;
auto m = getRotationMatrix2D(Point2f(gray.cols / 2, gray.rows / 2), angle, 1.0);
Mat rotated;
warpAffine(gray, rotated, m, gray.size());
auto b = energy_gradient(rotated);
auto diffa = fabs(a.x - a.y);
auto diffb = fabs(b.x - b.y);
auto diffa_percent = 100 * diffa / max(a.x, a.y);
auto diffb_percent = 100 * diffb / max(b.x, b.y);
bool ret =
((a.x > thres && a.y > thres) || (b.x > thres && b.y > thres)) &&
diffa_percent < 15 && diffb_percent < 15;
cout << "energy: "
+ to_string(a.x) + " "
+ to_string(a.y) + " "
+ to_string(b.x) + " "
+ to_string(b.y) << endl;
if (!ret) {
err = "energy: "
+ to_string(a.x) + " "
+ to_string(a.y) + " "
+ to_string(b.x) + " "
+ to_string(b.y);
}
return ret;
}
double laplacian(Mat &gray, string &err)
{
int ddepth = CV_16S;
Mat check, lap;
GaussianBlur(gray, check, Size(5, 5), 0, 0, BORDER_DEFAULT);
Laplacian(check, lap, ddepth, 3);
convertScaleAbs(lap, lap);
Mat mean, stddev;
meanStdDev(lap, mean, stddev);
if (stddev.cols * stddev.rows == 1) {
double area = gray.rows * gray.cols;
double sd = stddev.at<double>(0, 0);
double var = sd * sd;
return var / area;
}
err = "wrong shape of stddev result";
return -1;
}
static
bool check_blur_by_laplacian(ProcessState &ps, Mat &gray, string &err)
{
auto var = laplacian(gray, err);
if (var < 0) return false;
ps.clarity = var;
if (var <= ps.laplacian_thres) {
err = string_format("image (%d x %d) too blurry: %lf <= %lf",
gray.cols, gray.rows,
var, ps.laplacian_thres
);
return false;
}
return true;
}
static
bool check_blur_by_sobel(ProcessState &ps, Mat &gray, string &err)
{
const float thres = 15;
Mat sobel_x, sobel_y;
Sobel(gray, sobel_x, CV_64F, 1, 0, 3);
Sobel(gray, sobel_y, CV_64F, 0, 1, 3);
Mat magnitude;
magnitude = sobel_x.mul(sobel_x) + sobel_y.mul(sobel_y); // Gx^2 + Gy^2
sqrt(magnitude, magnitude); // sqrt(Gx^2 + Gy^2)
Scalar meanMagnitude = mean(magnitude);
double sharpness = meanMagnitude[0] / (gray.rows * gray.cols) * 1000;
if (sharpness < thres) {
err = string_format("image too blurry: %lf < %lf", sharpness, thres);
return false;
}
return true;
}
static
bool check_sharpness(ProcessState &ps, Mat &gray, int method, string &err)
{
if (method == 0) {
return check_blur_by_laplacian(ps, gray, err);
} else if (method == 1) {
return check_blur_by_energy_gradient(gray, err);
} else if (method == 2) {
return check_blur_by_sobel(ps, gray, err);
}
err = "unknown sharpness method: " + to_string(method);
return false;
}
#define COUNT_COMPONENTS 0
#if COUNT_COMPONENTS
static bool is_valid_pattern(Mat &img)
{
Mat labels;
Mat stats;
Mat centroids;
connectedComponentsWithStats(img, labels, stats, centroids);
int valid = 0;
for (auto i = 0; i < stats.rows; i++) {
int area = stats.at<int>(i, CC_STAT_AREA);
if (area > 5) {
valid++;
}
}
return valid > 25;
}
#endif
static
int find_score(Mat &img)
{
int ret = 0;
for (int row = 0; row < img.rows; row++) {
int row_sum = 0;
for (int col = 0; col < img.cols; col++) {
auto p = img.at<bool>(row, col);
if (p) {
row_sum += 1;
}
}
if (row_sum) {
ret += 1;
}
}
return ret;
}
static void clear_connected(Mat &bin, Point p)
{
vector<Point> q;
q.push_back(p);
while (q.size()) {
auto p = q[q.size() - 1];
q.pop_back();
bin.at<uint8_t>(p.y, p.x) = 0;
for (int i = -1; i <= 1; i++) {
for (int j = -1; j <= 1; j++) {
int nx = p.x + i;
int ny = p.y + j;
if (nx < 0 || nx >= bin.cols || ny < 0 || ny >= bin.rows) {
continue;
}
if (bin.at<bool>(ny, nx)) {
q.push_back(Point(nx, ny));
}
}
}
}
}
static
vector<Point> find_points(Mat bin)
{
vector<Point> ret;
for (int x = 0; x < bin.cols; x++) {
for (int y = 0; y < bin.rows; y++) {
auto p = bin.at<uint8_t>(y, x);
if (!p) continue;
auto point = Point(x, y);
ret.push_back(point);
clear_connected(bin, point);
}
}
return ret;
}
static
int adaptive_erode(Mat &bin, Mat &eroded, string &err)
{
auto kernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
const int min_points = 25;
int max_erodes = 5;
printf("adaptiveThreshold\n");
eroded = bin.clone();
while (max_erodes-- > 0) {
// In this loop we erode a "full" image in order to get enough detached components
auto points = find_points(bin.clone());
printf("points: %zu\n", points.size());
if (points.size() == 0) {
err = "cannot find enough points";
return -1;
}
if (points.size() > min_points) {
break;
}
erode(eroded, eroded, kernel);
}
while (max_erodes-- > 0) {
// In this loop we further erode a "lean" image in order to get clarity until it's too much
Mat next;
erode(eroded, next, kernel);
auto points = find_points(next.clone());
if (points.size() < min_points) {
break;
}
eroded = next;
}
return 0;
}
static
int emblem_detect_angle(Mat &gray, bool check_orthogonal, string &err)
{
Mat bin;
int min_score = gray.cols;
int max_score = 0;
int lowest_score_angle = -1;
adaptiveThreshold(gray, bin, 255, ADAPTIVE_THRESH_GAUSSIAN_C, THRESH_BINARY_INV, 11, 2);
Mat inverted;
bitwise_not(bin, inverted);
const int MAX_ROT_ANGLE = 180;
int scores[MAX_ROT_ANGLE] = { 0 };
const int score_diff_thres = 5;
Mat eroded;
adaptive_erode(bin, eroded, err);
for (int angle = 0; angle < MAX_ROT_ANGLE; angle += 1) {
auto m = getRotationMatrix2D(Point2f(gray.cols / 2, gray.rows / 2), angle, 1.0);
Mat rotated;
warpAffine(eroded, rotated, m, gray.size());
int score = find_score(rotated);
scores[angle] = score;
if (score < min_score) {
lowest_score_angle = angle;
}
min_score = min(score, min_score);
max_score = max(max_score, score);
}
if (max_score - min_score > score_diff_thres) {
int orthogonal_angle = lowest_score_angle + 90;
if (orthogonal_angle > 180) {
orthogonal_angle -= 180;
}
int orthogonal_score = scores[orthogonal_angle];
printf("lowest_score_angle %d, min score %d, max score %d, orthogonal_angle %d, orthogonal score: %d\n",
lowest_score_angle, min_score, max_score, orthogonal_angle, orthogonal_score);
lowest_score_angle = lowest_score_angle > 90 ? lowest_score_angle - 90 : lowest_score_angle;
if (lowest_score_angle > 45)
lowest_score_angle = 90 - lowest_score_angle;
if (max_score - orthogonal_score > score_diff_thres || !check_orthogonal) {
return lowest_score_angle;
}
}
return -1;
}
bool emblem_dot_angle(ProcessState &ps, InputArray in, float &angle, string &qrcode, string &err)
{
try {
ps.orig = (Mat *)in.getObj();
preprocess(ps);
if (!detect_qr(ps, 0.20, true, err)) {
qrcode = ps.qrcode;
err = "detect_qr: " + err;
return false;
}
qrcode = ps.qrcode;
if (!check_sharpness(ps, ps.dot_area_gray, ps.sharpness_method, err)) {
return false;
}
int a = emblem_detect_angle(ps.dot_area_gray, false, err);
if (a > 0) {
angle = a;
return true;
} else {
err = "cannot detect angle";
return false;
}
} catch (const std::exception &exc) {
std::cout << exc.what() << std::endl;
err = "exception";
return false;
} catch (...) {
err = "unknown error";
return false;
}
}
static
Mat adaptive_gray(Mat &img)
{
Mat ret;
Mat mean, stddev;
Mat channels[3];
Mat hsv_img;
meanStdDev(img, mean, stddev);
int bgr_max_std_channel = 0;
float bgr_max_std = stddev.at<float>(0, 0);
for (int i = 1; i < 3; i++) {
auto nv = stddev.at<float>(0, i);
if (nv > bgr_max_std_channel) {
bgr_max_std_channel = i;
bgr_max_std = nv;
}
}
cvtColor(img, hsv_img, COLOR_BGR2HSV);
meanStdDev(img, hsv_img, stddev);
int hsv_max_std_channel = 0;
float hsv_max_std = stddev.at<float>(0, 0);
for (int i = 1; i < 3; i++) {
auto nv = stddev.at<float>(0, i);
if (nv > hsv_max_std_channel) {
hsv_max_std_channel = i;
hsv_max_std = nv;
}
}
if (hsv_max_std > bgr_max_std) {
split(hsv_img, channels);
printf("using hsv channel %d\n", hsv_max_std_channel);
ret = channels[hsv_max_std_channel];
} else {
split(img, channels);
printf("using rgb channel %d\n", bgr_max_std_channel);
ret = channels[bgr_max_std_channel];
}
return ret;
}
static
bool cell_in_bg(int cell_x, int cell_y)
{
return
(cell_x == 1 && (cell_y > 0 && cell_y < 6)) ||
(cell_x == 2 && (cell_y == 1 || cell_y == 5)) ||
(cell_x == 3 && (cell_y == 1 || cell_y == 5)) ||
(cell_x == 4 && (cell_y == 1 || cell_y == 5)) ||
(cell_x == 5 && cell_y > 0 && cell_y < 6)
;
}
static
bool roi_in_bg(int w, int h, Point p)
{
int cell_x = p.x * 7 / w;
int cell_y = p.y * 7 / h;
return cell_in_bg(cell_x, cell_y);
}
static
void roi_mask(Mat &img, int margin_pct)
{
int counts[256] = { 0 };
for (int i = 0; i < img.cols; i++) {
for (int j = 0; j < img.rows; j++) {
uint8_t p = img.at<uint8_t>(Point(i, j));
counts[p]++;
}
}
int cut = 20;
int seen = 0;
int total = img.cols * img.rows;
int p05, p95;
for (p05 = 0; seen < total * cut / 100 && p05 < 256; p05++) {
seen += counts[p05];
}
seen = 0;
for (p95 = 0; seen < total * (100 - cut) / 100 && p95 < 256; p95++) {
seen += counts[p95];
}
printf("p05: %d, p95: %d\n", p05, p95);
int cap = (p95 - p05) * margin_pct / 100;
int min_thres = p05 + cap;
int max_thres = p95 - cap;
for (int i = 0; i < img.cols; i++) {
for (int j = 0; j < img.rows; j++) {
auto pos = Point(i, j);
uint8_t p = img.at<uint8_t>(pos);
if (!roi_in_bg(img.cols, img.rows, pos)) {
img.at<uint8_t>(pos) = 0;
} else if (p < min_thres) {
img.at<uint8_t>(pos) = 0;
} else if (p > max_thres) {
img.at<uint8_t>(pos) = 0;
} else {
img.at<uint8_t>(pos) = 255;
}
}
}
}
static
vector<float> roi_extract_features(Mat &img)
{
vector<int> ones(49, 0);
vector<int> zeroes(49, 0);
for (int i = 0; i < img.cols; i++) {
for (int j = 0; j < img.rows; j++) {
auto pos = Point(i, j);
int cell_x = pos.x * 7 / img.cols;
int cell_y = pos.y * 7 / img.rows;
int idx = cell_y * 7 + cell_x;
assert(idx < 49);
uint8_t p = img.at<uint8_t>(pos);
if (p) {
ones[idx]++;
} else {
zeroes[idx]++;
}
}
}
printf("ones:\n");
for (int i = 0; i < 49; i++) {
printf("%d ", ones[i]);
}
printf("\n");
vector<float> ret;
for (int i = 0; i < 49; i++) {
int cell_x = i % 7;
int cell_y = i / 7;
if (!cell_in_bg(cell_x, cell_y)) {
continue;
}
if (ones[i] || zeroes[i]) {
ret.push_back(ones[i] / (float)(ones[i] + zeroes[i]));
} else {
ret.push_back(0);
}
}
return ret;
}
static
float mean(vector<float> &a)
{
float sum = 0;
if (!a.size()) {
return 0;
}
for (auto x: a) {
sum += x;
}
return sum / a.size();
}
static
float covariance(vector<float> &a, vector<float> &b)
{
float mean_a = mean(a);
float mean_b = mean(b);
float ret = 0;
if (a.size() != b.size()) return 0;
for (size_t i = 0; i < a.size(); i++) {
ret += (a[i] - mean_a) * (b[i] - mean_b);
}
return ret;
}
static inline
bool valid_point(Mat &a, Point p)
{
return p.x > 0 && p.x < a.cols && p.y > 0 && p.y < a.rows;
}
static inline
bool fuzzy_pixel_match(Mat &a, Point pa, Mat &b, Point pb)
{
if (!valid_point(a, pa) || !valid_point(b, pb)) return false;
return a.at<uint8_t>(pa) == b.at<uint8_t>(pb);
}
static
int fuzzy_pixel_cmp(Mat &b, Mat &a)
{
int ret = 0;
int w = a.cols;
int h = a.rows;
assert(a.cols == b.cols);
assert(a.rows == b.rows);
for (int i = 0; i < w; i++) {
for (int j = 0; j < h; j++) {
Point p(i, j);
if (!roi_in_bg(w, h, p)) {
ret++;
continue;
}
bool same = false;
int fuzziness = 1;
for (int ii = -fuzziness; ii <= fuzziness; ii++) {
for (int jj = -fuzziness; jj <= fuzziness; jj++) {
if (fuzzy_pixel_match(a, p, b, Point(i + ii, j + jj))) {
same = true;
goto out;
}
}
}
out:
ret += same ? 1 : 0;
}
}
return ret;
}
double emblem_roi_similarity(SimilarityAlg alg, InputArray std_in, InputArray frame_roi_in, string &err)
{
Mat stdm = *(Mat *)std_in.getObj();
Mat frame_roi = *(Mat *)frame_roi_in.getObj();
err = "";
Mat frame_gray = adaptive_gray(frame_roi);
Mat std_gray = adaptive_gray(stdm);
resize(frame_gray, frame_gray, std_gray.size());
double ret = 0;
Mat frame = frame_gray.clone();
Mat std = std_gray.clone();
roi_mask(frame, 20);
roi_mask(std, 30);
double same = fuzzy_pixel_cmp(frame, std);
double total = frame.rows * frame.cols;
double sim = same / total;
printf("same: %lf, total: %lf, sim: %lf\n", same, total, sim);
auto std_feature = roi_extract_features(std);
auto frame_feature = roi_extract_features(frame);
printf("\nstd:");
for (auto x: std_feature) {
printf("%.2lf ", x * 100);
}
printf("\nfrm:");
for (auto x: frame_feature) {
printf("%.2lf ", x * 100);
}
printf("\n");
double cov = covariance(std_feature, frame_feature);
printf("cov: %lf\n", cov);
double t = cov * sim;
ret = ret > t ? ret : t;
return ret;
}