// fanzetao-face_detect / face_detect.cpp

#include "face_detect.h"

#include <dlib/dnn.h>
#include <dlib/string.h>
#include <dlib/image_io.h>
#include <dlib/clustering.h>
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/opencv.h>
#include <opencv2/opencv.hpp>
#include <ctime>

using namespace dlib;
using namespace std;

// ---------------------------------------------------------------------------
// Network definition for the face recognition model (anet_type).
// An object of this type (global_net) is filled by deserialize() in
// load_model(), so the layer layout presumably has to match the serialized
// model file exactly -- TODO confirm before changing anything here.
// ---------------------------------------------------------------------------

// Residual unit: runs `block` with stride 1 on the tag1-marked input and adds
// that input back onto the block output (add_prev1).
template<template<int, template<typename> class, int, typename> class block, int N,
        template<typename> class BN, typename SUBNET>
using residual = add_prev1<block<N, BN, 1, tag1<SUBNET>>>;

// Down-sampling residual unit: runs `block` with stride 2 (tag2) and adds it
// to a 2x2, stride-2 average-pooled copy of the tag1-marked input
// (skip1 -> avg_pool -> add_prev2).
template<template<int, template<typename> class, int, typename> class block, int N,
        template<typename> class BN, typename SUBNET>
using residual_down = add_prev2<avg_pool<2, 2, 2, 2, skip1<tag2<block<N, BN, 2, tag1<SUBNET>>>>>>;

// Basic block: 3x3 convolution with the requested stride -> BN -> relu ->
// 3x3 convolution with stride 1 -> BN. N is the number of filters.
template<int N, template<typename> class BN, int stride, typename SUBNET>
using block  = BN<con<N, 3, 3, 1, 1, relu<BN<con<N, 3, 3, stride, stride, SUBNET>>>>>;

// Residual units that use `affine` as the BN layer, each followed by a relu.
template<int N, typename SUBNET> using ares      = relu<residual<block, N, affine, SUBNET>>;
template<int N, typename SUBNET> using ares_down = relu<residual_down<block, N, affine, SUBNET>>;

// Stacks of residual units, grouped by filter count (256, 256, 128, 64, 32).
// alevel4 is applied first (closest to the input), alevel0 last.
template<typename SUBNET> using alevel0 = ares_down<256, SUBNET>;
template<typename SUBNET> using alevel1 = ares<256, ares<256, ares_down<256, SUBNET>>>;
template<typename SUBNET> using alevel2 = ares<128, ares<128, ares_down<128, SUBNET>>>;
template<typename SUBNET> using alevel3 = ares<64, ares<64, ares<64, ares_down<64, SUBNET>>>>;
template<typename SUBNET> using alevel4 = ares<32, ares<32, ares<32, SUBNET>>>;

// Complete network, read from the innermost layer outwards:
//   input_rgb_image_sized<150> -> 7x7 stride-2 convolution (32 filters)
//   -> affine -> relu -> 3x3 stride-2 max pooling -> alevel4 .. alevel0
//   -> avg_pool_everything -> fc_no_bias with 128 outputs -> loss_metric.
using anet_type = loss_metric<fc_no_bias<128, avg_pool_everything<
        alevel0<
                alevel1<
                        alevel2<
                                alevel3<
                                        alevel4<
                                                max_pool<3, 3, 2, 2, relu<affine<con<32, 7, 7, 2, 2,
                                                        input_rgb_image_sized<150>
                                                >>>>>>>>>>>>;

/**
 * A detected face together with the frame it was found in.
 *
 * Used by face_detect_frame_with_descriptor() to remember, for every frame,
 * which rectangle was selected so the overall largest one can be picked later.
 */
struct face_rectangle {
    /// dlib wrapper around the OpenCV frame the rectangle refers to.
    cv_image<bgr_pixel> cv_img;

    /// Location of the selected face inside cv_img.
    dlib::rectangle rect;
};
// File-local state shared by all gonghui_face functions; filled by load_model().
namespace {

// Face detector, assigned from get_frontal_face_detector() in load_model().
frontal_face_detector global_detector;

// Landmark predictor, deserialized from the landmarks model file.
shape_predictor global_sp;

// Face recognition network, deserialized from the recognition model file.
anet_type global_net;

// True once load_model() has finished initialising the three objects above.
bool loaded = false;

}  // namespace

/**
 * Element count of a built-in array.
 *
 * Divides the byte size of `array` by the byte size of its first element.
 * Because the argument is taken by reference, a built-in array does not decay
 * to a pointer; the result is only an element count for such arrays.
 *
 * @param array  the array to measure
 * @return sizeof(array) / sizeof(array[0]), converted to int
 */
template <class T>
int getArrayLen(T& array) {
    const auto total_bytes   = sizeof(array);
    const auto element_bytes = sizeof(array[0]);
    return static_cast<int>(total_bytes / element_bytes);
}

/**
 * Build 100 randomly jittered variants of an image.
 *
 * Each variant comes from the two-argument jitter_image(img, rnd) overload,
 * which is not defined in this file (presumably dlib's -- confirm).
 *
 * @param img  source image
 * @return vector holding the 100 jittered copies, in generation order
 */
std::vector<matrix<rgb_pixel>> jitter_image(
        const matrix<rgb_pixel> &img
) {
    // One random generator per thread, kept alive between calls.
    thread_local dlib::rand rnd;

    std::vector<matrix<rgb_pixel>> jittered;

    int remaining = 100;
    while (remaining-- > 0) {
        jittered.push_back(jitter_image(img, rnd));
    }

    return jittered;
}

/**
 * Split `s` on every occurrence of the delimiter `c` and append the pieces to `v`.
 *
 * Empty pieces between or before delimiters are kept; an empty piece after a
 * trailing delimiter is dropped (so "a,b," yields {"a", "b"}). Existing
 * contents of `v` are left untouched.
 *
 * @param s  text to split
 * @param v  output vector the pieces are appended to
 * @param c  delimiter; if empty, `s` is appended as a single piece (when
 *           non-empty) instead of searching
 */
void split_string(const std::string &s, std::vector<std::string> &v, const std::string &c) {
    // An empty delimiter matches at every position without advancing, which
    // would loop forever and grow `v` without bound. Treat it as "no split".
    if (c.empty()) {
        if (!s.empty())
            v.push_back(s);
        return;
    }

    std::string::size_type start = 0;
    for (std::string::size_type hit = s.find(c);
         hit != std::string::npos;
         hit = s.find(c, start)) {
        v.push_back(s.substr(start, hit - start));
        start = hit + c.size();
    }

    // Remainder after the last delimiter; skipped when the input ends with one.
    if (start != s.length())
        v.push_back(s.substr(start));
}

/**
 * Parse newline-separated numbers into a single-row matrix.
 *
 * The text is split on "\n" with split_string(); every piece is converted
 * with atof() and stored, in order, in row 0 of the result.
 *
 * @param csv_string  text with one number per line
 * @return 1 x N matrix, N being the number of pieces produced by the split
 */
matrix<float> csv_string_to_matrix(std::string csv_string) {

    std::vector<std::string> lines;
    split_string(csv_string, lines, "\n");

    matrix<float> dest(1, lines.size());

    long column = 0;
    for (const std::string &line : lines) {
        // atof yields a double; storing it narrows to float.
        dest(0, column) = atof(line.c_str());
        ++column;
    }

    return dest;
}

string face_base64_encode(matrix<float> source) {

    ostringstream source_out;

    print_matrix_as_csv_helper p(source_out);

    p.operator<<(source);

    string source_str = source_out.str();

    stringstream base64_sin;

    base64_sin << source_str;

    ostringstream base64_out;

    base64 base64_coder;

    base64_coder.encode(base64_sin, base64_out);

    return base64_out.str();
}

/**
 * Inverse of face_base64_encode(): base64 text -> matrix.
 *
 * Decodes `source` with dlib's base64 decoder and hands the decoded text to
 * csv_string_to_matrix().
 *
 * @param source  base64 text
 * @return matrix parsed from the decoded text
 */
matrix<float> face_base64_decode(std::string source) {

    std::istringstream encoded(source);
    std::ostringstream decoded;

    base64 coder;
    coder.decode(encoded, decoded);

    return csv_string_to_matrix(decoded.str());
}

/**
 * Initialise the detector, landmark predictor and recognition network.
 *
 * Does nothing when a previous call already completed. The `loaded` flag is
 * only set after both model files were deserialized, so an exception from
 * deserialize() leaves the module in the "not loaded" state.
 *
 * @param face_landmarks_model_path  file deserialized into the shape predictor
 * @param face_rec_model_path        file deserialized into the recognition network
 */
void gonghui_face::load_model(
        string face_landmarks_model_path,
        string face_rec_model_path
) {
    if (!loaded) {
        global_detector = get_frontal_face_detector();

        deserialize(face_landmarks_model_path) >> global_sp;
        deserialize(face_rec_model_path) >> global_net;

        loaded = true;
    }
}

/**
 * Count the faces found in an image file.
 *
 * If the first detection pass finds nothing, the image is passed through
 * pyramid_up() and detection is tried once more.
 *
 * @param face_image_path  image file handed to load_image()
 * @return number of detected faces (always at least 1)
 * @throws CODE_NOT_LOADED       when load_model() has not completed
 * @throws CODE_FACE_NOT_FOUND   when both detection passes find no face
 */
int gonghui_face::face_detect_count(string face_image_path) {

    if (!loaded)
        throw CODE_NOT_LOADED;

    matrix<rgb_pixel> image;
    load_image(image, face_image_path);

    std::vector<dlib::rectangle> found = global_detector(image);

    if (found.empty()) {
        // Second chance: pyramid_up upsamples the image (per dlib's docs), so
        // faces too small for the first pass may be picked up.
        pyramid_up(image);
        found = global_detector(image);

        if (found.empty()) {
            cout << "face not found: " << face_image_path << endl;
            throw CODE_FACE_NOT_FOUND;
        }
    }

    return found.size();
}

/**
 * Compute the descriptor of the largest face seen across a set of raw frames.
 *
 * Every frame is wrapped as a 3-channel 8-bit OpenCV image, the largest face
 * of each frame is selected, and the largest of those across all frames is
 * aligned (150 px chip, 0.25 padding) and fed to the recognition network.
 *
 * @param face_frames  frames to inspect; each provides width, height and a
 *                     pixel buffer `p` (assumed to be BGR, 3 bytes per pixel,
 *                     and to outlive this call -- confirm with the caller)
 * @return base64 text produced by face_base64_encode() for the descriptor
 * @throws CODE_NOT_LOADED       when load_model() has not completed
 * @throws CODE_FACE_NOT_FOUND   when `face_frames` is empty or any frame
 *                               contains no detectable face
 */
string gonghui_face::face_detect_frame_with_descriptor(
        std::vector<face_frame> face_frames
) {

    std::vector<cv::Mat> frames;
    frames.reserve(face_frames.size());

    cout << "convert arrays start" << endl;

    int i = 0;
    // Iterate by reference: the cv::Mat built here only borrows uc.p, so it
    // must not point into a per-iteration copy of the frame.
    for (auto &uc : face_frames) {
        cv::Mat img(cv::Size(uc.width, uc.height), CV_8UC3, uc.p);
        frames.push_back(img);

        // NOTE(review): debug dump to a hard-coded developer directory; kept
        // because it is an existing side effect, but it should be removed or
        // made configurable.
        cv::imwrite("/home/fanfan/images/after_" + std::to_string(i) + ".jpg", img);

        i++;
    }

    cout << "convert arrays end" << endl;

    if (!loaded) {
        cout << "CODE_NOT_LOADED" << endl;
        throw CODE_NOT_LOADED;
    }

    // Without frames there is nothing to index below; report it the same way
    // as a frame without a face.
    if (frames.empty()) {
        cout << "CODE_FACE_NOT_FOUND" << endl;
        throw CODE_FACE_NOT_FOUND;
    }

    std::vector<face_rectangle> faces;
    faces.reserve(frames.size());

    for (const cv::Mat &frame : frames) {

        cv_image<bgr_pixel> cv_img(frame);

        std::vector<dlib::rectangle> face_vector = global_detector(cv_img);

        // Must be checked before touching face_vector[0].
        if (face_vector.empty()) {
            cout << "CODE_FACE_NOT_FOUND" << endl;
            throw CODE_FACE_NOT_FOUND;
        }

        // Largest face of this frame; on equal areas the first one wins.
        dlib::rectangle face_one = face_vector[0];
        for (const dlib::rectangle &face : face_vector) {
            if (face_one.area() < face.area()) {
                face_one = face;
            }
        }

        face_rectangle fr;
        fr.cv_img = cv_img;
        fr.rect = face_one;
        faces.push_back(fr);
    }

    // Largest face across all frames; on equal areas the first one wins.
    face_rectangle face_big = faces[0];
    for (const face_rectangle &face : faces) {
        if (face_big.rect.area() < face.rect.area()) {
            face_big = face;
        }
    }

    auto shape = global_sp(face_big.cv_img, face_big.rect);

    matrix<rgb_pixel> face_chip;

    extract_image_chip(face_big.cv_img, get_face_chip_details(shape, 150, 0.25), face_chip);

    std::vector<matrix<rgb_pixel>> face_chips;
    face_chips.push_back(std::move(face_chip));

    std::vector<matrix<float, 0, 1>> face_descriptors = global_net(face_chips);

    return face_base64_encode(face_descriptors[0]);
}

/**
 * Compute the descriptor of the first face detected in an image file.
 *
 * If the first detection pass finds nothing, the image is passed through
 * pyramid_up() and detection is tried once more. The first rectangle the
 * detector returns is aligned (150 px chip, 0.25 padding) and fed to the
 * recognition network.
 *
 * @param face_image_path  image file handed to load_image()
 * @return base64 text produced by face_base64_encode() for the descriptor
 * @throws CODE_NOT_LOADED       when load_model() has not completed
 * @throws CODE_FACE_NOT_FOUND   when both detection passes find no face
 */
string gonghui_face::get_face_descriptor(
        string face_image_path
) {

    if (!loaded)
        throw CODE_NOT_LOADED;

    matrix<rgb_pixel> image;
    load_image(image, face_image_path);

    std::vector<dlib::rectangle> found = global_detector(image);

    if (found.empty()) {
        // Second chance: pyramid_up upsamples the image (per dlib's docs), so
        // faces too small for the first pass may be picked up.
        pyramid_up(image);
        found = global_detector(image);

        if (found.empty()) {
            cout << "face not found: " << face_image_path << endl;
            throw CODE_FACE_NOT_FOUND;
        }
    }

    // Landmarks for the first detection, then the aligned face crop.
    auto landmarks = global_sp(image, found[0]);

    matrix<rgb_pixel> chip;
    extract_image_chip(image, get_face_chip_details(landmarks, 150, 0.25), chip);

    // The network is invoked on a batch; ours holds exactly one chip.
    std::vector<matrix<rgb_pixel>> batch;
    batch.push_back(std::move(chip));

    std::vector<matrix<float, 0, 1>> descriptors = global_net(batch);

    return face_base64_encode(descriptors[0]);
}

/**
 * Compare two encoded face descriptors.
 *
 * Both strings are decoded with face_base64_decode() and the Euclidean length
 * of their difference is compared against the threshold.
 *
 * @param face_desc_str_1  first descriptor, as produced by face_base64_encode()
 * @param face_desc_str_2  second descriptor, same format
 * @param similar          distance threshold; strictly smaller means "similar"
 * @return the distance when it is below `similar`, otherwise -1. Also -1 when
 *         either descriptor decodes to an empty matrix or the two decoded
 *         matrices differ in size.
 * @throws CODE_NOT_LOADED  when load_model() has not completed
 */
double gonghui_face::compare_face(string face_desc_str_1, string face_desc_str_2, double similar) {

    if (!loaded) {
        throw CODE_NOT_LOADED;
    }

    const matrix<float> m1 = face_base64_decode(face_desc_str_1);

    const matrix<float> m2 = face_base64_decode(face_desc_str_2);

    // Subtracting matrices of different sizes violates dlib's precondition,
    // and two empty descriptors would yield distance 0 and be reported as a
    // match. Neither can be a valid comparison, so report "not similar".
    if (m1.size() == 0 || m1.nr() != m2.nr() || m1.nc() != m2.nc()) {
        cout << "not similar" << endl;
        return -1;
    }

    const double distance = length(m1 - m2);

    cout << similar << ":" << distance << endl;

    if (distance < similar) {
        cout << "similar" << endl;
        return distance;
    }

    cout << "not similar" << endl;
    return -1;
}