上传原版本C++训练文件

devA
yuxue 5 years ago
parent 6111a17f48
commit 129fe13a27

@ -0,0 +1,344 @@
#include <numeric>
#include <ctime>
#include "easypr/train/ann_train.h"
#include "easypr/config.h"
#include "easypr/core/chars_identify.h"
#include "easypr/core/feature.h"
#include "easypr/core/core_func.h"
#include "easypr/train/create_data.h"
#include "easypr/util/util.h"
// 原版C++语言 训练代码
namespace easypr {
/// Creates a trainer that reads character samples from `chars_folder` and
/// stores the trained network in the file named by `xml`.
AnnTrain::AnnTrain(const char* chars_folder, const char* xml)
    : chars_folder_(chars_folder),
      ann_xml_(xml) {
  // type == 0: all characters; type == 1: Chinese characters only.
  type = 0;
  ann_ = cv::ml::ANN_MLP::create();

  // Key/value table read from the province mapping resource
  // (entries such as zh_cuan and zh_gan1).
  kv_ = std::make_shared<Kv>();
  kv_->load("resources/text/province_mapping");
}
/// Configures the MLP, trains it on the data returned by sdata(), saves the
/// model to ann_xml_ and finally runs test().
///
/// Returns early (after printing a message) when the training folder
/// contains no files.
void AnnTrain::train() {
  // Network dimensions. They stay 0 when `type` is neither 0 nor 1.
  int classNumber = 0;
  int input_number = 0;
  int hidden_number = 0;
  if (type == 0 || type == 1) {
    classNumber = (type == 0) ? kCharsTotalNumber : kChineseNumber;
    input_number = kAnnInput;
    hidden_number = kNeurons;
  }
  const int output_number = classNumber;

  cv::Mat layers;
  const bool useTLFN = false;
  if (!useTLFN) {
    // Single hidden layer: input -> hidden -> output.
    layers.create(1, 3, CV_32SC1);
    layers.at<int>(0) = input_number;
    layers.at<int>(1) = hidden_number;
    layers.at<int>(2) = output_number;
  } else {
    // Two-hidden-layer networks are hard to train, so this path is disabled.
    // The ratio N / (m + 2) must be evaluated in floating point; integer
    // division would truncate it before the square root is taken.
    const int N = input_number;
    const int m = output_number;
    const double ratio = std::sqrt(static_cast<double>(N) / (m + 2));
    const int first_hidden_neurons =
        static_cast<int>(std::sqrt(static_cast<double>((m + 2) * N)) + 2 * ratio);
    const int second_hidden_neurons = static_cast<int>(m * ratio);

    fprintf(stdout, ">> Use two-layers neural networks,\n");
    layers.create(1, 4, CV_32SC1);
    layers.at<int>(0) = input_number;
    layers.at<int>(1) = first_hidden_neurons;
    layers.at<int>(2) = second_hidden_neurons;
    layers.at<int>(3) = output_number;
  }

  ann_->setLayerSizes(layers);
  ann_->setActivationFunction(cv::ml::ANN_MLP::SIGMOID_SYM, 1, 1);
  ann_->setTrainMethod(cv::ml::ANN_MLP::TrainingMethods::BACKPROP);
  ann_->setTermCriteria(cvTermCriteria(CV_TERMCRIT_ITER, 30000, 0.0001));
  ann_->setBackpropWeightScale(0.1);
  ann_->setBackpropMomentumScale(0.1);

  auto files = Utils::getFiles(chars_folder_);
  if (files.size() == 0) {
    fprintf(stdout, "No file found in the train folder!\n");
    return;
  }

  // Train on the raw samples topped up with synthetic ones: sdata() pads
  // each class towards the requested number of samples.
  auto traindata = sdata(350);
  ann_->train(traindata);
  ann_->save(ann_xml_);

  test();
}
// Recognizes a Chinese character.
// Returns {class key, value stored for that key in kv_}.
std::pair<std::string, std::string> AnnTrain::identifyChinese(cv::Mat input) {
  cv::Mat feature = charFeatures2(input, kPredictSize);

  cv::Mat scores(1, kChineseNumber, CV_32FC1);
  ann_->predict(feature, scores);

  // Arg-max over the network outputs; on ties the lowest index wins.
  int best = 0;
  float bestScore = -2;
  for (int c = 0; c < kChineseNumber; ++c) {
    const float score = scores.at<float>(c);
    if (score > bestScore) {
      bestScore = score;
      best = c;
    }
  }

  // The Chinese classes are the last kChineseNumber entries of kChars.
  const std::string key = kChars[best + kCharsTotalNumber - kChineseNumber];
  return std::make_pair(key, kv_->get(key));
}
// Recognizes a character from the full class set.
// For indices below kCharactersNumber both pair members are the class key;
// otherwise the second member is the value stored for that key in kv_.
std::pair<std::string, std::string> AnnTrain::identify(cv::Mat input) {
  cv::Mat feature = charFeatures2(input, kPredictSize);

  cv::Mat scores(1, kCharsTotalNumber, CV_32FC1);
  ann_->predict(feature, scores);

  // Arg-max over the network outputs; on ties the lowest index wins.
  int best = 0;
  float bestScore = -2;
  for (int c = 0; c < kCharsTotalNumber; ++c) {
    const float score = scores.at<float>(c);
    if (score > bestScore) {
      bestScore = score;
      best = c;
    }
  }

  const std::string key = kChars[best];
  if (best < kCharactersNumber) {
    return std::make_pair(key, key);
  }
  return std::make_pair(key, kv_->get(key));
}
/// Runs the current network over every image under chars_folder_/<class key>
/// and prints per-class and overall accuracy to stdout.
///
/// Images that cannot be read are skipped and do not count towards totals.
void AnnTrain::test() {
  assert(chars_folder_);

  int classNumber = 0;
  if (type == 0) classNumber = kCharsTotalNumber;
  if (type == 1) classNumber = kChineseNumber;

  int corrects_all = 0, sum_all = 0;
  std::vector<float> rate_list;

  for (int i = 0; i < classNumber; ++i) {
    // The classes under test are the last `classNumber` entries of kChars.
    auto char_key = kChars[i + kCharsTotalNumber - classNumber];

    // Bounded formatting: an over-long path is truncated rather than
    // written past the end of the buffer.
    char sub_folder[512] = { 0 };
    snprintf(sub_folder, sizeof(sub_folder), "%s/%s", chars_folder_, char_key);
    fprintf(stdout, ">> Testing characters %s in %s \n", char_key, sub_folder);

    auto chars_files = utils::getFiles(sub_folder);
    int corrects = 0, sum = 0;
    // (file name, predicted second member) for every misclassified image.
    std::vector<std::pair<std::string, std::string>> error_files;

    for (const auto& file : chars_files) {
      auto img = cv::imread(file, 0);  // a grayscale image
      if (!img.data) {
        continue;
      }

      std::pair<std::string, std::string> ch;
      if (type == 0) ch = identify(img);
      if (type == 1) ch = identifyChinese(img);

      if (ch.first == char_key) {
        ++corrects;
        ++corrects_all;
      } else {
        error_files.push_back(std::make_pair(utils::getFileName(file), ch.second));
      }
      ++sum;
      ++sum_all;
    }

    float rate = (float)corrects / (sum == 0 ? 1 : sum);
    fprintf(stdout, ">> [sum: %d, correct: %d, rate: %.2f]\n", sum, corrects, rate);
    rate_list.push_back(rate);

    // With 10 or more misses only roughly the first tenth is listed.
    std::string error_string;
    auto end = error_files.end();
    if (error_files.size() >= 10) {
      end -= static_cast<size_t>(error_files.size() * (1 - 0.1));
    }
    for (auto k = error_files.begin(); k != end; ++k) {
      const auto& kv = *k;
      error_string.append(" ").append(kv.first).append(": ").append(
          kv.second);
      // The last listed entry is followed by an ellipsis instead of a comma.
      if (k != end - 1) {
        error_string.append(",\n");
      } else {
        error_string.append("\n ...");
      }
    }
    fprintf(stdout, ">> [\n%s\n ]\n", error_string.c_str());
  }

  fprintf(stdout, ">> [sum_all: %d, correct_all: %d, rate: %.4f]\n", sum_all, corrects_all,
          (float)corrects_all / (sum_all == 0 ? 1 : sum_all));

  // Unweighted mean of the per-class rates.
  double rate_sum = std::accumulate(rate_list.begin(), rate_list.end(), 0.0);
  double rate_mean = rate_sum / (rate_list.size() == 0 ? 1 : rate_list.size());
  fprintf(stdout, ">> [classNumber: %d, avg_rate: %.4f]\n", classNumber, rate_mean);
}
// Produces a randomly perturbed copy of `image`: depending on the parity of
// one rand() draw, the copy is either shifted by -2..2 pixels on each axis
// or rotated by an angle in -7..7.
cv::Mat getSyntheticImage(const Mat& image) {
  const int selector = rand();
  Mat copy = image.clone();

  if (selector % 2 == 0) {
    // Offsets are drawn in x-then-y order.
    const int shift_x = rand() % 5 - 2;
    const int shift_y = rand() % 5 - 2;
    copy = translateImg(copy, shift_x, shift_y);
    return copy;
  }

  const float angle = float(rand() % 15 - 7);
  copy = rotateImg(copy, angle);
  return copy;
}
/// Builds the training set from chars_folder_/<class key>, topping each class
/// up with synthetic images until it holds `number_for_count` samples.
///
/// @param number_for_count Target number of samples per class; classes that
///        already have at least this many readable images get no synthetic
///        ones.
/// @return Row-sample TrainData with one-hot float responses
///         (one column per class).
///
/// Unreadable images are skipped. A class with no readable image is skipped
/// entirely, because there is nothing to synthesize from.
cv::Ptr<cv::ml::TrainData> AnnTrain::sdata(size_t number_for_count) {
  assert(chars_folder_);

  cv::Mat samples;
  std::vector<int> labels;

  int classNumber = 0;
  if (type == 0) classNumber = kCharsTotalNumber;
  if (type == 1) classNumber = kChineseNumber;

  srand((unsigned)time(0));

  for (int i = 0; i < classNumber; ++i) {
    // The classes in use are the last `classNumber` entries of kChars.
    auto char_key = kChars[i + kCharsTotalNumber - classNumber];

    // Bounded formatting: an over-long path is truncated rather than
    // written past the end of the buffer.
    char sub_folder[512] = { 0 };
    snprintf(sub_folder, sizeof(sub_folder), "%s/%s", chars_folder_, char_key);
    fprintf(stdout, ">> Testing characters %s in %s \n", char_key, sub_folder);

    auto chars_files = utils::getFiles(sub_folder);
    size_t char_size = chars_files.size();
    fprintf(stdout, ">> Characters count: %d \n", int(char_size));

    std::vector<cv::Mat> matVec;
    matVec.reserve(number_for_count);
    for (const auto& file : chars_files) {
      auto img = cv::imread(file, 0);  // a grayscale image
      if (!img.data) {
        // Unreadable file: an empty Mat must not reach feature extraction.
        continue;
      }
      matVec.push_back(img);
    }

    if (matVec.empty()) {
      // Without a base image the random pick below would be `rand() % 0`.
      fprintf(stdout, ">> No readable image for %s, class skipped.\n", char_key);
      continue;
    }

    // Synthesize from any image collected so far, synthetic ones included.
    const int missing = (int)number_for_count - (int)matVec.size();
    for (int t = 0; t < missing; t++) {
      int ran_num = rand() % (int)matVec.size();
      auto img = matVec.at(ran_num);
      auto simg = getSyntheticImage(img);
      matVec.push_back(simg);

      // NOTE(review): synthetic images are written into the class folder
      // itself, so a later run will pick them up as input files.
      std::stringstream ss(std::stringstream::in | std::stringstream::out);
      ss << sub_folder << "/" << i << "_" << t << "_" << ran_num << ".jpg";
      imwrite(ss.str(), simg);
    }
    fprintf(stdout, ">> Characters count: %d \n", (int)matVec.size());

    for (const auto& img : matVec) {
      auto fps = charFeatures2(img, kPredictSize);
      samples.push_back(fps);
      labels.push_back(i);
    }
  }

  cv::Mat samples_;
  samples.convertTo(samples_, CV_32F);

  // One-hot encode the labels: row r has a 1 in column labels[r].
  cv::Mat train_classes =
      cv::Mat::zeros((int)labels.size(), classNumber, CV_32F);
  for (int i = 0; i < train_classes.rows; ++i) {
    train_classes.at<float>(i, labels[i]) = 1.f;
  }

  return cv::ml::TrainData::create(samples_, cv::ml::SampleTypes::ROW_SAMPLE,
                                   train_classes);
}
/// Builds the training set from the images under chars_folder_/<class key>
/// without any synthetic augmentation.
///
/// @return Row-sample TrainData with one-hot float responses
///         (one column per class).
///
/// Unreadable images are skipped, matching what test() does.
cv::Ptr<cv::ml::TrainData> AnnTrain::tdata() {
  assert(chars_folder_);

  cv::Mat samples;
  std::vector<int> labels;

  std::cout << "Collecting chars in " << chars_folder_ << std::endl;

  int classNumber = 0;
  if (type == 0) classNumber = kCharsTotalNumber;
  if (type == 1) classNumber = kChineseNumber;

  for (int i = 0; i < classNumber; ++i) {
    // The classes in use are the last `classNumber` entries of kChars.
    auto char_key = kChars[i + kCharsTotalNumber - classNumber];

    // Bounded formatting: an over-long path is truncated rather than
    // written past the end of the buffer.
    char sub_folder[512] = {0};
    snprintf(sub_folder, sizeof(sub_folder), "%s/%s", chars_folder_, char_key);
    std::cout << " >> Featuring characters " << char_key << " in "
              << sub_folder << std::endl;

    auto chars_files = utils::getFiles(sub_folder);
    for (const auto& file : chars_files) {
      auto img = cv::imread(file, 0);  // a grayscale image
      if (!img.data) {
        // Unreadable file: an empty Mat must not reach feature extraction.
        continue;
      }
      auto fps = charFeatures2(img, kPredictSize);
      samples.push_back(fps);
      labels.push_back(i);
    }
  }

  cv::Mat samples_;
  samples.convertTo(samples_, CV_32F);

  // One-hot encode the labels: row r has a 1 in column labels[r].
  cv::Mat train_classes =
      cv::Mat::zeros((int)labels.size(), classNumber, CV_32F);
  for (int i = 0; i < train_classes.rows; ++i) {
    train_classes.at<float>(i, labels[i]) = 1.f;
  }

  return cv::ml::TrainData::create(samples_, cv::ml::SampleTypes::ROW_SAMPLE,
                                   train_classes);
}
}

@ -0,0 +1,31 @@
zh_cuan 川
zh_gan1 甘
zh_hei 黑
zh_jin 津
zh_liao 辽
zh_min 闽
zh_qiong 琼
zh_sx 晋
zh_xin 新
zh_yue 粤
zh_zhe 浙
zh_e 鄂
zh_gui 贵
zh_hu 沪
zh_jing 京
zh_lu 鲁
zh_ning 宁
zh_shan 陕
zh_wan 皖
zh_yu 豫
zh_yun 云
zh_gan 赣
zh_gui1 桂
zh_ji 冀
zh_jl 吉
zh_meng 蒙
zh_qing 青
zh_su 苏
zh_xiang 湘
zh_yu1 渝
zh_zang 藏

@ -0,0 +1,196 @@
#include "easypr/train/svm_train.h"
#include "easypr/util/util.h"
#include "easypr/config.h"
#ifdef OS_WINDOWS
#include <ctime>
#endif
using namespace cv;
using namespace cv::ml;
// 原版C++语言 训练代码
namespace easypr {
/// Creates a trainer that reads plate images from `plates_folder` and stores
/// the trained SVM in the file named by `xml`. Both arguments must be
/// non-null (checked with assert).
SvmTrain::SvmTrain(const char* plates_folder, const char* xml)
    : plates_folder_(plates_folder),
      svm_xml_(xml) {
  assert(plates_folder);
  assert(xml);

  // Feature extractor used by both tdata() and test().
  extractFeature = getHistomPlusColoFeatures;
}
/// Creates and configures an RBF C-SVC, trains it with trainAuto on the data
/// returned by tdata(), saves it to svm_xml_ and then runs test().
///
/// Prints a hint and returns early when prepare() finds no training files.
void SvmTrain::train() {
  svm_ = cv::ml::SVM::create();

  // Model type and kernel.
  svm_->setType(cv::ml::SVM::C_SVC);
  svm_->setKernel(cv::ml::SVM::RBF);

  // Initial hyper-parameters.
  svm_->setDegree(0.1);
  // 1.4 bug fix: old 1.4 ver gamma is 1
  svm_->setGamma(0.1);
  svm_->setCoef0(0.1);
  svm_->setC(1);
  svm_->setNu(0.1);
  svm_->setP(0.1);
  svm_->setTermCriteria(cvTermCriteria(CV_TERMCRIT_ITER, 20000, 0.0001));

  this->prepare();

  if (train_file_list_.empty()) {
    fprintf(stdout, "No file found in the train folder!\n");
    fprintf(stdout, "You should create a folder named \"tmp\" in EasyPR main folder.\n");
    fprintf(stdout, "Copy train data folder(like \"SVM\") under \"tmp\". \n");
    return;
  }

  auto samples = tdata();

  fprintf(stdout, ">> Training SVM model, please wait...\n");

  // 10-fold search over the default grid of every parameter.
  const long started = utils::getTimestamp();
  svm_->trainAuto(samples, 10,
                  cv::ml::SVM::getDefaultGrid(cv::ml::SVM::C),
                  cv::ml::SVM::getDefaultGrid(cv::ml::SVM::GAMMA),
                  cv::ml::SVM::getDefaultGrid(cv::ml::SVM::P),
                  cv::ml::SVM::getDefaultGrid(cv::ml::SVM::NU),
                  cv::ml::SVM::getDefaultGrid(cv::ml::SVM::COEF),
                  cv::ml::SVM::getDefaultGrid(cv::ml::SVM::DEGREE),
                  true);
  const long finished = utils::getTimestamp();

  fprintf(stdout, ">> Training done. Time elapse: %ldms\n", finished - started);
  fprintf(stdout, ">> Saving model file...\n");
  svm_->save(svm_xml_);
  fprintf(stdout, ">> Your SVM Model was saved to %s\n", svm_xml_);

  fprintf(stdout, ">> Testing...\n");
  this->test();
}
/// Classifies every entry of test_file_list_ with the SVM and prints the
/// confusion counts plus precision, recall and F-score to stdout.
///
/// A metric whose denominator is zero is printed as "NA" and treated as 0
/// in later computations.
void SvmTrain::test() {
  // 1.4 bug fix: old 1.4 ver there is no null judge
  // NOTE(review): LOAD_SVM_MODEL is a project macro; presumably it loads the
  // model stored at svm_xml_ into svm_ - confirm.
  LOAD_SVM_MODEL(svm_, svm_xml_);

  if (test_file_list_.empty()) {
    this->prepare();
  }

  // Prints "<label><value>" or "<label>NA" followed by a flushing newline.
  const auto printMetric = [](const char* label, bool defined, double value) {
    std::cout << label;
    if (defined) {
      std::cout << value;
    } else {
      std::cout << "NA";
    }
    std::cout << std::endl;
  };

  const double total = test_file_list_.size();
  double predForward_realForward = 0;
  double predForward_realInverse = 0;
  double predInverse_realForward = 0;
  double predInverse_realInverse = 0;

  for (const auto& entry : test_file_list_) {
    auto image = cv::imread(entry.file);
    if (!image.data) {
      std::cout << "no" << std::endl;
      continue;
    }

    cv::Mat feature;
    extractFeature(image, feature);

    const auto predicted = int(svm_->predict(feature));
    const auto actual = entry.label;

    if (predicted == kForward) {
      if (actual == kForward) predForward_realForward++;
      if (actual == kInverse) predForward_realInverse++;
    }
    if (predicted == kInverse) {
      if (actual == kForward) predInverse_realForward++;
      if (actual == kInverse) predInverse_realInverse++;
    }
  }

  std::cout << "count_all: " << total << std::endl;
  std::cout << "ptrue_rtrue: " << predForward_realForward << std::endl;
  std::cout << "ptrue_rfalse: " << predForward_realInverse << std::endl;
  std::cout << "pfalse_rtrue: " << predInverse_realForward << std::endl;
  std::cout << "pfalse_rfalse: " << predInverse_realInverse << std::endl;

  // Precision: correct forward predictions over all forward predictions.
  const bool hasPrecision = predForward_realForward + predForward_realInverse != 0;
  double precise = 0;
  if (hasPrecision) {
    precise = predForward_realForward / (predForward_realForward + predForward_realInverse);
  }
  printMetric("precise: ", hasPrecision, precise);

  // Recall: correct forward predictions over all real forward samples.
  const bool hasRecall = predForward_realForward + predInverse_realForward != 0;
  double recall = 0;
  if (hasRecall) {
    recall = predForward_realForward / (predForward_realForward + predInverse_realForward);
  }
  printMetric("recall: ", hasRecall, recall);

  // Harmonic mean of precision and recall.
  const bool hasFscore = precise + recall != 0;
  double fscore = 0;
  if (hasFscore) {
    fscore = 2 * (precise * recall) / (precise + recall);
  }
  printMetric("Fsocre: ", hasFscore, fscore);
}
/// Rebuilds train_file_list_ and test_file_list_ from the four sub-folders
/// of plates_folder_: has/train, has/test, no/train and no/test.
///
/// Files under "has" are labelled kForward, files under "no" kInverse. Each
/// folder listing is shuffled before it is appended. Both lists are cleared
/// first, so calling prepare() more than once does not duplicate entries.
void SvmTrain::prepare() {
  srand(unsigned(time(NULL)));

  train_file_list_.clear();
  test_file_list_.clear();

  // Bounded formatting: an over-long path is truncated rather than written
  // past the end of the buffer.
  char buffer[260] = {0};

  snprintf(buffer, sizeof(buffer), "%s/has/train", plates_folder_);
  auto has_file_train_list = utils::getFiles(buffer);
  std::random_shuffle(has_file_train_list.begin(), has_file_train_list.end());

  snprintf(buffer, sizeof(buffer), "%s/has/test", plates_folder_);
  auto has_file_test_list = utils::getFiles(buffer);
  std::random_shuffle(has_file_test_list.begin(), has_file_test_list.end());

  snprintf(buffer, sizeof(buffer), "%s/no/train", plates_folder_);
  auto no_file_train_list = utils::getFiles(buffer);
  std::random_shuffle(no_file_train_list.begin(), no_file_train_list.end());

  snprintf(buffer, sizeof(buffer), "%s/no/test", plates_folder_);
  auto no_file_test_list = utils::getFiles(buffer);
  std::random_shuffle(no_file_test_list.begin(), no_file_test_list.end());

  fprintf(stdout, ">> Collecting train data...\n");
  for (const auto& file : has_file_train_list)
    train_file_list_.push_back({ file, kForward });
  for (const auto& file : no_file_train_list)
    train_file_list_.push_back({ file, kInverse });

  fprintf(stdout, ">> Collecting test data...\n");
  for (const auto& file : has_file_test_list)
    test_file_list_.push_back({ file, kForward });
  for (const auto& file : no_file_test_list)
    test_file_list_.push_back({ file, kInverse });
}
/// Converts train_file_list_ into row-sample TrainData: one flattened
/// feature row per readable image, with the entry's label as the integer
/// response. Unreadable images are reported on stdout and skipped.
cv::Ptr<cv::ml::TrainData> SvmTrain::tdata() {
  cv::Mat featureRows;
  std::vector<int> labelValues;

  for (const auto& entry : train_file_list_) {
    auto image = cv::imread(entry.file);
    if (!image.data) {
      fprintf(stdout, ">> Invalid image: %s ignore.\n", entry.file.c_str());
      continue;
    }

    cv::Mat feature;
    extractFeature(image, feature);

    // Flatten to a single-channel, single-row matrix before stacking.
    featureRows.push_back(feature.reshape(1, 1));
    labelValues.push_back(int(entry.label));
  }

  cv::Mat floatSamples;
  featureRows.convertTo(floatSamples, CV_32FC1);

  // Deep copy so the responses do not alias the local vector's storage.
  cv::Mat responseColumn;
  cv::Mat(labelValues).copyTo(responseColumn);

  return cv::ml::TrainData::create(floatSamples, cv::ml::SampleTypes::ROW_SAMPLE, responseColumn);
}
} // namespace easypr
Loading…
Cancel
Save