chars_identify.cpp

2 years ago · 30d539a265
parent 6f850796fa
commit 30d539a265
1 changed files with 89 additions and 16 deletions
--- a/src/src/core/chars_identify.cpp
+++ b/src/src/core/chars_identify.cpp
@ -18,6 +18,7 @@ CharsIdentify* CharsIdentify::instance() {
  return instance_;
 }
 // 主要用于加载和管理预训练的神经网络模型，用于字符识别
 CharsIdentify::CharsIdentify() {
  LOAD_ANN_MODEL(ann_, kDefaultAnnPath);
  LOAD_ANN_MODEL(annChinese_, kChineseAnnPath);
@ -58,18 +59,24 @@ void CharsIdentify::LoadChineseMapping(std::string path) {
  kv_->load(path);
 }
 // 对输入的特征行进行预测，并识别出最可能的字符。
 void CharsIdentify::classify(cv::Mat featureRows, std::vector<int>& out_maxIndexs,
                             std::vector<float>& out_maxVals, std::vector<bool> isChineseVec){
  // 获取特征行的行数。
  int rowNum = featureRows.rows;
-
+  // 创建一个新的矩阵output，大小为特征行的行数（rowNum）乘以总的字符数量
  cv::Mat output(rowNum, kCharsTotalNumber, CV_32FC1);
  // 使用预先训练好的模型（ann_）对输入的特征行进行预测，结果保存在output矩阵中
  ann_->predict(featureRows, output);
-
+  // 循环遍历每一行输出：
  // 对于每一行，首先获取该行的预测结果
  for (int output_index = 0; output_index < rowNum; output_index++) {
    Mat output_row = output.row(output_index);
    int result = 0;
    float maxVal = -2.f;
    bool isChinses = isChineseVec[output_index];
    // 如果该行不是中文字符（由isChineseVec向量确定），
    // 则遍历前kCharactersNumber个预测结果，找出值最大的那个，并记录其索引和值。
    if (!isChinses) {
      result = 0;
      for (int j = 0; j < kCharactersNumber; j++) {
@ -81,6 +88,8 @@ void CharsIdentify::classify(cv::Mat featureRows, std::vector<int>& out_maxIndex
        }
      }
    }
    // 如果该行是中文字符，
    // 则从kCharactersNumber开始遍历后面的预测结果，找出值最大的那个，并记录其索引和值。
    else {
      result = kCharactersNumber;
      for (int j = kCharactersNumber; j < kCharsTotalNumber; j++) {
@ -92,18 +101,20 @@ void CharsIdentify::classify(cv::Mat featureRows, std::vector<int>& out_maxIndex
        }
      }
    }
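    // Store the winning index and its score at position output_index of out_maxIndexs / out_maxVals.
    // NOTE(review): these are indexed assignments, so both vectors must already hold at least rowNum elements.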
    out_maxIndexs[output_index] = result;
    out_maxVals[output_index] = maxVal;
  }
 }
-
+// 接受一个CCharacter类型的向量（charVec），并对每个字符进行分类。
 void CharsIdentify::classify(std::vector<CCharacter>& charVec){
  size_t charVecSize = charVec.size();
  if (charVecSize == 0)
    return;
-
+  
  // 创建一个名为featureRows的Mat对象，并通过循环将每个字符的特征提取出来并添加到featureRows中。
  Mat featureRows;
  for (size_t index = 0; index < charVecSize; index++) {
    Mat charInput = charVec[index].getCharacterMat();
@ -111,9 +122,14 @@ void CharsIdentify::classify(std::vector<CCharacter>& charVec){
    featureRows.push_back(feature);
  }
  // 创建一个输出矩阵output，并使用预先训练好的模型（ann_）对特征进行预测。
  cv::Mat output(charVecSize, kCharsTotalNumber, CV_32FC1);
  ann_->predict(featureRows, output);
  // 遍历每个输出，对于每个输出，首先获取对应的字符（通过索引），
  // 然后获取该字符的预测结果行（通过索引）。然后，函数检查该字符是否为中文字符，
  // 如果不是，它就在循环中找出值最大的预测结果，并记录其索引和值。
  // 最后，函数根据这个最大值和索引确定预测的字符，并将其作为标签。
  for (size_t output_index = 0; output_index < charVecSize; output_index++) {
    CCharacter& character = charVec[output_index];
    Mat output_row = output.row(output_index);
@ -135,6 +151,8 @@ void CharsIdentify::classify(std::vector<CCharacter>& charVec){
      }
      label = std::make_pair(kChars[result], kChars[result]).second;
    }
    // 如果字符是中文字符，函数则从预测结果的后面部分开始查找最大值，并记录其索引和值。
    // 然后，函数根据这个最大值和索引确定预测的字符，并通过键值对（kv_）查找对应的省份，将字符和省份作为标签。
    else {
      result = kCharactersNumber;
      for (int j = kCharactersNumber; j < kCharsTotalNumber; j++) {
@ -152,28 +170,33 @@ void CharsIdentify::classify(std::vector<CCharacter>& charVec){
    }
    /*std::cout << "result:" << result << std::endl;
    std::cout << "maxVal:" << maxVal << std::endl;*/
    // 函数将预测的最大值和标签分别设置到对应字符对象的得分和字符串属性中。
    character.setCharacterScore(maxVal);
    character.setCharacterStr(label);
  }
 }
-
+// 对输入的中文字符进行分类
 void CharsIdentify::classifyChineseGray(std::vector<CCharacter>& charVec){
  size_t charVecSize = charVec.size();
  if (charVecSize == 0)
    return;
  Mat featureRows;
  // 通过循环提取每个字符的特征，并存储在featureRows中
  for (size_t index = 0; index < charVecSize; index++) {
    Mat charInput = charVec[index].getCharacterMat();
    cv::Mat feature;
    extractFeature(charInput, feature);
    featureRows.push_back(feature);
  }
-
+  // 创建一个输出矩阵（output），然后使用预先训练好的模型（annGray_）对特征进行预测，并将结果存储在output中
  cv::Mat output(charVecSize, kChineseNumber, CV_32FC1);
  annGray_->predict(featureRows, output);
  // 对于输出矩阵中的每一行（每个字符的预测结果），
  // 如果该字符是中文字符，函数会从预测结果的后面部分开始查找最大值，并记录其索引和值。
  for (size_t output_index = 0; output_index < charVecSize; output_index++) {
    CCharacter& character = charVec[output_index];
    Mat output_row = output.row(output_index);
@ -198,6 +221,8 @@ void CharsIdentify::classifyChineseGray(std::vector<CCharacter>& charVec){
      isChinese = false;
    }
    // 根据这个最大值和索引确定预测的字符。
    // 这是通过查找kChars数组实现的，其中kChars可能是一个预定义的字符集。
    auto index = result + kCharsTotalNumber - kChineseNumber;
    const char* key = kChars[index];
    std::string s = key;
@ -206,12 +231,15 @@ void CharsIdentify::classifyChineseGray(std::vector<CCharacter>& charVec){
    /*std::cout << "result:" << result << std::endl;
    std::cout << "maxVal:" << maxVal << std::endl;*/
    // Store maxVal as the character's score, the province string as its text,
    // and the isChinese flag on the character object.
    character.setCharacterScore(maxVal);
    character.setCharacterStr(province);
    character.setIsChinese(isChinese);
  }
 }
 // 使用OpenCV库和神经网络进行中文字符识别
 void CharsIdentify::classifyChinese(std::vector<CCharacter>& charVec){
  size_t charVecSize = charVec.size();
@ -219,15 +247,20 @@ void CharsIdentify::classifyChinese(std::vector<CCharacter>& charVec){
    return;
  Mat featureRows;
  // 通过循环遍历每个字符，提取其特征并将其存储在featureRows中。
  // 这里，charFeatures函数被用于提取每个字符的特性，kChineseSize可能是一个预定义的特性大小。
  for (size_t index = 0; index < charVecSize; index++) {
    Mat charInput = charVec[index].getCharacterMat();
    Mat feature = charFeatures(charInput, kChineseSize);
    featureRows.push_back(feature);
  }
  // 创建一个输出矩阵（output），并使用预先训练好的模型（annChinese_）对特征进行预测。预测结果存储在output中。
  cv::Mat output(charVecSize, kChineseNumber, CV_32FC1);
  annChinese_->predict(featureRows, output);
  // 遍历每个预测结果，并对每个结果进行处理。对于每个预测结果，函数查找最大值及其索引。
  // 如果最大值小于或等于-1，则将最大值设置为0，并将result设置为0，同时将isChinese设置为false。
  for (size_t output_index = 0; output_index < charVecSize; output_index++) {
    CCharacter& character = charVec[output_index];
    Mat output_row = output.row(output_index);
@ -252,6 +285,8 @@ void CharsIdentify::classifyChinese(std::vector<CCharacter>& charVec){
      isChinese = false;
    }
    // 计算索引值，并使用该索引从kChars数组中获取对应的字符。
    // 同时，通过键值对（kv_）查找与该字符对应的省份。
    auto index = result + kCharsTotalNumber - kChineseNumber;
    const char* key = kChars[index];
    std::string s = key;
@ -260,18 +295,24 @@ void CharsIdentify::classifyChinese(std::vector<CCharacter>& charVec){
    /*std::cout << "result:" << result << std::endl;
    std::cout << "maxVal:" << maxVal << std::endl;*/
    // 将最大值、省份和isChinese作为标签，分别设置到对应字符对象的得分、字符串属性和是否为中文字符属性中。
    character.setCharacterScore(maxVal);
    character.setCharacterStr(province);
    character.setIsChinese(isChinese);
  }
 }
 // 对输入的图像数据进行分类
 int CharsIdentify::classify(cv::Mat f, float& maxVal, bool isChinses, bool isAlphabet){
  int result = 0;
-
+  // 调用预先训练好的模型（ann_）进行预测，并将预测结果存储在output变量中。
  cv::Mat output(1, kCharsTotalNumber, CV_32FC1);
  ann_->predict(f, output);
  // 查找最大值及其索引。如果图像数据不是中文，则会检查它是否是字母。
  // 如果它是字母，那么函数将只查找字母范围内的值（从10开始，对应于'A'）。
  // 否则，它将查找所有字符范围内的值。如果图像数据是中文，则函数将查找中文字符范围内的值
  maxVal = -2.f;
  if (!isChinses) {
    if (!isAlphabet) {
@ -309,20 +350,27 @@ int CharsIdentify::classify(cv::Mat f, float& maxVal, bool isChinses, bool isAlp
      }
    }
  }
  // Return result, the index of the selected class (callers use it to index kChars).
  // The corresponding score is handed back to the caller through the maxVal reference parameter.
  //std::cout << "maxVal:" << maxVal << std::endl;
  return result;
 }
-
+// 根据输入的图像数据判断它是否是一个字符（特别是中文字符）
 bool CharsIdentify::isCharacter(cv::Mat input, std::string& label, float& maxVal, bool isChinese) {
  // 调用charFeatures函数提取输入图像的特征，并存储在feature变量中。
  // 然后，它调用classify函数对特征进行分类，得到一个索引值index
  cv::Mat feature = charFeatures(input, kPredictSize);
  auto index = static_cast<int>(classify(feature, maxVal, isChinese));
-
+  
  if (isChinese) {
    //std::cout << "maxVal:" << maxVal << std::endl;
  }
  float chineseMaxThresh = 0.2f;
-
+  // 检查预测的最大值maxVal是否大于等于0.9，或者如果输入的字符是中文且最大值大于等于chineseMaxThresh（这个阈值被设置为0.2）。
  // 如果满足这些条件之一，函数将检查索引index是否小于kCharactersNumber（这可能是一个预定义的字符集大小）。
  // 如果是，则将索引对应的字符作为标签；否则，使用键值对kv_查找索引对应的省份，并将该索引对应的字符和省份作为标签。
  // 最后，函数返回true表示输入的图像是一个字符，否则返回false
  if (maxVal >= 0.9 || (isChinese && maxVal >= chineseMaxThresh)) {
    if (index < kCharactersNumber) {
      label = std::make_pair(kChars[index], kChars[index]).second;
@ -338,8 +386,10 @@ bool CharsIdentify::isCharacter(cv::Mat input, std::string& label, float& maxVal
  else
    return false;
 }
-
+// 用于识别输入的图像数据是否是一个中文字符。
 std::pair<std::string, std::string> CharsIdentify::identifyChinese(cv::Mat input, float& out, bool& isChinese) {
  // 调用charFeatures函数提取输入图像的特征，并存储在feature变量中。
  // 然后，它调用预先训练好的模型annChinese_进行预测，并将预测结果存储在output变量中。
  cv::Mat feature = charFeatures(input, kChineseSize);
  float maxVal = -2;
  int result = 0;
@ -347,6 +397,8 @@ std::pair<std::string, std::string> CharsIdentify::identifyChinese(cv::Mat input
  cv::Mat output(1, kChineseNumber, CV_32FC1);
  annChinese_->predict(feature, output);
  // 遍历输出数组，找到最大的值及其索引。
  // 如果最大值大于0.9，则将isChinese设置为true，表示输入的字符可能是中文。
  for (int j = 0; j < kChineseNumber; j++) {
    float val = output.at<float>(j);
    //std::cout << "j:" << j << "val:" << val << std::endl;
@ -357,6 +409,8 @@ std::pair<std::string, std::string> CharsIdentify::identifyChinese(cv::Mat input
  }
  // no match
  // 如果索引值为-1（即没有匹配的字符），
  // 则将result设置为0，maxVal设置为0，并将isChinese设置为false，表示输入的字符不是中文。
  if (-1 == result) {
    result = 0;
    maxVal = 0;
@ -365,7 +419,7 @@ std::pair<std::string, std::string> CharsIdentify::identifyChinese(cv::Mat input
  else if (maxVal > 0.9){
    isChinese = true;
  }
-
+  // 通过索引值获取字符的标签和省份，并将最大值保存到out中。函数返回一个由字符标签和省份组成的pair。
  auto index = result + kCharsTotalNumber - kChineseNumber;
  const char* key = kChars[index];
  std::string s = key;
@ -374,15 +428,18 @@ std::pair<std::string, std::string> CharsIdentify::identifyChinese(cv::Mat input
  return std::make_pair(s, province);
 }
-
+// 从输入的图像（可能是一个灰度图像）中识别出可能的中文字符。
 std::pair<std::string, std::string> CharsIdentify::identifyChineseGray(cv::Mat input, float& out, bool& isChinese) {
  cv::Mat feature;
  // 通过extractFeature函数提取输入图像的特征，并将特征保存在feature变量中。
  // 然后，它使用预先训练好的模型annGray_进行预测，并将预测结果存储在output变量中。
  extractFeature(input, feature);
  float maxVal = -2;
  int result = 0;
  cv::Mat output(1, kChineseNumber, CV_32FC1);
  annGray_->predict(feature, output);
-
+  // 遍历输出数组，找到最大的值及其索引。
  // 如果最大值大于0.9，则将isChinese设置为true，表示输入的字符可能是中文。
  for (int j = 0; j < kChineseNumber; j++) {
    float val = output.at<float>(j);
    //std::cout << "j:" << j << "val:" << val << std::endl;
@ -392,6 +449,8 @@ std::pair<std::string, std::string> CharsIdentify::identifyChineseGray(cv::Mat i
    }
  }
  // no match
  // 如果索引值为-1（即没有匹配的字符），
  // 则将result设置为0，maxVal设置为0，并将isChinese设置为false，表示输入的字符不是中文
  if (-1 == result) {
    result = 0;
    maxVal = 0;
@ -399,6 +458,7 @@ std::pair<std::string, std::string> CharsIdentify::identifyChineseGray(cv::Mat i
  } else if (maxVal > 0.9){
    isChinese = true;
  }
  // 通过索引值获取字符的标签和省份，并将最大值保存到out中。函数返回一个由字符标签和省份组成的pair。
  auto index = result + kCharsTotalNumber - kChineseNumber;
  const char* key = kChars[index];
  std::string s = key;
@ -407,11 +467,15 @@ std::pair<std::string, std::string> CharsIdentify::identifyChineseGray(cv::Mat i
  return std::make_pair(s, province);
 }
-
+// 用于识别输入的图像数据是否是一个字符。
 std::pair<std::string, std::string> CharsIdentify::identify(cv::Mat input, bool isChinese, bool isAlphabet) {
  // Extract the features of the input image with charFeatures and store them in feature.
  // Then pass the feature to classify(), which returns the predicted class index stored in index.
  cv::Mat feature = charFeatures(input, kPredictSize);
  float maxVal = -2;
  auto index = static_cast<int>(classify(feature, maxVal, isChinese, isAlphabet));
  // 检查索引值index是否小于字符集大小kCharactersNumber。如果是，则返回由相同字符组成的pair；
  // 否则，获取索引对应的字符作为键，并使用键值对kv_查找对应的省份。
  if (index < kCharactersNumber) {
    return std::make_pair(kChars[index], kChars[index]);
  }
@ -423,10 +487,14 @@ std::pair<std::string, std::string> CharsIdentify::identify(cv::Mat input, bool
  }
 }
 // 用于处理一组输入的图像数据并识别出对应的字符和省份。
 // 函数参数包括输入图像数据（inputs），输出结果（outputs）以及一个布尔值向量（isChineseVec）
 int CharsIdentify::identify(std::vector<cv::Mat> inputs, std::vector<std::pair<std::string, std::string>>& outputs,
                            std::vector<bool> isChineseVec) {
  // featureRows创建一个空的Mat对象。它将被用于存储所有输入图像的特征。
  Mat featureRows;
  size_t input_size = inputs.size();
  // 每一张图像提取特征，并将这些特征添加到featureRows中。
  for (size_t i = 0; i < input_size; i++) {
    Mat input = inputs[i];
    cv::Mat feature = charFeatures(input, kPredictSize);
@ -435,8 +503,13 @@ int CharsIdentify::identify(std::vector<cv::Mat> inputs, std::vector<std::pair<s
  std::vector<int> maxIndexs;
  std::vector<float> maxVals;
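  // Call classify on the feature matrix featureRows; it writes the best class index of each row into
  // maxIndexs and the best score of each row into maxVals. isChineseVec is passed by value and is only
  // read to choose which index range is searched; it is not modified.
  // NOTE(review): maxIndexs and maxVals are still empty here, while classify assigns them by index —
  // confirm they are sized before this call.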
  classify(featureRows, maxIndexs, maxVals, isChineseVec);
-
+  // 遍历所有的输入图像，对于每一张图像，根据其对应的最大值索引，构造一个输出对，并存储在outputs中。
  // 如果索引小于字符集大小kCharactersNumber，则输出对由相同字符组成；
  // 否则，获取索引对应的字符作为键，并使用键值对kv_查找对应的省份。
  for (size_t row_index = 0; row_index < input_size; row_index++) {
    int index = maxIndexs[row_index];
    if (index < kCharactersNumber) {