|
|
@ -0,0 +1,282 @@
|
|
|
|
|
|
|
|
#include <iostream>
|
|
|
|
|
|
|
|
#include <string>
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
|
|
|
|
#include <fstream>
|
|
|
|
|
|
|
|
#include <windows.h>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define INDEL_CHAR '-'
|
|
|
|
|
|
|
|
int MATCH = 1, DIS_MATCH = -1, INDEL = -3;
|
|
|
|
|
|
|
|
char choice;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ResUnit //一次双序列比对后的结果
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
|
|
|
string str1; //原始序列1
|
|
|
|
|
|
|
|
string str2; //原始序列2
|
|
|
|
|
|
|
|
string res1; //结果序列1
|
|
|
|
|
|
|
|
string res2; //结果序列2
|
|
|
|
|
|
|
|
int score; //序列总得分,反映两个序列的相似程度
|
|
|
|
|
|
|
|
int tag; //禁止迭代多次
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SingleSeq //一个序列被整合后的样子
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
|
|
|
string str; //一个序列的原始序列
|
|
|
|
|
|
|
|
string res; //一个序列被整合后的样子
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
struct BacktrackingUnit {
|
|
|
|
|
|
|
|
int goUp; //是否向上回溯
|
|
|
|
|
|
|
|
int goLeftUp; //是否向左上回溯
|
|
|
|
|
|
|
|
int goLeft; //是否向左回溯
|
|
|
|
|
|
|
|
int score; //得分矩阵第(i, j)这个单元的分值
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct BacktrackingUnit *unitLine;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
struct SequenceUnit {
|
|
|
|
|
|
|
|
string *str1; //匹配序列1
|
|
|
|
|
|
|
|
string *str2; //匹配序列2
|
|
|
|
|
|
|
|
int score;
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
|
|
比较三种路径之间谁最大
|
|
|
|
|
|
|
|
f(i-1,j-1),f(i-1,j)+indel,f(i,j-1)+indel
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int max3(int a, int b, int c)
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
return max(max(a, b), c);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
|
|
比较两个字符类型属于什么,match,dismatch,indel
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int myCompare(char a, char b)
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
if (a == b)
|
|
|
|
|
|
|
|
return MATCH;
|
|
|
|
|
|
|
|
else if (a == ' ' || b == ' ')
|
|
|
|
|
|
|
|
return INDEL;
|
|
|
|
|
|
|
|
return DIS_MATCH;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ResUnit traceback(unitLine **item, const int i, const int j, string str1,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
string str2, string res1, string res2, int n, ResUnit resUnit)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
unitLine temp = item[i][j];
|
|
|
|
|
|
|
|
if (resUnit.tag != 1) {
|
|
|
|
|
|
|
|
if (!(i || j)) { // 到矩阵单元(0, 0)才算结束,这代表初始的两个字符串的每个字符都被比对到了
|
|
|
|
|
|
|
|
resUnit.str1 = str1;
|
|
|
|
|
|
|
|
resUnit.str2 = str2;
|
|
|
|
|
|
|
|
resUnit.res1 = res1;
|
|
|
|
|
|
|
|
resUnit.res2 = res2;
|
|
|
|
|
|
|
|
resUnit.tag = 1;
|
|
|
|
|
|
|
|
return resUnit;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if (temp->goUp) { // 向上回溯一格
|
|
|
|
|
|
|
|
res1 = str1[i - 1] + res1;
|
|
|
|
|
|
|
|
res2 = INDEL_CHAR + res2;
|
|
|
|
|
|
|
|
resUnit = traceback(item, i - 1, j, str1, str2, res1, res2, n + 1, resUnit);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if (temp->goLeftUp) { // 向左上回溯一格
|
|
|
|
|
|
|
|
res1 = str1[i - 1] + res1;
|
|
|
|
|
|
|
|
res2 = str2[j - 1] + res2;
|
|
|
|
|
|
|
|
resUnit = traceback(item, i - 1, j - 1, str1, str2, res1, res2, n + 1, resUnit);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if (temp->goLeft) { // 向左回溯一格
|
|
|
|
|
|
|
|
res1 = INDEL_CHAR + res1;
|
|
|
|
|
|
|
|
res2 = str2[j - 1] + res2;
|
|
|
|
|
|
|
|
resUnit = traceback(item, i, j - 1, str1, str2, res1, res2, n + 1, resUnit);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return resUnit;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return resUnit;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ResUnit NeedlemanWunch(string str1, string str2)
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
//字符串str1,str2长度
|
|
|
|
|
|
|
|
const int m = str1.length();
|
|
|
|
|
|
|
|
const int n = str2.length();
|
|
|
|
|
|
|
|
int m1, m2, m3, mm;
|
|
|
|
|
|
|
|
unitLine **unit;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// 初始化
|
|
|
|
|
|
|
|
if ((unit = (unitLine **)malloc(sizeof(unitLine *) * (m + 1))) == NULL) {
|
|
|
|
|
|
|
|
fputs("Error: Out of space!\n", stderr);
|
|
|
|
|
|
|
|
exit(1);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i <= m; i++) {
|
|
|
|
|
|
|
|
if ((unit[i] = (unitLine *)malloc(sizeof(unitLine) * (n + 1))) == NULL) {
|
|
|
|
|
|
|
|
fputs("Error: Out of space!\n", stderr);
|
|
|
|
|
|
|
|
exit(1);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
for (int j = 0; j <= n; j++) {
|
|
|
|
|
|
|
|
if ((unit[i][j] = (unitLine)malloc(sizeof(struct BacktrackingUnit))) == NULL) {
|
|
|
|
|
|
|
|
fputs("Error: Out of space!\n", stderr);
|
|
|
|
|
|
|
|
exit(1);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
unit[i][j]->goUp = 0;
|
|
|
|
|
|
|
|
unit[i][j]->goLeftUp = 0;
|
|
|
|
|
|
|
|
unit[i][j]->goLeft = 0;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
unit[0][0]->score = 0;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = 1; i <= m; i++) {
|
|
|
|
|
|
|
|
unit[i][0]->score = INDEL * i;
|
|
|
|
|
|
|
|
unit[i][0]->goUp = 1;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (int j = 1; j <= n; j++) {
|
|
|
|
|
|
|
|
unit[0][j]->score = INDEL * j;
|
|
|
|
|
|
|
|
unit[0][j]->goLeft = 1;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// 动态规划算法计算得分矩阵每个单元的分值
|
|
|
|
|
|
|
|
for (int i = 1; i <= m; i++) {
|
|
|
|
|
|
|
|
for (int j = 1; j <= n; j++) {
|
|
|
|
|
|
|
|
m1 = unit[i - 1][j]->score + INDEL;
|
|
|
|
|
|
|
|
m2 = unit[i - 1][j - 1]->score + myCompare(str1[i - 1], str2[j - 1]);
|
|
|
|
|
|
|
|
m3 = unit[i][j - 1]->score + INDEL;
|
|
|
|
|
|
|
|
mm = max3(m1, m2, m3);
|
|
|
|
|
|
|
|
unit[i][j]->score = mm;
|
|
|
|
|
|
|
|
//判断路径来源
|
|
|
|
|
|
|
|
if (m1 == mm)
|
|
|
|
|
|
|
|
unit[i][j]->goUp = 1;
|
|
|
|
|
|
|
|
if (m2 == mm)
|
|
|
|
|
|
|
|
unit[i][j]->goLeftUp = 1;
|
|
|
|
|
|
|
|
if (m3 == mm)
|
|
|
|
|
|
|
|
unit[i][j]->goLeft = 1;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//开始回溯
|
|
|
|
|
|
|
|
ResUnit res;
|
|
|
|
|
|
|
|
res.tag = 0;
|
|
|
|
|
|
|
|
res = traceback(unit, m, n, str1, str2, "", "", 0, res);
|
|
|
|
|
|
|
|
res.score = unit[m][n]->score;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//释放内存
|
|
|
|
|
|
|
|
for (int i = 0; i <= m; i++) {
|
|
|
|
|
|
|
|
for (int j = 0; j <= n; j++) {
|
|
|
|
|
|
|
|
free(unit[i][j]);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
free(unit[i]);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
free(unit);
|
|
|
|
|
|
|
|
//返回值
|
|
|
|
|
|
|
|
return res;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool legal_string(string s)
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < s.size(); i++) {
|
|
|
|
|
|
|
|
if (s[i] != 'A' && s[i] != 'T' && s[i] != 'C' && s[i] != 'G')
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void read(string &a, string &b)
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
char c;
|
|
|
|
|
|
|
|
if (!choice) {
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
|
|
|
|
cout << "请选择:\n1.输出到显示屏\n2.输出为文本\n";
|
|
|
|
|
|
|
|
cin >> choice;
|
|
|
|
|
|
|
|
if (choice != '1' && choice != '2') {
|
|
|
|
|
|
|
|
cout << "#请选择合法的选项!\n";
|
|
|
|
|
|
|
|
Sleep(1000);
|
|
|
|
|
|
|
|
} else
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
|
|
|
|
cout << "请输入需要比对的基因序列:\n序列一:";
|
|
|
|
|
|
|
|
cin >> a;
|
|
|
|
|
|
|
|
if (legal_string(a))
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
else {
|
|
|
|
|
|
|
|
cout << "#请输入合法的基因序列!\n";
|
|
|
|
|
|
|
|
Sleep(1000);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
|
|
|
|
cout << "序列二:";
|
|
|
|
|
|
|
|
cin >> b;
|
|
|
|
|
|
|
|
if (legal_string(b))
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
else {
|
|
|
|
|
|
|
|
cout << "#请输入合法的基因序列!\n";
|
|
|
|
|
|
|
|
Sleep(1000);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
|
|
|
|
cout << "请选择比对规则:\n1.空缺优先(默认)\n2.错误优先\n3.没有优先\n";
|
|
|
|
|
|
|
|
cin >> c;
|
|
|
|
|
|
|
|
if (c != '1' && c != '2' && c != '3') {
|
|
|
|
|
|
|
|
cout << "#请选择合法的选项!\n";
|
|
|
|
|
|
|
|
Sleep(1000);
|
|
|
|
|
|
|
|
} else
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (c == '2') {
|
|
|
|
|
|
|
|
DIS_MATCH = -3;
|
|
|
|
|
|
|
|
INDEL = -1;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} else if (c == '3') {
|
|
|
|
|
|
|
|
DIS_MATCH = INDEL = -1;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int main()
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
|
|
|
|
string a, b;
|
|
|
|
|
|
|
|
read(a, b);
|
|
|
|
|
|
|
|
ResUnit s = NeedlemanWunch(a, b);
|
|
|
|
|
|
|
|
if (choice == '2') {
|
|
|
|
|
|
|
|
ofstream ofile{"ANS.txt", ios::out};
|
|
|
|
|
|
|
|
ofile << s.res1 << endl;
|
|
|
|
|
|
|
|
ofile << s.res2 << endl;
|
|
|
|
|
|
|
|
ofile.close();
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
cout << s.res1 << endl;
|
|
|
|
|
|
|
|
cout << s.res2 << endl;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
Sleep(2000);
|
|
|
|
|
|
|
|
cout << "请选择:\n1.继续比对\n2.停止比对\n";
|
|
|
|
|
|
|
|
char c;
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
|
|
|
|
cin >> c;
|
|
|
|
|
|
|
|
if (c != '1' && c != '2') {
|
|
|
|
|
|
|
|
cout << "#请选择合法的选项!\n";
|
|
|
|
|
|
|
|
Sleep(1000);
|
|
|
|
|
|
|
|
} else
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if (c == '2')
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
|
|
"GGATCGA"
|
|
|
|
|
|
|
|
"GAATTCAGTTA"
|
|
|
|
|
|
|
|
*/
|