You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
vc1/基因的双序列对比算法(needleman-wunsch...

282 lines
5.8 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#include <iostream>
#include <string>
#include <algorithm>
#include <fstream>
#include <windows.h>
using namespace std;
#define INDEL_CHAR '-'
int MATCH = 1, DIS_MATCH = -1, INDEL = -3;
char choice;
class ResUnit //一次双序列比对后的结果
{
public:
string str1; //原始序列1
string str2; //原始序列2
string res1; //结果序列1
string res2; //结果序列2
int score; //序列总得分,反映两个序列的相似程度
int tag; //禁止迭代多次
};
class SingleSeq //一个序列被整合后的样子
{
public:
string str; //一个序列的原始序列
string res; //一个序列被整合后的样子
};
struct BacktrackingUnit {
int goUp; //是否向上回溯
int goLeftUp; //是否向左上回溯
int goLeft; //是否向左回溯
int score; //得分矩阵第(i, j)这个单元的分值
};
typedef struct BacktrackingUnit *unitLine;
struct SequenceUnit {
string *str1; //匹配序列1
string *str2; //匹配序列2
int score;
};
/*
比较三种路径之间谁最大
f(i-1,j-1),f(i-1,j)+indel,f(i,j-1)+indel
*/
int max3(int a, int b, int c)
{
return max(max(a, b), c);
}
/*
比较两个字符类型属于什么matchdismatchindel
*/
int myCompare(char a, char b)
{
if (a == b)
return MATCH;
else if (a == ' ' || b == ' ')
return INDEL;
return DIS_MATCH;
}
ResUnit traceback(unitLine **item, const int i, const int j, string str1,
string str2, string res1, string res2, int n, ResUnit resUnit)
{
unitLine temp = item[i][j];
if (resUnit.tag != 1) {
if (!(i || j)) { // 到矩阵单元(0, 0)才算结束,这代表初始的两个字符串的每个字符都被比对到了
resUnit.str1 = str1;
resUnit.str2 = str2;
resUnit.res1 = res1;
resUnit.res2 = res2;
resUnit.tag = 1;
return resUnit;
}
if (temp->goUp) { // 向上回溯一格
res1 = str1[i - 1] + res1;
res2 = INDEL_CHAR + res2;
resUnit = traceback(item, i - 1, j, str1, str2, res1, res2, n + 1, resUnit);
}
if (temp->goLeftUp) { // 向左上回溯一格
res1 = str1[i - 1] + res1;
res2 = str2[j - 1] + res2;
resUnit = traceback(item, i - 1, j - 1, str1, str2, res1, res2, n + 1, resUnit);
}
if (temp->goLeft) { // 向左回溯一格
res1 = INDEL_CHAR + res1;
res2 = str2[j - 1] + res2;
resUnit = traceback(item, i, j - 1, str1, str2, res1, res2, n + 1, resUnit);
}
return resUnit;
}
return resUnit;
}
ResUnit NeedlemanWunch(string str1, string str2)
{
//字符串str1,str2长度
const int m = str1.length();
const int n = str2.length();
int m1, m2, m3, mm;
unitLine **unit;
// 初始化
if ((unit = (unitLine **)malloc(sizeof(unitLine *) * (m + 1))) == NULL) {
fputs("Error: Out of space!\n", stderr);
exit(1);
}
for (int i = 0; i <= m; i++) {
if ((unit[i] = (unitLine *)malloc(sizeof(unitLine) * (n + 1))) == NULL) {
fputs("Error: Out of space!\n", stderr);
exit(1);
}
for (int j = 0; j <= n; j++) {
if ((unit[i][j] = (unitLine)malloc(sizeof(struct BacktrackingUnit))) == NULL) {
fputs("Error: Out of space!\n", stderr);
exit(1);
}
unit[i][j]->goUp = 0;
unit[i][j]->goLeftUp = 0;
unit[i][j]->goLeft = 0;
}
}
unit[0][0]->score = 0;
for (int i = 1; i <= m; i++) {
unit[i][0]->score = INDEL * i;
unit[i][0]->goUp = 1;
}
for (int j = 1; j <= n; j++) {
unit[0][j]->score = INDEL * j;
unit[0][j]->goLeft = 1;
}
// 动态规划算法计算得分矩阵每个单元的分值
for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
m1 = unit[i - 1][j]->score + INDEL;
m2 = unit[i - 1][j - 1]->score + myCompare(str1[i - 1], str2[j - 1]);
m3 = unit[i][j - 1]->score + INDEL;
mm = max3(m1, m2, m3);
unit[i][j]->score = mm;
//判断路径来源
if (m1 == mm)
unit[i][j]->goUp = 1;
if (m2 == mm)
unit[i][j]->goLeftUp = 1;
if (m3 == mm)
unit[i][j]->goLeft = 1;
}
}
//开始回溯
ResUnit res;
res.tag = 0;
res = traceback(unit, m, n, str1, str2, "", "", 0, res);
res.score = unit[m][n]->score;
//释放内存
for (int i = 0; i <= m; i++) {
for (int j = 0; j <= n; j++) {
free(unit[i][j]);
}
free(unit[i]);
}
free(unit);
//返回值
return res;
}
bool legal_string(string s)
{
for (unsigned i = 0; i < s.size(); i++) {
if (s[i] != 'A' && s[i] != 'T' && s[i] != 'C' && s[i] != 'G')
return false;
}
return true;
}
void read(string &a, string &b)
{
char c;
if (!choice) {
while (true) {
cout << "请选择:\n1.输出到显示屏\n2.输出为文本\n";
cin >> choice;
if (choice != '1' && choice != '2') {
cout << "#请选择合法的选项!\n";
Sleep(1000);
} else
break;
}
}
while (true) {
cout << "请输入需要比对的基因序列:\n序列一:";
cin >> a;
if (legal_string(a))
break;
else {
cout << "#请输入合法的基因序列!\n";
Sleep(1000);
}
}
while (true) {
cout << "序列二:";
cin >> b;
if (legal_string(b))
break;
else {
cout << "#请输入合法的基因序列!\n";
Sleep(1000);
}
}
while (true) {
cout << "请选择比对规则:\n1.空缺优先(默认)\n2.错误优先\n3.没有优先\n";
cin >> c;
if (c != '1' && c != '2' && c != '3') {
cout << "#请选择合法的选项!\n";
Sleep(1000);
} else
break;
}
if (c == '2') {
DIS_MATCH = -3;
INDEL = -1;
} else if (c == '3') {
DIS_MATCH = INDEL = -1;
}
}
int main()
{
while (true) {
string a, b;
read(a, b);
ResUnit s = NeedlemanWunch(a, b);
if (choice == '2') {
ofstream ofile{"ANS.txt", ios::out};
ofile << s.res1 << endl;
ofile << s.res2 << endl;
ofile.close();
} else {
cout << s.res1 << endl;
cout << s.res2 << endl;
}
Sleep(2000);
cout << "请选择:\n1.继续比对\n2.停止比对\n";
char c;
while (true) {
cin >> c;
if (c != '1' && c != '2') {
cout << "#请选择合法的选项!\n";
Sleep(1000);
} else
break;
}
if (c == '2')
break;
}
return 0;
}
/*
"GGATCGA"
"GAATTCAGTTA"
*/