DNA 서열 분류: Fisher 판별 법
#include <stdio.h>
#include <string.h>
#include <math.h>
#define DNA_A 'a'
#define DNA_T 't'
#define DNA_C 'c'
#define DNA_G 'g'
#define DNA_AN 0
#define DNA_TN 1
#define DNA_CN 2
#define DNA_GN 3
int main()
{
int Clover[64];
char AminoAcids[21][25];
int i, j, count, ich, length[20], len, k, p, q;
char dnaArray[20][200]; //DNA
int num[60][22], ClovNum[60];
float percent[60][22];
FILE *file;
float distance[60], disMin[3], dis;
int sel[20];
double JD[22], temp; //
int dSite[22], tempInt, featureSel[8];
float percentY[20], evgM[2][8], tildeM[2], evgMinus[8];
float perMinusA[10][8], perMinusB[10][8], stanA[8][8], stanB[8][8], stanZZ[8][8], stanInv[8][8];
float disperS[2][8], disperSW[8];
float omega[8], thresY;
float testYY[20];
int testInt[20];
strcpy(AminoAcids[0], "aaaaag");
strcpy(AminoAcids[1], "aataacgaagacgatgag");
strcpy(AminoAcids[2], "taatagtat");
strcpy(AminoAcids[3], "tac");
strcpy(AminoAcids[4], "caacagcatcac");
strcpy(AminoAcids[5], "agaagcagtaggtcgtca");
strcpy(AminoAcids[6], "ggaggcggtggg");
strcpy(AminoAcids[7], "tgatgctgttgg");
strcpy(AminoAcids[8], "cgacgccgtcgg");
strcpy(AminoAcids[9], "ataatg");
strcpy(AminoAcids[10], "gtagtg");
strcpy(AminoAcids[11], "gttgtc");
strcpy(AminoAcids[12], "ttattg");
strcpy(AminoAcids[13], "tttttc");
strcpy(AminoAcids[14], "ctactg");
strcpy(AminoAcids[15], "cttctc");
strcpy(AminoAcids[16], "acaacg");
strcpy(AminoAcids[17], "acc");
strcpy(AminoAcids[18], "gcagccgctgcgtcttcc");
strcpy(AminoAcids[19], "ccaccgcctccc");
strcpy(AminoAcids[20], "attatcact");
strcpy(dnaArray[0], "aggcacggaaaaacgggaataacggaggaggacttggcacggcattacacggaggacgaggtaaaggaggcttgtctacggccggaagtgaagggggatatgaccgcttgg");
strcpy(dnaArray[1], "cggaggacaaacgggatggcggtattggaggtggcggactgttcggggaattattcggtttaaacgggacaaggaaggcggctggaacaaccggacggtggcagcaaagga");
strcpy(dnaArray[2], "gggacggatacggattctggccacggacggaaaggaggacacggcggacatacacggcggcaacggacggaacggaggaaggagggcggcaatcggtacggaggcggcgga");
strcpy(dnaArray[3], "atggataacggaaacaaaccagacaaacttcggtagaaatacagaagcttagatgcatatgttttttaaataaaatttgtattattatggtatcataaaaaaaggttgcga");
strcpy(dnaArray[4], "cggctggcggacaacggactggcggattccaaaaacggaggaggcggacggaggctacaccaccgtttcggcggaaaggcggagggctggcaggaggctcattacggggag");
strcpy(dnaArray[5], "atggaaaattttcggaaaggcggcaggcaggaggcaaaggcggaaaggaaggaaacggcggatatttcggaagtggatattaggagggcggaataaaggaacggcggcaca");
strcpy(dnaArray[6], "atgggattattgaatggcggaggaagatccggaataaaatatggcggaaagaacttgttttcggaaatggaaaaaggactaggaatcggcggcaggaaggatatggaggcg");
strcpy(dnaArray[7], "atggccgatcggcttaggctggaaggaacaaataggcggaattaaggaaggcgttctcgcttttcgacaaggaggcggaccataggaggcggattaggaacggttatgagg");
strcpy(dnaArray[8], "atggcggaaaaaggaaatgtttggcatcggcgggctccggcaactggaggttcggccatggaggcgaaaatcgtgggcggcggcagcgctggccggagtttgaggagcgcg");
strcpy(dnaArray[9], "tggccgcggaggggcccgtcgggcgcggatttctacaagggcttcctgttaaggaggtggcatccaggcgtcgcacgctcggcgcggcaggaggcacgcgggaaaaaacg");
strcpy(dnaArray[10], "gttagatttaacgttttttatggaatttatggaattataaatttaaaaatttatattttttaggtaagtaatccaacgtttttattactttttaaaattaaatatttatt");
strcpy(dnaArray[11], "gtttaattactttatcatttaatttaggttttaattttaaatttaatttaggtaagatgaatttggttttttttaaggtagttatttaattatcgttaaggaaagttaaa");
strcpy(dnaArray[12], "gtattacaggcagaccttatttaggttattattattatttggattttttttttttttttttttaagttaaccgaattattttctttaaagacgttacttaatgtcaatgc");
strcpy(dnaArray[13], "gttagtcttttttagattaaattattagattatgcagtttttttacataagaaaatttttttttcggagttcatattctaatctgtctttattaaatcttagagatatta");
strcpy(dnaArray[14], "gtattatatttttttatttttattattttagaatataatttgaggtatgtgtttaaaaaaaatttttttttttttttttttttttttttttttaaaatttataaatttaa");
strcpy(dnaArray[15], "gttatttttaaatttaattttaattttaaaatacaaaatttttactttctaaaattggtctctggatcgataatgtaaacttattgaatctatagaattacattattgat");
strcpy(dnaArray[16], "gtatgtctatttcacggaagaatgcaccactatatgatttgaaattatctatggctaaaaaccctcagtaaaatcaatccctaaacccttaaaaaacggcggcctatccc");
strcpy(dnaArray[17], "gttaattatttattccttacgggcaattaattatttattacggttttatttacaattttttttttttgtcctatagagaaattacttacaaaacgttattttacatactt");
strcpy(dnaArray[18], "gttacattatttattattatccgttatcgataattttttacctcttttttcgctgagtttttattcttactttttttcttctttatataggatctcatttaatatcttaa");
strcpy(dnaArray[19], "gtatttaactctctttactttttttttcactctctacattttcatcttctaaaactgtttgatttaaacttttgtttctttaaggattttttttacttatcctctgttat");
// ,
for(i=0; i<64; i++)
{
Clover[i] = -1;
}
for(i=0; i<21; i++)
{
len = strlen(AminoAcids[i]);
for(j=0; j<len; j+=3)
{
count = 0;
ich = AminoAcids[i][j];
switch(ich)
{
case 97:
count += DNA_AN;
break;
case 116:
count += DNA_TN;
break;
case 99:
count += DNA_CN;
break;
case 103:
count += DNA_GN;
break;
}
ich = AminoAcids[i][j+1];
switch(ich)
{
case 97:
count += DNA_AN * 4;
break;
case 116:
count += DNA_TN * 4;
break;
case 99:
count += DNA_CN * 4;
break;
case 103:
count += DNA_GN * 4;
break;
}
ich = AminoAcids[i][j+2];
switch(ich)
{
case 97:
count += DNA_AN * 16;
break;
case 116:
count += DNA_TN * 16;
break;
case 99:
count += DNA_CN * 16;
break;
case 103:
count += DNA_GN * 16;
break;
}
Clover[count] = i;
}
}
for(i=0; i<64; i++)
{
//printf("%d
", Clover[i]);
}
// 20*3
for(i=0; i<60; i++)
{
ClovNum[i] = 0;
for(j=0; j<22; j++)
{
num[i][j] = 0;
percent[i][j] = 0;
}
}
for(i=0; i<20; i++)
{
length[i] = strlen(dnaArray[i]);
for(k=0; k<3; k++)
{
for(j=k; j<length[i]; j+=3)
{
count = 0;
ich = dnaArray[i][j-2];
switch(ich)
{
case 97:
count += DNA_AN;
break;
case 116:
count += DNA_TN;
break;
case 99:
count += DNA_CN;
break;
case 103:
count += DNA_GN;
break;
}
ich = dnaArray[i][j-1];
switch(ich)
{
case 97:
count += DNA_AN * 4;
break;
case 116:
count += DNA_TN * 4;
break;
case 99:
count += DNA_CN * 4;
break;
case 103:
count += DNA_GN * 4;
break;
}
ich = dnaArray[i][j];
switch(ich)
{
case 97:
count += DNA_AN * 16;
break;
case 116:
count += DNA_TN * 16;
break;
case 99:
count += DNA_CN * 16;
break;
case 103:
count += DNA_GN * 16;
break;
}
num[3*i+k][Clover[count]] ++;
ClovNum[3*i+k]++;
if(dnaArray[i][j-2] == 'a' || dnaArray[i][j-2] == 't')
{
num[3*i+k][21] ++;
}
if(dnaArray[i][j-1] == 'a' || dnaArray[i][j-1] == 't')
{
num[3*i+k][21] ++;
}
if(dnaArray[i][j] == 'a' || dnaArray[i][j] == 't')
{
num[3*i+k][21] ++;
}
}
}
}
//
for(i=0; i<60; i++)
{
for(j=0; j<21; j++)
{
percent[i][j] = num[i][j] * 1.0 / ClovNum[i];
}
percent[i][21] = num[i][21] * 1.0 / length[i/3];
}
file = fopen("FisherNum.txt", "w");
for(i=0; i<60; i++)
{
fprintf(file, "%d ", ClovNum[i]);
for(j=0; j<22; j++)
{
fprintf(file, "%d ", num[i][j]);
}
fprintf(file, "
");
}
fclose(file);
file = fopen("FisherPercent.txt", "w");
for(i=0; i<60; i++)
{
for(j=0; j<22; j++)
{
fprintf(file, "%f ", percent[i][j]);
}
fprintf(file, "
");
}
fclose(file);
// A、B
for(i=0; i<20; i++)
{
sel[i] = -1;
}
for(i=0; i<10; i++)
{
for(k=0; k<3; k++)
{
disMin[k] = 60000;
for(j=0; j<30; j++)
{
if(i == j/3)
continue;
dis = 0;
for(p=0; p<22; p++)
{
dis += (percent[3*i+k][p]-percent[j][p]) * (percent[3*i+k][p]-percent[j][p]);
}
if(disMin[k] > dis)
disMin[k] = dis;
}
}
dis = 60000;
for(k=0; k<3; k++)
{
if(disMin[k] < dis)
{
dis = disMin[k];
sel[i] = 3*i+k;
}
}
}
for(i=10; i<20; i++)
{
for(k=0; k<3; k++)
{
disMin[k] = 60000;
for(j=30; j<60; j++)
{
if(i == j/3)
continue;
dis = 0;
for(p=0; p<22; p++)
{
dis += (percent[3*i+k][p]-percent[j][p]) * (percent[3*i+k][p]-percent[j][p]);
}
if(disMin[k] > dis)
disMin[k] = dis;
}
}
dis = 60000;
for(k=0; k<3; k++)
{
if(disMin[k] < dis)
{
dis = disMin[k];
sel[i] = 3*i+k;
}
}
}
for(i=0; i<20; i++)
{
printf("%d ", sel[i]);
}
printf("
");
//
for(i=0; i<22; i++)
{
JD[i] = 0;
}
for(i=0; i<10; i++)
{
for(j=10; j<20; j++)
{
for(p=0; p<22; p++)
{
JD[p] += sqrt((percent[sel[i]][p]-percent[sel[j]][p]) * (percent[sel[i]][p]-percent[sel[j]][p]));
}
}
}
for(i=0; i<22; i++)
{
printf("%f ", JD[i]);
dSite[i] = i;
}
printf("
");
// JD , 8
for(i=0; i<21; i++)
for(j=21; j>=i+1; j--)
if(JD[j] > JD[j-1])
{
temp = JD[j];
JD[j] = JD[j-1];
JD[j-1] = temp;
tempInt = dSite[j];
dSite[j] = dSite[j-1];
dSite[j-1] = tempInt;
};
// 8
for(i=0; i<8; i++)
{
featureSel[i] = dSite[i];
printf("%d ", dSite[i]);
}
printf("
");
// 8 txt
file = fopen("feature.txt", "w");
for(i=0; i<10; i++)
{
fprintf(file, "%2d %2d ", i, 1);
for(j=0; j<8; j++)
{
fprintf(file, "%f ", percent[dSite[i]][featureSel[j]]);
}
fprintf(file, "
");
}
fprintf(file, "
");
for(i=10; i<20; i++)
{
fprintf(file, "%2d %2d ", i, -1);
for(j=0; j<8; j++)
{
fprintf(file, "%f ", percent[dSite[i]][featureSel[j]]);
}
fprintf(file, "
");
}
fclose(file);
//Fisher
//
for(i=0; i<2; i++)
{
for(j=0; j<8; j++)
evgM[i][j] = 0;
}
for(i=0; i<10; i++)
{
for(j=0; j<8; j++)
{
evgM[0][j] += percent[dSite[i]][featureSel[j]];
}
}
for(i=10; i<20; i++)
{
for(j=0; j<8; j++)
{
evgM[1][j] += percent[dSite[i]][featureSel[j]];
}
}
for(i=0; i<2; i++)
{
for(j=0; j<8; j++)
{
evgM[i][j] /= 10;
printf("%f %f
", evgM[0][j], evgM[1][j]);
}
}
//
for(i=0; i<10; i++)
{
for(j=0; j<8; j++)
{
perMinusA[i][j] = percent[dSite[i]][featureSel[j]] - evgM[0][j];
}
}
for(i=10; i<20; i++)
{
for(j=0; j<8; j++)
{
perMinusB[i-10][j] = percent[dSite[i]][featureSel[j]] - evgM[1][j];
}
}
for(i=0; i<8; i++)
{
for(j=0; j<8; j++)
{
stanA[i][j] = 0;
for(p=0; p<10; p++)
stanA[i][j] += perMinusA[p][i] * perMinusA[p][j];
}
}
for(i=0; i<8; i++)
{
for(j=0; j<8; j++)
{
stanB[i][j] = 0;
for(p=0; p<10; p++)
stanB[i][j] += perMinusB[p][i] * perMinusB[p][j];
}
}
for(i=0; i<8; i++)
{
for(j=0; j<8; j++)
{
stanZZ[i][j] = stanA[i][j] + stanB[i][j];
}
}
file = fopen("stanZZ.txt", "w");
for(i=0; i<8; i++)
{
for(j=0; j<8; j++)
{
fprintf(file, "%f ", stanZZ[i][j]);
}
fprintf(file, "
");
}
fclose(file);
// stanZZ
file = fopen("stanInv.txt", "r");
if(file == NULL)
printf("Wrong!
");
for(i=0; i<8; i++)
{
for(j=0; j<8; j++)
{
fscanf(file, "%f", &stanInv[i][j]);
printf("%f ", stanInv[i][j]);
}
printf("
");
}
fclose(file);
// w
for(i=0; i<8; i++)
{
evgMinus[i] = evgM[0][i] - evgM[1][i];
}
printf(" :
");
for(i=0; i<8; i++)
{
omega[i] = 0;
for(j=0; j<8; j++)
{
omega[i] += stanInv[i][j] * evgMinus[j];
}
printf("%f
", omega[i]);
}
// y
//d 1
for(j=0; j<20; j++)
percentY[j] = 0;
for(i=0; i<10; i++)
{
for(j=0; j<8; j++)
{
percentY[i] += percent[dSite[i]][featureSel[j]] * omega[j];
}
}
for(i=10; i<20; i++)
{
for(j=0; j<8; j++)
{
percentY[i] += percent[dSite[i]][featureSel[j]] * omega[j];
}
}
//
tildeM[0] = tildeM[1] = 0;
for(i=0; i<10; i++)
{
tildeM[0] += percentY[i];
}
for(i=10; i<20; i++)
{
tildeM[1] += percentY[i];
}
tildeM[0] /= 10;
tildeM[1] /= 10;
thresY = (tildeM[0] + tildeM[1])/2;
printf(" Y = %f
", thresY);
file = fopen("InfoFun.txt", "w");
for(i=0; i<8; i++)
{
fprintf(file, "%f ", omega[i]);
}
fprintf(file, "
%f", thresY);
fclose(file);
for(i=0; i<20; i++)
{
testYY[i] = 0;
for(j=0; j<8; j++)
{
testYY[i] += percent[dSite[i]][featureSel[j]] * omega[j];
}
if(testYY[i] > thresY)
printf("1
");
else
printf("0
");
}
file = fopen("Eight.txt", "w");
for(i=0; i<20; i++)
{
for(j=0; j<8; j++)
{
fprintf(file, "%f ", percent[dSite[i]][featureSel[j]]);
}
fprintf(file, "
");
}
fclose(file);
return 0;
}
이 내용에 흥미가 있습니까?
현재 기사가 여러분의 문제를 해결하지 못하는 경우 AI 엔진은 머신러닝 분석(스마트 모델이 방금 만들어져 부정확한 경우가 있을 수 있음)을 통해 가장 유사한 기사를 추천합니다:
다양한 언어의 JSONJSON은 Javascript 표기법을 사용하여 데이터 구조를 레이아웃하는 데이터 형식입니다. 그러나 Javascript가 코드에서 이러한 구조를 나타낼 수 있는 유일한 언어는 아닙니다. 저는 일반적으로 '객체'{}...
텍스트를 자유롭게 공유하거나 복사할 수 있습니다.하지만 이 문서의 URL은 참조 URL로 남겨 두십시오.
CC BY-SA 2.5, CC BY-SA 3.0 및 CC BY-SA 4.0에 따라 라이센스가 부여됩니다.