/*
 * parse and convert secondary structures
 * Walter Fontana, Ivo L Hofacker, Peter F Stadler
 * Vienna RNA Package
 */
/*
 * NOTE(review): this copy of the file shows signs of extraction damage.
 * Text that originally sat between a '<' and a later '>' appears to be
 * missing in several places (the bare #include directives below, the
 * "for(k=0; k0)" span inside b2Shapiro, the "while (i=0)" span inside
 * expand_Shapiro), and the file ends in the middle of a statement.
 * The code tokens are left exactly as found; restore the damaged spans
 * from a pristine copy before building.
 */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
/*
 * NOTE(review): the next four directives carry no header name.
 * The code below calls sprintf, strlen/strcpy/strcat and free, so these were
 * presumably standard headers such as stdio.h, string.h and stdlib.h --
 * confirm against the original file.
 */
#include
#include
#include
#include
#include "ViennaRNA/utils.h"
#include "ViennaRNA/RNAstruct.h"

/* linkage markers: PRIVATE symbols are file-local, PUBLIC ones are exported */
#define PRIVATE static
#define PUBLIC
/* not referenced in the visible part of this file */
#define MAXLEN 10000

PRIVATE char *aux_struct(const char *structure);

/* on return from parse_structure(), b2C() or b2Shapiro() ... */
/* NOTE(review): STRUC is not defined in this file; presumably it comes from
 * ViennaRNA/RNAstruct.h. None of the visible writers checks an index
 * against it. */
PUBLIC int loop_size[STRUC]; /* contains loop sizes of a structure */
PUBLIC int helix_size[STRUC]; /* contains helix sizes of a structure */
PUBLIC int loop_degree[STRUC]; /* contains loop degrees of a structure */
PUBLIC int loops; /* n of loops and stacks in a structure */
PUBLIC int unpaired, pairs; /* n of unpaired digits and pairs */

/*---------------------------------------------------------------------------*/

/*
 * Return a newly allocated copy of `structure` (a string of '.', '(' and ')')
 * in which, for every run of directly stacked pairs, the outermost pair is
 * rewritten as '[' ... ']'. Inner pairs of the run keep '(' and ')', and '.'
 * is left untouched.
 *
 * The returned buffer comes from vrna_alloc; the callers in this file release
 * it with free().
 *
 * Any other character triggers vrna_message_error(). Whether that call
 * returns is not visible from this file.
 *
 * NOTE(review): nothing here checks that the brackets are balanced.
 *  - match_paren has strlen/2+1 entries and is filled from index 1, so an
 *    input with more than strlen/2 opening brackets writes past its end.
 *  - A ')' seen while o == 0 indexes match_paren[0] and then match_paren[-1].
 *  - Positions are stored as short, so they are only exact while the
 *    structure is shorter than SHRT_MAX characters.
 */
PRIVATE char *aux_struct(const char* structure )
{
   short        *match_paren;   /* stack of positions of still-open '(' (1-based) */
   int          i, o, p;        /* i: scan position, o: stack height, p: end of stack run */
   char         *string;        /* working copy that gets rewritten in place */

   string = (char *) vrna_alloc(sizeof(char)*(strlen(structure)+1));
   match_paren = (short *) vrna_alloc(sizeof(short)*(strlen(structure)/2+1));
   strcpy(string, structure);

   i = o = 0;
   while (string[i]) {
      switch (string[i]) {
       case '.': break;
       case '(':
         /* push the position of the opening bracket */
         match_paren[++o]=i;
         break;
       case ')':
         p=i;
         /* Extend over a run of ')' whose partners are adjacent '(' as well,
          * i.e. walk outwards through one uninterrupted stack of pairs.
          * With o == 1 this reads match_paren[0], which is never written --
          * presumably zero because vrna_alloc clears memory; confirm. */
         while ((string[p+1]==')')&&(match_paren[o-1]==match_paren[o]-1)) {
            p++; o--;
         }
         /* mark the outermost pair of the run and resume scanning after it */
         string[p]=']';
         i=p;
         string[match_paren[o]]='[';
         o--;
         break;
       default:
         vrna_message_error("Junk in structure at aux_structure\n");
      }
      i++;
   }
   free(match_paren);
   return(string);
}

/*---------------------------------------------------------------------------*/

/*
 * Convert a dot-bracket `structure` into a parenthesized string built from
 * the aux_struct() form of the input:
 *   - the output starts with "(" and ends with "R)";
 *   - every maximal run of u unpaired '.' becomes "(U<u>)";
 *   - every '[' (start of a stack) becomes "(";
 *   - every ']' (end of a stack) becomes "P<n>)", where n is one more than
 *     the number of ')' seen since the previous ']'.
 * Plain '(' characters produce no output.
 *
 * Returns a buffer obtained from vrna_alloc; it is not freed here.
 *
 * NOTE(review): tt is 10 bytes and is written with unbounded sprintf;
 * "(U%d)" fits only while the count has at most six digits.
 */
PUBLIC char *b2HIT(const char *structure)
{
   int            i, u, p, l;   /* u: pending unpaired count, p: ')' since last ']',
                                   l: current length of temp */
   char          *string, *temp, *HIT, tt[10];

   /* scratch buffer; sized at four output characters per input character plus
      room for the leading "(", the trailing "R)" and the terminator */
   temp = (char *) vrna_alloc(strlen(structure)*4+4);
   string = aux_struct( structure );

   strcpy(temp,"(");
   i=p=u=0; l=1;
   while (string[i]) {
      switch(string[i]) {
       case '.':
         u++; break;
       case '[':
         /* flush the pending run of unpaired positions, then open a group;
            strcat starts at temp+l so it does not rescan the whole buffer */
         if (u>0) {
            sprintf(tt, "(U%d)" , u);
            strcat(temp+l, tt);
            l+=strlen(tt);
            u=0;
         }
         strcat(temp+l, "("); l++;
         break;
       case ')':
         if (u>0) {
            sprintf(tt, "(U%d)" , u);
            strcat(temp+l, tt);
            l+=strlen(tt);
            u=0;
         }
         /* inner closing bracket of a stack: only counted, nothing emitted */
         p++;
         break;
       case ']':
         if (u>0) {
            sprintf(tt, "(U%d)" , u);
            strcat(temp+l, tt);
            l+=strlen(tt);
            u=0;
         }
         /* close the group with the number of pairs in this stack */
         sprintf(tt,"P%d)", p+1);
         strcat(temp+l, tt);
         l+=strlen(tt);
         p=0;
         break;
      }
      i++;
   }
   /* unpaired positions trailing the last bracket */
   if (u>0) {
      sprintf(tt, "(U%d)" , u);
      strcat(temp+l, tt);
      l+=strlen(tt);
   }
   strcat(temp+l, "R)");

   free( string );

   /* hand back a copy in a buffer sized to the actual result */
   HIT = (char *) vrna_alloc(sizeof(char)*(strlen(temp)+2));
   strcpy(HIT, temp);
   free(temp);
   return(HIT);
}

/*---------------------------------------------------------------------------*/

/*
 * Convert a dot-bracket `structure` into a parenthesized string in which
 * every stack, together with the loop it closes, is written as one letter:
 *   H  loop of degree 1,
 *   B  loop of degree 2 whose bulge flag is set,
 *   I  loop of degree 2 otherwise,
 *   M  any other degree.
 * The output starts with "(", gets "(" for every '[' and "<letter>)" for
 * every ']', and ends with "R)".
 *
 * Side effects on the file-level globals: loop_size[] and helix_size[] are
 * cleared for all STRUC entries, loops/pairs/unpaired are reset, and
 * loop_size[], loop_degree[], loops and pairs are then filled in while
 * scanning. This function never increments `unpaired` and never writes
 * helix_size[] after clearing it.
 *
 * Returns a buffer obtained from vrna_alloc; it is not freed here.
 *
 * NOTE(review): `loops` indexes the STRUC-sized globals without a bound check,
 * and bulge/loop hold strlen/3+1 entries without a check on lp.
 */
PUBLIC char *b2C(const char *structure )
{
   short *bulge, *loop;   /* per nesting level: bulge flag / index of the open loop */
   int    i, lp, p, l;    /* lp: nesting level, p: ')' since last ']', l: length of temp */
   char  *string, *Coarse, *temp;

   bulge = (short *) vrna_alloc(sizeof(short)*(strlen(structure)/3+1));
   loop = (short *) vrna_alloc(sizeof(short)*(strlen(structure)/3+1));
   temp = (char *) vrna_alloc(4*strlen(structure)+2);

   for (i = 0; i < STRUC; i++) {
      loop_size[i] = helix_size[i] = 0;
   }
   loop_degree[0]=0;         /* open structure has degree 0 */
   pairs = unpaired = loops = lp = 0;
   loop[0]=0;                /* level 0 is the exterior loop, index 0 */

   string = aux_struct( structure );

   i=p=l=0;
   temp[l++] = '(';
   while (string[i]) {
      switch(string[i]) {
       case '.':
         /* unpaired position belongs to the innermost open loop */
         loop_size[loop[lp]]++;
         break;
       case '[':
         temp[l++]='(';
         /* a new stack directly after an inner '(' leaves no unpaired
            position on this side of the enclosing loop */
         if ((i>0)&&(string[i-1]=='(')) bulge[lp]=1;
         /* open a new loop one level deeper; the closing stack counts as
            its first branch */
         lp++;
         loop_degree[++loops]=1;
         loop[lp]=loops;
         bulge[lp]=0;
         break;
       case ')':
         /* closing bracket directly after the end of a stack: no unpaired
            position on this side.
            NOTE(review): reads string[i-1] without checking i > 0 */
         if (string[i-1]==']') bulge[lp]=1;
         p++;
         break;
       case ']':
         if (string[i-1]==']') bulge[lp]=1;
         switch (loop_degree[loop[lp]]) {
          case 1:  temp[l++]='H'; break;           /* hairpin */
          case 2:
            if (bulge[lp]==1)
               temp[l++] = 'B';                    /* bulge */
            else
               temp[l++] = 'I';                    /* internal loop */
            break;
          default: temp[l++] = 'M';                /* multiloop */
         }
         temp[l++] = ')';
         pairs+=p+1;
         p=0;
         /* leave this loop and count it as a branch of the enclosing one */
         loop_degree[loop[--lp]]++;
         break;
      }
      i++;
   }
   temp[l++] = 'R';
   temp[l++] = ')';
   temp[l]='\0';
   free(string);
   /* hand back a copy in a buffer sized to the actual result */
   Coarse = (char *) vrna_alloc(sizeof(char)*(strlen(temp)+2));
   strcpy(Coarse, temp);
   free(temp);
   free(bulge);
   free(loop);
   return(Coarse);
}

/*---------------------------------------------------------------------------*/

/*
 * b2Shapiro: the visible part follows the same scan as b2C() with these
 * differences: every '.' also increments `unpaired`, every '[' emits "((",
 * the bulge flag is also set when a '[' directly follows another '[', and at
 * every ']' helix_size[] is recorded and the size of the loop is formatted
 * with "%d)".
 *
 * NOTE(review): the remainder of this function is damaged (see the marked
 * line below), so its return value and the rest of its output format cannot
 * be read from this copy.
 */
PUBLIC char *b2Shapiro(const char *structure )
{
   short *bulge, *loop;   /* per nesting level: bulge flag / index of the open loop */
   int    i, lp, p, l, k;
   char  *string, *Shapiro, *temp, tt[10];

   bulge = (short *) vrna_alloc(sizeof(short)*(strlen(structure)/3+1));
   loop = (short *) vrna_alloc(sizeof(short)*(strlen(structure)/3+1));
   temp = (char *) vrna_alloc(4*strlen(structure)+3);

   for (i = 0; i < STRUC; i++) {
      loop_size[i] = helix_size[i] = 0;
   }
   loop_degree[0]=0;         /* open structure has degree 0 */
   pairs = unpaired = loops = lp = 0;
   loop[0]=0;

   string = aux_struct( structure );

   i=p=l=0;
   temp[l++] = '(';    /* root */
   while (string[i]) {
      switch(string[i]) {
       case '.':
         unpaired++;
         loop_size[loop[lp]]++;
         break;
       case '[':
         /* two opening brackets per stack */
         temp[l++]='(';
         temp[l++]='(';
         if ((i>0)&&(string[i-1]=='(' || string[i-1]=='['))
            bulge[lp]=1;
         lp++;
         loop_degree[++loops]=1;
         loop[lp]=loops;
         bulge[lp]=0;
         break;
       case ')':
         if (string[i-1]==']') bulge[lp]=1;
         p++;
         break;
       case ']':
         if (string[i-1]==']') bulge[lp]=1;
         switch (loop_degree[loop[lp]]) {
          case 1:  temp[l++]='H'; break;           /* hairpin */
          case 2:
            if (bulge[lp]==1)
               temp[l++] = 'B';                    /* bulge */
            else
               temp[l++] = 'I';                    /* internal loop */
            break;
          default: temp[l++] = 'M';                /* multiloop */
         }
         helix_size[loop[lp]]=p+1;

         sprintf(tt, "%d)" , loop_size[loop[lp]]);
         /* NOTE(review): DAMAGED SPAN. Everything between "k" and "0)" on the
          * next line is missing. The lost text presumably held the end of
          * b2Shapiro and the header and start of another function; the
          * comment above the globals mentions parse_structure(). What follows
          * sets helix_size[] and pairs, frees its buffers and ends without a
          * return statement. Restore from a pristine copy. */
         for(k=0; k0)&&(string[i-1]=='(')) bulge[lp]=1;
         lp++;
         loop_degree[++loops]=1;
         loop[lp]=loops;
         bulge[lp]=0;
         break;
       case ')':
         if (string[i-1]==']') bulge[lp]=1;
         p++;
         break;
       case ']':
         if (string[i-1]==']') bulge[lp]=1;
         helix_size[loop[lp]]=p+1;
         pairs+=p+1;
         p=0;
         loop_degree[loop[--lp]]++;
         break;
      }
      i++;
   }
   free(string);
   free(bulge);
   free(loop);
   free(temp);
}

/*---------------------------------------------------------------------------*/

/*
 * Return a newly allocated string "(" + structure + "R)".
 * The buffer holds strlen(structure)+4 bytes, comes from vrna_alloc and is
 * not freed here.
 *
 * NOTE(review): only xS[0] is written before strcat() is called, so this
 * relies on xS[1] already being '\0' -- presumably vrna_alloc returns
 * zero-filled memory; confirm.
 */
PUBLIC char *add_root(const char *structure)
{
    char *xS;
    xS = (char *) vrna_alloc(sizeof(char)*(strlen(structure)+4));
    xS[0] = '(';
    strcat(xS,structure);
    strcat(xS,"R)");
    return xS;
}

/*---------------------------------------------------------------------------*/

/*
 * expand_Shapiro: only the set-up is readable in this copy. It allocates a
 * scratch buffer of 4*strlen(structure)+2 bytes, writes '(' as the first
 * output character and starts both the read index i and the write index l
 * at 1.
 *
 * NOTE(review): the loop below is damaged ("while (i=0)" is what remains of
 * a longer span) and the file ends in the middle of a for statement.
 * Restore from a pristine copy.
 */
PUBLIC char *expand_Shapiro(const char *structure)
{
   char  *xS, *temp;
   int  i, l;

   temp = (char *) vrna_alloc(4*strlen(structure)+2);

   i = 1;
   l = 1;
   temp[0] = '(';
   while (i=0) {
      switch (structure[i]) {
       case '(':
         for (j=0; j