#!/usr/bin/awk -f
# Fill out a genetic code with nucleotide ambiguity codes.
#
# NOTE: gensub() is a GNU awk extension, so this script must be run with
# gawk (or another awk that provides gensub).
#
# awk convention used below: parameters listed after the extra run of
# spaces in a function header are local variables, not arguments.

# Look up the amino acid stored for theCodon after replacing the character
# at position theBase with theValue.
# Returns the stored value, or "" when that codon is not in theHash.
# The "in" test is deliberate: merely referencing theHash[key] for a missing
# key would create an empty element and pollute the table.
function lookupVariant( theHash, theCodon, theBase, theValue,    key )
{
    key = gensub( /./, theValue, theBase, theCodon );
    return ( key in theHash ) ? theHash[key] : "";
}

# Substitute theFirst and theSecond at position theBase of theCodon.
# Returns the shared amino acid when both resulting codons are present in
# theHash and map to the same (non-empty) value; otherwise returns 0.
function tryPair( theHash, theCodon, theBase, theFirst, theSecond,    aa1, aa2 )
{
    aa1 = lookupVariant( theHash, theCodon, theBase, theFirst );
    aa2 = lookupVariant( theHash, theCodon, theBase, theSecond );

    if ( aa1 && aa1 == aa2 ) {
        return aa1;
    }
    return 0;    # false
}

# Same as tryPair, but all three substitutions must agree.
function tryTriplet( theHash, theCodon, theBase, theFirst, theSecond, theThird,    aa1, aa2, aa3 )
{
    aa1 = lookupVariant( theHash, theCodon, theBase, theFirst );
    aa2 = lookupVariant( theHash, theCodon, theBase, theSecond );
    aa3 = lookupVariant( theHash, theCodon, theBase, theThird );

    if ( aa1 && aa1 == aa2 && aa2 == aa3 ) {
        return aa1;
    }
    return 0;    # false
}

# Same as tryPair, but all four substitutions must agree.
function tryQuadruplet( theHash, theCodon, theBase, theFirst, theSecond, theThird, theFourth,    aa1, aa2, aa3, aa4 )
{
    aa1 = lookupVariant( theHash, theCodon, theBase, theFirst );
    aa2 = lookupVariant( theHash, theCodon, theBase, theSecond );
    aa3 = lookupVariant( theHash, theCodon, theBase, theThird );
    aa4 = lookupVariant( theHash, theCodon, theBase, theFourth );

    if ( aa1 && aa1 == aa2 && aa2 == aa3 && aa3 == aa4 ) {
        return aa1;
    }
    return 0;    # false
}

# Store theAminoAcid in theHash under theCodon with position theBase
# replaced by theValue (the ambiguity letter).
# A value that is not a single upper-case letter or "*" is still stored,
# but a ">>" diagnostic line is printed first so it can be spotted.
function addOne( theHash, theCodon, theBase, theValue, theAminoAcid,    key )
{
    key = gensub( /./, theValue, theBase, theCodon );
    if ( theAminoAcid !~ /^[A-Z*]$/ )
        print ">>", theCodon, key, theAminoAcid;
    theHash[key] = theAminoAcid;
}

# theBase must be 1, 2, or 3.
# theCodon is a three-letter string.
# Vary it while holding other positions constant.
# For position theBase (1, 2, or 3) of theCodon, try every nucleotide
# ambiguity letter: when all the concrete bases a letter stands for give the
# same amino acid at that position, record the codon spelled with that letter.
# Variables after the extra spaces in the header are locals.
function varyOne( theHash, theCodon, theBase,    aa, table, count, i, code, bases )
{
    # Each entry is "<ambiguity letter>=<bases it stands for>".
    count = split( "m=ac r=ag w=au s=cg y=cu k=gu v=acg h=acu d=agu b=cgu n=acgu", table, " " );

    for ( i = 1; i <= count; i++ ) {
        code  = substr( table[i], 1, 1 );
        bases = substr( table[i], 3 );

        if ( length( bases ) == 2 ) {
            aa = tryPair( theHash, theCodon, theBase,
                          substr( bases, 1, 1 ), substr( bases, 2, 1 ) );
        } else if ( length( bases ) == 3 ) {
            aa = tryTriplet( theHash, theCodon, theBase,
                             substr( bases, 1, 1 ), substr( bases, 2, 1 ),
                             substr( bases, 3, 1 ) );
        } else {
            aa = tryQuadruplet( theHash, theCodon, theBase,
                                substr( bases, 1, 1 ), substr( bases, 2, 1 ),
                                substr( bases, 3, 1 ), substr( bases, 4, 1 ) );
        }

        if ( aa ) {
            addOne( theHash, theCodon, theBase, code, aa );
        }
    }
}

# Placeholder: varying two positions at once is not implemented, and the
# calls to it in tryCodon are commented out.
function varyTwo( theHash, theCodon, theBase )
{
}

# Try ambiguity letters at each of the three positions of theCodon.
function tryCodon( theHash, theCodon )
{
    varyOne( theHash, theCodon, 1 );
    varyOne( theHash, theCodon, 2 );
    varyOne( theHash, theCodon, 3 );
    # varyTwo( theHash, theCodon, 1 );
    # varyTwo( theHash, theCodon, 2 );
    # varyTwo( theHash, theCodon, 3 );
}

# Extend theHash (codon -> amino acid) in place with ambiguous codons.
#
# An earlier version called tryCodon() explicitly for each of the 64 codons
# ("aaa" .. "uuu"); iterating over the keys already in the table replaces
# that list.
#
# Two passes are made.  A pass only substitutes concrete bases at one
# position of codons that were present when the pass started, so each pass
# can add at most one more ambiguous position per codon.  Twice seems to be
# enough; this could instead be repeated until the table stops growing.
function addAmbig( theHash,    pass, c, snapshot )
{
    for ( pass = 1; pass <= 2; pass++ ) {
        # Iterate over a copy of the keys, because tryCodon() adds
        # elements to theHash while we loop.
        for ( c in theHash ) {
            snapshot[c] = theHash[c];
        }
        for ( c in snapshot ) {
            tryCodon( theHash, c );
        }

        # Clean up.  In awk, merely referencing theHash[key] for a key that
        # does not exist creates an element with an empty value, so any
        # lookup of a missing codon can leave such entries behind.  Drop
        # every entry without a value.
        for ( c in theHash ) {
            if ( ! theHash[c] ) {
                delete theHash[c];
            }
        }
    }
}

BEGIN {
    frame = 1;    # not referenced anywhere else in this file
}

# Read the table: first field is the codon, second the amino acid.
# Lines with fewer than two fields (e.g. blank lines) are skipped so they
# cannot plant empty entries or overwrite a valid one.
NF >= 2 {
    gencode[$1] = $2;
}

# Expand the table and print it, one "codon amino-acid" pair per line.
# The output order is whatever order awk enumerates the array in.
END {
    addAmbig( gencode );
    for ( x in gencode ) {
        print x, gencode[x];
    }
}