use warnings;
use strict;

# Translating a DNA sequence into an amino-acid sequence with Perl.
# Several approaches are presented in this article.
#
# Method 1 (direct mapping):
# DNA and amino acids are linked through codons.  There are 64 codons
# (61 coding codons plus 3 stop codons) and 20 amino acids.  The simplest
# possible approach is to spell out the codon -> amino-acid mapping one
# entry at a time.
#
# Only one version of codon2aa should be kept in a real script.

# codon2aa - translate a DNA 3-character codon to an amino acid.
#
# Parameters:
#   $codon - a three-letter DNA codon (A/C/G/T), upper or lower case.
# Returns:
#   The one-letter amino-acid code, or '_' for a stop codon.
# Dies:
#   With the message 'Bad codon "<codon>"!!' when the argument is not
#   exactly one of the 64 codons.
#
# This version is inefficient: every call tests the patterns one after
# another until one of them matches.
sub codon2aa {
    my ($codon) = @_;

    # Each pattern is anchored with \A and \z so that only an exact
    # three-letter codon matches, not a longer string that merely
    # contains the codon somewhere inside it.
    if    ( $codon =~ /\ATCA\z/i ) { return 'S' }    # Serine
    elsif ( $codon =~ /\ATCC\z/i ) { return 'S' }    # Serine
    elsif ( $codon =~ /\ATCG\z/i ) { return 'S' }    # Serine
    elsif ( $codon =~ /\ATCT\z/i ) { return 'S' }    # Serine
    elsif ( $codon =~ /\ATTC\z/i ) { return 'F' }    # Phenylalanine
    elsif ( $codon =~ /\ATTT\z/i ) { return 'F' }    # Phenylalanine
    elsif ( $codon =~ /\ATTA\z/i ) { return 'L' }    # Leucine
    elsif ( $codon =~ /\ATTG\z/i ) { return 'L' }    # Leucine
    elsif ( $codon =~ /\ATAC\z/i ) { return 'Y' }    # Tyrosine
    elsif ( $codon =~ /\ATAT\z/i ) { return 'Y' }    # Tyrosine
    elsif ( $codon =~ /\ATAA\z/i ) { return '_' }    # Stop codon
    elsif ( $codon =~ /\ATAG\z/i ) { return '_' }    # Stop codon
    elsif ( $codon =~ /\ATGC\z/i ) { return 'C' }    # Cysteine
    elsif ( $codon =~ /\ATGT\z/i ) { return 'C' }    # Cysteine
    elsif ( $codon =~ /\ATGA\z/i ) { return '_' }    # Stop codon
    elsif ( $codon =~ /\ATGG\z/i ) { return 'W' }    # Tryptophan
    elsif ( $codon =~ /\ACTA\z/i ) { return 'L' }    # Leucine
    elsif ( $codon =~ /\ACTC\z/i ) { return 'L' }    # Leucine
    elsif ( $codon =~ /\ACTG\z/i ) { return 'L' }    # Leucine
    elsif ( $codon =~ /\ACTT\z/i ) { return 'L' }    # Leucine
    elsif ( $codon =~ /\ACCA\z/i ) { return 'P' }    # Proline
    elsif ( $codon =~ /\ACCC\z/i ) { return 'P' }    # Proline
    elsif ( $codon =~ /\ACCG\z/i ) { return 'P' }    # Proline
    elsif ( $codon =~ /\ACCT\z/i ) { return 'P' }    # Proline
    elsif ( $codon =~ /\ACAC\z/i ) { return 'H' }    # Histidine
    elsif ( $codon =~ /\ACAT\z/i ) { return 'H' }    # Histidine
    elsif ( $codon =~ /\ACAA\z/i ) { return 'Q' }    # Glutamine
    elsif ( $codon =~ /\ACAG\z/i ) { return 'Q' }    # Glutamine
    elsif ( $codon =~ /\ACGA\z/i ) { return 'R' }    # Arginine
    elsif ( $codon =~ /\ACGC\z/i ) { return 'R' }    # Arginine
    elsif ( $codon =~ /\ACGG\z/i ) { return 'R' }    # Arginine
    elsif ( $codon =~ /\ACGT\z/i ) { return 'R' }    # Arginine
    elsif ( $codon =~ /\AATA\z/i ) { return 'I' }    # Isoleucine
    elsif ( $codon =~ /\AATC\z/i ) { return 'I' }    # Isoleucine
    elsif ( $codon =~ /\AATT\z/i ) { return 'I' }    # Isoleucine
    elsif ( $codon =~ /\AATG\z/i ) { return 'M' }    # Methionine
    elsif ( $codon =~ /\AACA\z/i ) { return 'T' }    # Threonine
    elsif ( $codon =~ /\AACC\z/i ) { return 'T' }    # Threonine
    elsif ( $codon =~ /\AACG\z/i ) { return 'T' }    # Threonine
    elsif ( $codon =~ /\AACT\z/i ) { return 'T' }    # Threonine
    elsif ( $codon =~ /\AAAC\z/i ) { return 'N' }    # Asparagine
    elsif ( $codon =~ /\AAAT\z/i ) { return 'N' }    # Asparagine
    elsif ( $codon =~ /\AAAA\z/i ) { return 'K' }    # Lysine
    elsif ( $codon =~ /\AAAG\z/i ) { return 'K' }    # Lysine
    elsif ( $codon =~ /\AAGC\z/i ) { return 'S' }    # Serine
    elsif ( $codon =~ /\AAGT\z/i ) { return 'S' }    # Serine
    elsif ( $codon =~ /\AAGA\z/i ) { return 'R' }    # Arginine
    elsif ( $codon =~ /\AAGG\z/i ) { return 'R' }    # Arginine
    elsif ( $codon =~ /\AGTA\z/i ) { return 'V' }    # Valine
    elsif ( $codon =~ /\AGTC\z/i ) { return 'V' }    # Valine
    elsif ( $codon =~ /\AGTG\z/i ) { return 'V' }    # Valine
    elsif ( $codon =~ /\AGTT\z/i ) { return 'V' }    # Valine
    elsif ( $codon =~ /\AGCA\z/i ) { return 'A' }    # Alanine
    elsif ( $codon =~ /\AGCC\z/i ) { return 'A' }    # Alanine
    elsif ( $codon =~ /\AGCG\z/i ) { return 'A' }    # Alanine
    elsif ( $codon =~ /\AGCT\z/i ) { return 'A' }    # Alanine
    elsif ( $codon =~ /\AGAC\z/i ) { return 'D' }    # Aspartic Acid
    elsif ( $codon =~ /\AGAT\z/i ) { return 'D' }    # Aspartic Acid
    elsif ( $codon =~ /\AGAA\z/i ) { return 'E' }    # Glutamic Acid
    elsif ( $codon =~ /\AGAG\z/i ) { return 'E' }    # Glutamic Acid
    elsif ( $codon =~ /\AGGA\z/i ) { return 'G' }    # Glycine
    elsif ( $codon =~ /\AGGC\z/i ) { return 'G' }    # Glycine
    elsif ( $codon =~ /\AGGG\z/i ) { return 'G' }    # Glycine
    elsif ( $codon =~ /\AGGT\z/i ) { return 'G' }    # Glycine
    else {
        # die (rather than print + bare exit) writes the same message to
        # STDERR but ends with a non-zero status and can be trapped by
        # a caller using eval.
        die "Bad codon \"$codon\"!!\n";
    }
}
# Method 2 (codon degeneracy) -- this needs a little biology.
# Look at these entries from the direct-mapping approach:
#   GGA => Glycine
#   GGC => Glycine
#   GGG => Glycine
#   GGT => Glycine
# The four codons differ only in their third base, yet all of them code
# for the same amino acid.  This is the degeneracy of the genetic code.
# It is exploited here with character classes and the regex wildcard '.'.
#
# Only one version of codon2aa should be kept in a real script.

# codon2aa - translate a DNA 3-character codon to an amino acid.
# Version 2, one pattern per amino acid.
#
# Parameters:
#   $codon - a three-letter DNA codon, upper or lower case.
# Returns:
#   The one-letter amino-acid code, or '_' for a stop codon.
# Dies:
#   With the message 'Bad codon "<codon>"!!' when no pattern matches.
#
# Note: where a pattern ends in '.', any third character (other than a
# newline) is accepted, so e.g. "GCN" yields 'A'.
sub codon2aa {
    my ($codon) = @_;

    # Patterns are anchored with \A and \z so that the argument must be
    # exactly three characters long; alternations are wrapped in (?:...)
    # so the anchors apply to every alternative.
    if    ( $codon =~ /\AGC.\z/i )               { return 'A' }    # Alanine
    elsif ( $codon =~ /\ATG[TC]\z/i )            { return 'C' }    # Cysteine
    elsif ( $codon =~ /\AGA[TC]\z/i )            { return 'D' }    # Aspartic Acid
    elsif ( $codon =~ /\AGA[AG]\z/i )            { return 'E' }    # Glutamic Acid
    elsif ( $codon =~ /\ATT[TC]\z/i )            { return 'F' }    # Phenylalanine
    elsif ( $codon =~ /\AGG.\z/i )               { return 'G' }    # Glycine
    elsif ( $codon =~ /\ACA[TC]\z/i )            { return 'H' }    # Histidine
    elsif ( $codon =~ /\AAT[TCA]\z/i )           { return 'I' }    # Isoleucine
    elsif ( $codon =~ /\AAA[AG]\z/i )            { return 'K' }    # Lysine
    elsif ( $codon =~ /\A(?:TT[AG]|CT.)\z/i )    { return 'L' }    # Leucine
    elsif ( $codon =~ /\AATG\z/i )               { return 'M' }    # Methionine
    elsif ( $codon =~ /\AAA[TC]\z/i )            { return 'N' }    # Asparagine
    elsif ( $codon =~ /\ACC.\z/i )               { return 'P' }    # Proline
    elsif ( $codon =~ /\ACA[AG]\z/i )            { return 'Q' }    # Glutamine
    elsif ( $codon =~ /\A(?:CG.|AG[AG])\z/i )    { return 'R' }    # Arginine
    elsif ( $codon =~ /\A(?:TC.|AG[TC])\z/i )    { return 'S' }    # Serine
    elsif ( $codon =~ /\AAC.\z/i )               { return 'T' }    # Threonine
    elsif ( $codon =~ /\AGT.\z/i )               { return 'V' }    # Valine
    elsif ( $codon =~ /\ATGG\z/i )               { return 'W' }    # Tryptophan
    elsif ( $codon =~ /\ATA[TC]\z/i )            { return 'Y' }    # Tyrosine
    elsif ( $codon =~ /\A(?:TA[AG]|TGA)\z/i )    { return '_' }    # Stop
    else {
        # die (rather than print + bare exit) writes the same message to
        # STDERR but ends with a non-zero status and can be trapped by
        # a caller using eval.
        die "Bad codon \"$codon\"!!\n";
    }
}
3. 第三种方法:哈希法
# Method 3 (hash lookup):
# Every codon is stored as a hash key and the amino acid it codes for as
# the corresponding value; translation is then a single hash lookup.
#
# Only one version of codon2aa should be kept in a real script.

# The table lives in a BEGIN block together with the sub that uses it:
# it is built exactly once, at compile time, stays private to codon2aa,
# and is already populated no matter where in the file the sub is called.
BEGIN {
    my %genetic_code = (
        'TCA' => 'S',    # Serine
        'TCC' => 'S',    # Serine
        'TCG' => 'S',    # Serine
        'TCT' => 'S',    # Serine
        'TTC' => 'F',    # Phenylalanine
        'TTT' => 'F',    # Phenylalanine
        'TTA' => 'L',    # Leucine
        'TTG' => 'L',    # Leucine
        'TAC' => 'Y',    # Tyrosine
        'TAT' => 'Y',    # Tyrosine
        'TAA' => '_',    # Stop
        'TAG' => '_',    # Stop
        'TGC' => 'C',    # Cysteine
        'TGT' => 'C',    # Cysteine
        'TGA' => '_',    # Stop
        'TGG' => 'W',    # Tryptophan
        'CTA' => 'L',    # Leucine
        'CTC' => 'L',    # Leucine
        'CTG' => 'L',    # Leucine
        'CTT' => 'L',    # Leucine
        'CCA' => 'P',    # Proline
        'CCC' => 'P',    # Proline
        'CCG' => 'P',    # Proline
        'CCT' => 'P',    # Proline
        'CAC' => 'H',    # Histidine
        'CAT' => 'H',    # Histidine
        'CAA' => 'Q',    # Glutamine
        'CAG' => 'Q',    # Glutamine
        'CGA' => 'R',    # Arginine
        'CGC' => 'R',    # Arginine
        'CGG' => 'R',    # Arginine
        'CGT' => 'R',    # Arginine
        'ATA' => 'I',    # Isoleucine
        'ATC' => 'I',    # Isoleucine
        'ATT' => 'I',    # Isoleucine
        'ATG' => 'M',    # Methionine
        'ACA' => 'T',    # Threonine
        'ACC' => 'T',    # Threonine
        'ACG' => 'T',    # Threonine
        'ACT' => 'T',    # Threonine
        'AAC' => 'N',    # Asparagine
        'AAT' => 'N',    # Asparagine
        'AAA' => 'K',    # Lysine
        'AAG' => 'K',    # Lysine
        'AGC' => 'S',    # Serine
        'AGT' => 'S',    # Serine
        'AGA' => 'R',    # Arginine
        'AGG' => 'R',    # Arginine
        'GTA' => 'V',    # Valine
        'GTC' => 'V',    # Valine
        'GTG' => 'V',    # Valine
        'GTT' => 'V',    # Valine
        'GCA' => 'A',    # Alanine
        'GCC' => 'A',    # Alanine
        'GCG' => 'A',    # Alanine
        'GCT' => 'A',    # Alanine
        'GAC' => 'D',    # Aspartic Acid
        'GAT' => 'D',    # Aspartic Acid
        'GAA' => 'E',    # Glutamic Acid
        'GAG' => 'E',    # Glutamic Acid
        'GGA' => 'G',    # Glycine
        'GGC' => 'G',    # Glycine
        'GGG' => 'G',    # Glycine
        'GGT' => 'G',    # Glycine
    );

    # codon2aa - translate a DNA 3-character codon to an amino acid.
    # Version 3, using hash lookup.
    #
    # Parameters:
    #   $codon - a three-letter DNA codon (A/C/G/T), upper or lower case.
    # Returns:
    #   The one-letter amino-acid code, or '_' for a stop codon.
    # Dies:
    #   With the message 'Bad codon "<CODON>"!!' (codon shown upper-cased)
    #   when the argument is not one of the 64 codons.
    sub codon2aa {
        my ($codon) = @_;

        # uc converts every lower-case letter to upper case (lc does the
        # opposite), so the lookup is case-insensitive.
        $codon = uc $codon;

        if ( exists $genetic_code{$codon} ) {
            return $genetic_code{$codon};
        }

        # die (rather than print + bare exit) writes the same message to
        # STDERR but ends with a non-zero status and can be trapped by
        # a caller using eval.
        die "Bad codon \"$codon\"!!\n";
    }
}
当然这里面,hash的速度是最快的。
所以我们更推荐第三种方法。
总结以上是内存溢出为你收集整理的perl:DNA序列翻译成氨基酸序列的若干方法,直接法,简并法,哈希法,以及perl中的uc和lc函数(上)全部内容,希望文章能够帮你解决perl:DNA序列翻译成氨基酸序列的若干方法,直接法,简并法,哈希法,以及perl中的uc和lc函数(上)所遇到的程序开发问题。
如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)