perl:DNA序列翻译成氨基酸序列的若干方法,直接法,简并法,哈希法,以及perl中的uc和lc函数(上)

perl:DNA序列翻译成氨基酸序列的若干方法,直接法,简并法,哈希法,以及perl中的uc和lc函数(上),第1张

概述:1. 直接转换法(代码以 use warnings; use strict; 开头)。利用 Perl 来进行 DNA 序列到氨基酸序列的翻译,我们来介绍一下几种方法。第一种方法:DNA 序列和氨基酸序列通过密码子来联系,密码子一共有 64 个(其中 61 个编码氨基酸,3 个是终止密码子),标准氨基酸有 20 种。第一种方法也就是最简单的方法,就是建立一一对应的关系,即编写一个把 3 个字符的 DNA 密码子翻译成氨基酸的子程序(A subroutine to translate a DNA 3-character codon to an amino acid)。


1.直接转换法

use strict;
use warnings;

# Translating a DNA sequence into an amino-acid sequence with Perl.
# Several approaches are presented; this is the first one.
#
# DNA and amino-acid sequences are related through codons. There are 64
# codons in total: 61 of them encode one of the 20 standard amino acids
# and the remaining 3 are stop codons. The first and simplest approach is
# to spell out the one-to-one mapping from each codon to its amino acid.

# codon2aa
#
# A subroutine to translate a DNA 3-character codon to an amino acid.
#   Version 1, direct one-to-one comparison
#
# Argument: $codon - a three-letter DNA codon; letter case is ignored.
# Returns:  the one-letter amino-acid code, or '_' for a stop codon.
# Dies:     with "Bad codon ..." when the argument is undefined or is not
#           exactly one of the 64 codons.
#
# This version is inefficient: every call walks the chain of comparisons
# from the top until one matches, so a single codon can cost up to 64
# regex tests.
#
# Every pattern is anchored with \A and \z so that only an exact
# three-letter codon is accepted; an unanchored /TCA/ would also accept
# longer strings such as "ATCAG".
sub codon2aa {
    my ($codon) = @_;

    # Reject undef up front instead of emitting "uninitialized" warnings.
    die "Bad codon (undefined)!!\n" unless defined $codon;

    if    ( $codon =~ /\ATCA\z/i ) { return 'S' }    # Serine
    elsif ( $codon =~ /\ATCC\z/i ) { return 'S' }    # Serine
    elsif ( $codon =~ /\ATCG\z/i ) { return 'S' }    # Serine
    elsif ( $codon =~ /\ATCT\z/i ) { return 'S' }    # Serine
    elsif ( $codon =~ /\ATTC\z/i ) { return 'F' }    # Phenylalanine
    elsif ( $codon =~ /\ATTT\z/i ) { return 'F' }    # Phenylalanine
    elsif ( $codon =~ /\ATTA\z/i ) { return 'L' }    # Leucine
    elsif ( $codon =~ /\ATTG\z/i ) { return 'L' }    # Leucine
    elsif ( $codon =~ /\ATAC\z/i ) { return 'Y' }    # Tyrosine
    elsif ( $codon =~ /\ATAT\z/i ) { return 'Y' }    # Tyrosine
    elsif ( $codon =~ /\ATAA\z/i ) { return '_' }    # Stop codon
    elsif ( $codon =~ /\ATAG\z/i ) { return '_' }    # Stop codon
    elsif ( $codon =~ /\ATGC\z/i ) { return 'C' }    # Cysteine
    elsif ( $codon =~ /\ATGT\z/i ) { return 'C' }    # Cysteine
    elsif ( $codon =~ /\ATGA\z/i ) { return '_' }    # Stop codon
    elsif ( $codon =~ /\ATGG\z/i ) { return 'W' }    # Tryptophan
    elsif ( $codon =~ /\ACTA\z/i ) { return 'L' }    # Leucine
    elsif ( $codon =~ /\ACTC\z/i ) { return 'L' }    # Leucine
    elsif ( $codon =~ /\ACTG\z/i ) { return 'L' }    # Leucine
    elsif ( $codon =~ /\ACTT\z/i ) { return 'L' }    # Leucine
    elsif ( $codon =~ /\ACCA\z/i ) { return 'P' }    # Proline
    elsif ( $codon =~ /\ACCC\z/i ) { return 'P' }    # Proline
    elsif ( $codon =~ /\ACCG\z/i ) { return 'P' }    # Proline
    elsif ( $codon =~ /\ACCT\z/i ) { return 'P' }    # Proline
    elsif ( $codon =~ /\ACAC\z/i ) { return 'H' }    # Histidine
    elsif ( $codon =~ /\ACAT\z/i ) { return 'H' }    # Histidine
    elsif ( $codon =~ /\ACAA\z/i ) { return 'Q' }    # Glutamine
    elsif ( $codon =~ /\ACAG\z/i ) { return 'Q' }    # Glutamine
    elsif ( $codon =~ /\ACGA\z/i ) { return 'R' }    # Arginine
    elsif ( $codon =~ /\ACGC\z/i ) { return 'R' }    # Arginine
    elsif ( $codon =~ /\ACGG\z/i ) { return 'R' }    # Arginine
    elsif ( $codon =~ /\ACGT\z/i ) { return 'R' }    # Arginine
    elsif ( $codon =~ /\AATA\z/i ) { return 'I' }    # Isoleucine
    elsif ( $codon =~ /\AATC\z/i ) { return 'I' }    # Isoleucine
    elsif ( $codon =~ /\AATT\z/i ) { return 'I' }    # Isoleucine
    elsif ( $codon =~ /\AATG\z/i ) { return 'M' }    # Methionine
    elsif ( $codon =~ /\AACA\z/i ) { return 'T' }    # Threonine
    elsif ( $codon =~ /\AACC\z/i ) { return 'T' }    # Threonine
    elsif ( $codon =~ /\AACG\z/i ) { return 'T' }    # Threonine
    elsif ( $codon =~ /\AACT\z/i ) { return 'T' }    # Threonine
    elsif ( $codon =~ /\AAAC\z/i ) { return 'N' }    # Asparagine
    elsif ( $codon =~ /\AAAT\z/i ) { return 'N' }    # Asparagine
    elsif ( $codon =~ /\AAAA\z/i ) { return 'K' }    # Lysine
    elsif ( $codon =~ /\AAAG\z/i ) { return 'K' }    # Lysine
    elsif ( $codon =~ /\AAGC\z/i ) { return 'S' }    # Serine
    elsif ( $codon =~ /\AAGT\z/i ) { return 'S' }    # Serine
    elsif ( $codon =~ /\AAGA\z/i ) { return 'R' }    # Arginine
    elsif ( $codon =~ /\AAGG\z/i ) { return 'R' }    # Arginine
    elsif ( $codon =~ /\AGTA\z/i ) { return 'V' }    # Valine
    elsif ( $codon =~ /\AGTC\z/i ) { return 'V' }    # Valine
    elsif ( $codon =~ /\AGTG\z/i ) { return 'V' }    # Valine
    elsif ( $codon =~ /\AGTT\z/i ) { return 'V' }    # Valine
    elsif ( $codon =~ /\AGCA\z/i ) { return 'A' }    # Alanine
    elsif ( $codon =~ /\AGCC\z/i ) { return 'A' }    # Alanine
    elsif ( $codon =~ /\AGCG\z/i ) { return 'A' }    # Alanine
    elsif ( $codon =~ /\AGCT\z/i ) { return 'A' }    # Alanine
    elsif ( $codon =~ /\AGAC\z/i ) { return 'D' }    # Aspartic Acid
    elsif ( $codon =~ /\AGAT\z/i ) { return 'D' }    # Aspartic Acid
    elsif ( $codon =~ /\AGAA\z/i ) { return 'E' }    # Glutamic Acid
    elsif ( $codon =~ /\AGAG\z/i ) { return 'E' }    # Glutamic Acid
    elsif ( $codon =~ /\AGGA\z/i ) { return 'G' }    # Glycine
    elsif ( $codon =~ /\AGGC\z/i ) { return 'G' }    # Glycine
    elsif ( $codon =~ /\AGGG\z/i ) { return 'G' }    # Glycine
    elsif ( $codon =~ /\AGGT\z/i ) { return 'G' }    # Glycine
    else {
        # die (rather than print + exit) still writes the message to
        # STDERR, but ends the program with a non-zero status and lets a
        # caller trap the failure with eval.
        die "Bad codon \"$codon\"!!\n";
    }
}



2.第二种版本:简并法

# Second approach; this one needs a little biology.
#
# Look at these entries from the first approach:
#   GGA => Glycine
#   GGC => Glycine
#   GGG => Glycine
#   GGT => Glycine
# The four codons differ only in their third base, yet all of them encode
# the same amino acid. This is the degeneracy of the genetic code. Here we
# exploit it with regular-expression character classes, so that one
# pattern covers a whole family of codons.

# codon2aa
#
# A subroutine to translate a DNA 3-character codon to an amino acid.
#   Version 2, using codon degeneracy
#
# Argument: $codon - a three-letter DNA codon; letter case is ignored.
# Returns:  the one-letter amino-acid code, or '_' for a stop codon.
# Dies:     with "Bad codon ..." when the argument is undefined or is not
#           exactly three letters drawn from A, C, G and T.
#
# The "any third base" position is written [ACGT] rather than '.', and
# every pattern is anchored with \A and \z. With a bare '.' and no anchors
# inputs such as "GC-" or "TGCA" would be translated instead of rejected.
sub codon2aa {
    my ($codon) = @_;

    # Reject undef up front instead of emitting "uninitialized" warnings.
    die "Bad codon (undefined)!!\n" unless defined $codon;

    if    ( $codon =~ /\AGC[ACGT]\z/i )                { return 'A' }    # Alanine
    elsif ( $codon =~ /\ATG[TC]\z/i )                  { return 'C' }    # Cysteine
    elsif ( $codon =~ /\AGA[TC]\z/i )                  { return 'D' }    # Aspartic Acid
    elsif ( $codon =~ /\AGA[AG]\z/i )                  { return 'E' }    # Glutamic Acid
    elsif ( $codon =~ /\ATT[TC]\z/i )                  { return 'F' }    # Phenylalanine
    elsif ( $codon =~ /\AGG[ACGT]\z/i )                { return 'G' }    # Glycine
    elsif ( $codon =~ /\ACA[TC]\z/i )                  { return 'H' }    # Histidine
    elsif ( $codon =~ /\AAT[TCA]\z/i )                 { return 'I' }    # Isoleucine
    elsif ( $codon =~ /\AAA[AG]\z/i )                  { return 'K' }    # Lysine
    elsif ( $codon =~ /\A(?:TT[AG]|CT[ACGT])\z/i )     { return 'L' }    # Leucine
    elsif ( $codon =~ /\AATG\z/i )                     { return 'M' }    # Methionine
    elsif ( $codon =~ /\AAA[TC]\z/i )                  { return 'N' }    # Asparagine
    elsif ( $codon =~ /\ACC[ACGT]\z/i )                { return 'P' }    # Proline
    elsif ( $codon =~ /\ACA[AG]\z/i )                  { return 'Q' }    # Glutamine
    elsif ( $codon =~ /\A(?:CG[ACGT]|AG[AG])\z/i )     { return 'R' }    # Arginine
    elsif ( $codon =~ /\A(?:TC[ACGT]|AG[TC])\z/i )     { return 'S' }    # Serine
    elsif ( $codon =~ /\AAC[ACGT]\z/i )                { return 'T' }    # Threonine
    elsif ( $codon =~ /\AGT[ACGT]\z/i )                { return 'V' }    # Valine
    elsif ( $codon =~ /\ATGG\z/i )                     { return 'W' }    # Tryptophan
    elsif ( $codon =~ /\ATA[TC]\z/i )                  { return 'Y' }    # Tyrosine
    elsif ( $codon =~ /\A(?:TA[AG]|TGA)\z/i )          { return '_' }    # Stop
    else {
        # die (rather than print + exit) still writes the message to
        # STDERR, but ends the program with a non-zero status and lets a
        # caller trap the failure with eval.
        die "Bad codon \"$codon\"!!\n";
    }
}



3.第三种方法:哈希法

# Third approach: use a hash.
# Every codon is a hash key and the amino acid it encodes is the value,
# so translating a codon is a single hash lookup.

# codon2aa
#
# A subroutine to translate a DNA 3-character codon to an amino acid.
#   Version 3, using hash lookup
#
# Argument: $codon - a three-letter DNA codon; letter case is ignored.
# Returns:  the one-letter amino-acid code, or '_' for a stop codon.
# Dies:     with "Bad codon ..." when the argument is undefined or is not
#           one of the 64 codons.
#
# The table lives in a lexical that is private to this BEGIN block, so it
# is built once at compile time rather than on every call, and it is
# already populated no matter where in the file codon2aa is first called.
BEGIN {
    my %genetic_code = (
        'TCA' => 'S',    # Serine
        'TCC' => 'S',    # Serine
        'TCG' => 'S',    # Serine
        'TCT' => 'S',    # Serine
        'TTC' => 'F',    # Phenylalanine
        'TTT' => 'F',    # Phenylalanine
        'TTA' => 'L',    # Leucine
        'TTG' => 'L',    # Leucine
        'TAC' => 'Y',    # Tyrosine
        'TAT' => 'Y',    # Tyrosine
        'TAA' => '_',    # Stop
        'TAG' => '_',    # Stop
        'TGC' => 'C',    # Cysteine
        'TGT' => 'C',    # Cysteine
        'TGA' => '_',    # Stop
        'TGG' => 'W',    # Tryptophan
        'CTA' => 'L',    # Leucine
        'CTC' => 'L',    # Leucine
        'CTG' => 'L',    # Leucine
        'CTT' => 'L',    # Leucine
        'CCA' => 'P',    # Proline
        'CCC' => 'P',    # Proline
        'CCG' => 'P',    # Proline
        'CCT' => 'P',    # Proline
        'CAC' => 'H',    # Histidine
        'CAT' => 'H',    # Histidine
        'CAA' => 'Q',    # Glutamine
        'CAG' => 'Q',    # Glutamine
        'CGA' => 'R',    # Arginine
        'CGC' => 'R',    # Arginine
        'CGG' => 'R',    # Arginine
        'CGT' => 'R',    # Arginine
        'ATA' => 'I',    # Isoleucine
        'ATC' => 'I',    # Isoleucine
        'ATT' => 'I',    # Isoleucine
        'ATG' => 'M',    # Methionine
        'ACA' => 'T',    # Threonine
        'ACC' => 'T',    # Threonine
        'ACG' => 'T',    # Threonine
        'ACT' => 'T',    # Threonine
        'AAC' => 'N',    # Asparagine
        'AAT' => 'N',    # Asparagine
        'AAA' => 'K',    # Lysine
        'AAG' => 'K',    # Lysine
        'AGC' => 'S',    # Serine
        'AGT' => 'S',    # Serine
        'AGA' => 'R',    # Arginine
        'AGG' => 'R',    # Arginine
        'GTA' => 'V',    # Valine
        'GTC' => 'V',    # Valine
        'GTG' => 'V',    # Valine
        'GTT' => 'V',    # Valine
        'GCA' => 'A',    # Alanine
        'GCC' => 'A',    # Alanine
        'GCG' => 'A',    # Alanine
        'GCT' => 'A',    # Alanine
        'GAC' => 'D',    # Aspartic Acid
        'GAT' => 'D',    # Aspartic Acid
        'GAA' => 'E',    # Glutamic Acid
        'GAG' => 'E',    # Glutamic Acid
        'GGA' => 'G',    # Glycine
        'GGC' => 'G',    # Glycine
        'GGG' => 'G',    # Glycine
        'GGT' => 'G',    # Glycine
    );

    sub codon2aa {
        my ($codon) = @_;

        # Reject undef before uc() so no "uninitialized" warning is raised.
        die "Bad codon (undefined)!!\n" unless defined $codon;

        # uc = uppercase, lc = lowercase: uc converts every lowercase
        # letter to uppercase and lc converts every uppercase letter to
        # lowercase. The table keys are uppercase, so normalise first.
        $codon = uc $codon;

        if ( exists $genetic_code{$codon} ) {
            return $genetic_code{$codon};
        }

        # die (rather than print + exit) still writes the message to
        # STDERR, but ends the program with a non-zero status and lets a
        # caller trap the failure with eval.
        die "Bad codon \"$codon\"!!\n";
    }
}


当然这里面,hash的速度是最快的。

所以我们更推荐第三种方法。

总结

以上是内存溢出为你收集整理的perl:DNA序列翻译成氨基酸序列的若干方法,直接法,简并法,哈希法,以及perl中的uc和lc函数(上)全部内容,希望文章能够帮你解决perl:DNA序列翻译成氨基酸序列的若干方法,直接法,简并法,哈希法,以及perl中的uc和lc函数(上)所遇到的程序开发问题。

如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/langs/1293518.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-06-10
下一篇 2022-06-10

发表评论

登录后才能评论

评论列表(0条)

保存