perl应用:DNA序列翻译(下):从fasta格式中读取序列,然后输出蛋白质序列,以及fasta格式的介绍

perl应用:DNA序列翻译(下):从fasta格式中读取序列,然后输出蛋白质序列,以及fasta格式的介绍,第1张

概述:use strict; use warnings; my $dna =''; my $protein =''; my @file_data=( ); my @filedata; @filedata = get_file_data();$dna = extract_sequence_from_fasta_data(@filedat……(代码节选,完整脚本见下文)



use strict;
use warnings;

# Translate the DNA sequence stored in a FASTA file into a protein sequence.
#
# Flow: prompt for a file path on STDIN, read the file, drop the FASTA header
# plus blank/comment lines, translate the DNA codon by codon, and print the
# resulting protein 25 residues per line.

# get_file_data()
#
# Prompt on STDOUT for a file path, read that path from STDIN and return the
# file's lines (newlines included) as a list.
# Dies if no path can be read or the file cannot be opened.
sub get_file_data {
    print "please input the Path just like this f:\\perl\\data.txt\n";

    my $dna_filename = <STDIN>;
    die "no file name given on STDIN\n" unless defined $dna_filename;
    chomp $dna_filename;

    # Three-argument open with a lexical handle: the mode can never be
    # injected through the user-supplied file name.
    open(my $fh, '<', $dna_filename)
        or die "can not open the file! ($dna_filename: $!)\n";
    my @filedata = <$fh>;
    close $fh;

    return @filedata;
}

# extract_sequence_from_fasta_data(@lines)
#
# Return the bare sequence contained in the lines of a FASTA file, with all
# whitespace removed.
#
# A FASTA record has three parts:
#   1. a header line starting with '>' (name and origin of the sequence);
#   2. the sequence itself in standard one-letter codes;
#   3. an optional '*' marking the end of the sequence.
sub extract_sequence_from_fasta_data {
    my (@fasta_file_data) = @_;

    my $sequence = '';
    foreach my $line (@fasta_file_data) {
        next if $line =~ /^\s*$/;     # blank line
        next if $line =~ /^\s*#/;     # comment line
        next if $line =~ /^>/;        # FASTA header line
        $sequence .= $line;
    }

    $sequence =~ s/\s//g;
    $sequence =~ s/\*+\z//;           # drop the optional end-of-sequence marker

    return $sequence;
}

# print_sequence($sequence, $length)
#
# Print $sequence to STDOUT, $length characters per line.
# Dies if $length is not a positive number (a zero step would never advance).
sub print_sequence {
    my ($sequence, $length) = @_;

    die "print_sequence: line length must be a positive number\n"
        unless defined $length && $length > 0;

    for (my $pos = 0; $pos < length($sequence); $pos += $length) {
        print substr($sequence, $pos, $length), "\n";
    }
    return;
}

# Standard genetic code: DNA codon => one-letter amino acid ('_' = stop).
# Built once at file scope instead of on every codon2aa() call.
my %genetic_code = (
    'TCA' => 'S',    # Serine
    'TCC' => 'S',    # Serine
    'TCG' => 'S',    # Serine
    'TCT' => 'S',    # Serine
    'TTC' => 'F',    # Phenylalanine
    'TTT' => 'F',    # Phenylalanine
    'TTA' => 'L',    # Leucine
    'TTG' => 'L',    # Leucine
    'TAC' => 'Y',    # Tyrosine
    'TAT' => 'Y',    # Tyrosine
    'TAA' => '_',    # Stop
    'TAG' => '_',    # Stop
    'TGC' => 'C',    # Cysteine
    'TGT' => 'C',    # Cysteine
    'TGA' => '_',    # Stop
    'TGG' => 'W',    # Tryptophan
    'CTA' => 'L',    # Leucine
    'CTC' => 'L',    # Leucine
    'CTG' => 'L',    # Leucine
    'CTT' => 'L',    # Leucine
    'CCA' => 'P',    # Proline
    'CCC' => 'P',    # Proline
    'CCG' => 'P',    # Proline
    'CCT' => 'P',    # Proline
    'CAC' => 'H',    # Histidine
    'CAT' => 'H',    # Histidine
    'CAA' => 'Q',    # Glutamine
    'CAG' => 'Q',    # Glutamine
    'CGA' => 'R',    # Arginine
    'CGC' => 'R',    # Arginine
    'CGG' => 'R',    # Arginine
    'CGT' => 'R',    # Arginine
    'ATA' => 'I',    # Isoleucine
    'ATC' => 'I',    # Isoleucine
    'ATT' => 'I',    # Isoleucine
    'ATG' => 'M',    # Methionine
    'ACA' => 'T',    # Threonine
    'ACC' => 'T',    # Threonine
    'ACG' => 'T',    # Threonine
    'ACT' => 'T',    # Threonine
    'AAC' => 'N',    # Asparagine
    'AAT' => 'N',    # Asparagine
    'AAA' => 'K',    # Lysine
    'AAG' => 'K',    # Lysine
    'AGC' => 'S',    # Serine
    'AGT' => 'S',    # Serine
    'AGA' => 'R',    # Arginine
    'AGG' => 'R',    # Arginine
    'GTA' => 'V',    # Valine
    'GTC' => 'V',    # Valine
    'GTG' => 'V',    # Valine
    'GTT' => 'V',    # Valine
    'GCA' => 'A',    # Alanine
    'GCC' => 'A',    # Alanine
    'GCG' => 'A',    # Alanine
    'GCT' => 'A',    # Alanine
    'GAC' => 'D',    # Aspartic acid
    'GAT' => 'D',    # Aspartic acid
    'GAA' => 'E',    # Glutamic acid
    'GAG' => 'E',    # Glutamic acid
    'GGA' => 'G',    # Glycine
    'GGC' => 'G',    # Glycine
    'GGG' => 'G',    # Glycine
    'GGT' => 'G',    # Glycine
);

# codon2aa($codon)
#
# Translate one three-letter DNA codon (any letter case) into its one-letter
# amino acid, or '_' for a stop codon, by hash lookup: every codon is a key
# and the amino acid it encodes is the value.
# Dies with 'Bad codon "..."!!' when the codon is not in the table, so the
# failure reaches STDERR together with a non-zero exit status.
sub codon2aa {
    my ($codon) = @_;

    $codon = uc $codon;    # the table keys are upper case

    die "Bad codon \"$codon\"!!\n" unless exists $genetic_code{$codon};

    return $genetic_code{$codon};
}

# dna2peptIDe($dna)
#
# Translate a DNA string into a protein string, reading codons from the first
# base onwards. One or two bases left over at the end are ignored. Stop codons
# become '_' and translation carries on past them.
# The unusual capitalisation of the name is kept so existing callers still
# resolve.
sub dna2peptIDe {
    my ($dna) = @_;

    my $protein = '';
    for (my $i = 0; $i < (length($dna) - 2); $i += 3) {
        $protein .= codon2aa(substr($dna, $i, 3));
    }

    # The explicit return matters: without it the caller receives no protein.
    return $protein;
}

# main()
#
# Program entry point: read the FASTA file, translate it and print the result.
sub main {
    my @filedata = get_file_data();
    my $dna      = extract_sequence_from_fasta_data(@filedata);
    my $protein  = dna2peptIDe($dna);
    print_sequence($protein, 25);
    return;
}

# Run only when executed as a script, so the subroutines above can be loaded
# (require/do) without prompting for input. This sits at the end of the file
# so that %genetic_code is already populated when main() runs.
main() unless caller;

1;







结果如下:

F:\>f:perl\a.pl
RWRR_GVLGALGRPPTGLQRRRRMGPAQ_EYAAWEA_LEAEVVVGAFATAWDAAEWSVQVRGSLAGVVRECAGSGDMEGDGSDPEPPDAGEDSKSENGENAPIYCICRKPDINCFMIGCDNCNEWFHGDCIRITEKMAKAIREWYCRECREKDPKLEIRYRHKKSRERDGNERDSSEPRDEGGGRKRPVPDPDLQRRAGSGTGVGAMLARGSASPHKSSPQPLVATPSQHHQQQQQQIKRSARMCGECEACRRTEDCGHCDFCRDMKKFGGPNKIRQKCRLRQCQLRARESYKYFPSSLSPVTPSESLPRPRRPLPTQQQPQPSQKLGRIREDEGAVASSTVKEPPEATATPEPLSDEDL
F:\>
总结

以上是内存溢出为你收集整理的perl应用:DNA序列翻译(下):从fasta格式中读取序列,然后输出蛋白质序列,以及fasta格式的介绍全部内容,希望文章能够帮你解决perl应用:DNA序列翻译(下):从fasta格式中读取序列,然后输出蛋白质序列,以及fasta格式的介绍所遇到的程序开发问题。

如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/langs/1293097.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-06-10
下一篇 2022-06-10

发表评论

登录后才能评论

评论列表(0条)

保存