#!/usr/bin/perluse strict;use warnings;=c---------------------------------------this perl script is edited to compute tajima's Dthe origin file path is :/ifs5/ST_COMG/USER/yanZengli/tajima/data/ChrB01/ChrB01.bb.snp/ifs5/ST_COMG/USER/yanZengli/tajima/data/ChrB01/ChrB01.bb.tajima/ifs5/ST_COMG/USER/yanZengli/tajima/data/filter.site.position/ChrB01.filter.comm.gzedited by bangemantou (yanZengli@genomics.org.cn) 2012-03-29 pm;=cutdIE "\nUsage: compute tajima's D;\ncomands:\nperl chr_name position reference_base ancestral_base major minor major_# minor_# HWE genotypes snp.data tajima.outfile filter.site.data;\n\n" if (@ARGV != 3);my $snpdb = shift;my $tajima_outfile = shift;my $filter_position = shift;open AA,$snpdb or dIE $!;open BB,">",$tajima_outfile or dIE $!;open (CC,"gzip -dc $filter_position|" ) or dIE $!;my $window = 50000;my $bin = 0.1;my $cout = int ( 1 / $bin );my $step = $window * $bin;my $sample = 0;my %filter = ();my %pi = ();my %snp = ();while ( <CC> ) { my @tt = split /\s+/; my $k = int ( $tt[1] / $step ); $filter{$k} ++;}close CC;my $chr_name;my $chrlen = 0;while ( <AA> ) { chomp; my @tt = split; my @line = @tt; $chr_name = $line[0]; $chrlen = $tt[1]; $sample = ($tt[6]+$tt[7]) / 2; my $k = int ( $tt[1] / $step ); $pi{$k} += pi ( $line[6],$line[7] ); if ( ($line[6]*$line[7]) != 0 ) { $snp{$k}++; }}close AA;print BB " chrname\tposition\tsum.pi/filter.sites\ttajima's D\tsum.snp\tfilter.site\n";my $window_num = int ( $chrlen / $step ); #the number of stepsfor ( my $i=0; $i<=($window_num-$cout); $i++ ) { my $sum_pi = 0; my $sum_snp = 0; my $filter_site = 0; my $end = $i + $cout - 1; for ( my $aa=$i; $aa<=$end; $aa++ ){ if ( !defined($pi{$aa}) ) { $pi{$aa} = 0; } if ( !defined($snp{$aa}) ) { $snp{$aa} = 0; } if ( !defined($filter{$aa}) ) { $filter{$aa} = 0; } $sum_pi += $pi{$aa}; $sum_snp += $snp{$aa}; $filter_site += $filter{$aa}; } my $d = tajima( $sample,$sum_pi,$sum_snp ); my $ID = ($i + 1) * $step; my $mean_pi = 0; my $theta = 0; if ( $filter_site ) { $mean_pi = $sum_pi / $filter_site; } print BB "$chr_name\t$ID\t$mean_pi\t$d\t$sum_snp\t$filter_site\n";}close BB;sub pi { my $a = $_[0]; my $b = $_[1]; if ( $a == 0 || $b == 0 ) { return 0; } my $pi = ( 2 * $a * $b ) / ( ($a+$b) * ($a+$b-1) ); return $pi;}sub tajima { my $n = $_[0]; my $pi = $_[1]; my $t = $_[2]; my $a1; my $a2; for ( my $i=1; $i<$n; $i++ ){ $a1 += (1 / $i); $a2 += (1 / ($i*$i)); } my $b1 = ($n + 1) / ( 3 * ($n-1) ); my $b2 = 2 * ($n*$n + $n + 3) / (9 * $n * ($n-1)); my $c1 = $b1 - (1 / $a1); my $c2 = $b2 - ($n + 2) / ($a1 * $n) + $a2 / ($a1*$a1); my $e1 = $c1 / $a1; my $e2 = $c2 / ($a1*$a1 + $a2); if ( $t == 0) { return "NA"; next; } my $d = ( $pi - ($t/$a1) ) / sqrt ( $e1 * $t + $e2 * $t * ($t-1) ); return $d;}
说明:
1、 snp.data format
ChrB01 945 A A G A 6 30 0.166728 GG/0.000003 GG/0.000198 AG/1.000000 GG/0.000000
ChrB01 946 G G T G 6 30 0.166734 TT/0.000003 TT/0.000394 GT/1.000000 TT/0.000000
ChrB01 1660 G G A G 5 31 0.138889 AA/0.000000 AA/0.000000 AG/1.000000 AA/0.000000
this is the standard format of snp.
2、 filter.site.data
ChrB01 190
ChrB01 191
ChrB01 192
ChrB01 193
3、the algorithm is from tajima's paper.
[] Fumio Tajima,Statistical Method for Testing the Neutral Mutation Hypothesis by DNA polymorphism
总结以上是内存溢出为你收集整理的the perl script of Tajima's D (backups)全部内容,希望文章能够帮你解决the perl script of Tajima's D (backups)所遇到的程序开发问题。
如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)