the perl script of Tajima's D (backups)_语言综合

#!/usr/bin/perluse strict;use warnings;=c---------------------------------------this perl script is edited to compute tajima's Dthe origin file path is :/ifs5/ST_COMG/USER/yanZengli/tajima/data/ChrB01/ChrB01.bb.snp/ifs5/ST_COMG/USER/yanZengli/tajima/data/ChrB01/ChrB01.bb.tajima/ifs5/ST_COMG/USER/yanZengli/tajima/data/filter.site.position/ChrB01.filter.comm.gzedited by bangemantou (yanZengli@genomics.org.cn) 2012-03-29 pm;=cutdIE "\nUsage: compute tajima's D;\ncomands:\nperl chr_name position reference_base ancestral_base major minor major_# minor_# HWE genotypes snp.data tajima.outfile filter.site.data;\n\n" if (@ARGV != 3);my $snpdb = shift;my $tajima_outfile = shift;my $filter_position = shift;open AA,$snpdb or dIE $!;open BB,">",$tajima_outfile or dIE $!;open (CC,"gzip -dc $filter_position|" ) or dIE $!;my $window = 50000;my $bin = 0.1;my $cout = int ( 1 / $bin );my $step = $window * $bin;my $sample = 0;my %filter = ();my %pi = ();my %snp = ();while ( <CC> ) { my @tt = split /\s+/; my $k = int ( $tt[1] / $step ); $filter{$k} ++;}close CC;my $chr_name;my $chrlen = 0;while ( <AA> ) { chomp; my @tt = split; my @line = @tt; $chr_name = $line[0]; $chrlen = $tt[1]; $sample = ($tt[6]+$tt[7]) / 2; my $k = int ( $tt[1] / $step ); $pi{$k} += pi ( $line[6],$line[7] ); if ( ($line[6]*$line[7]) != 0 ) { $snp{$k}++; }}close AA;print BB " chrname\tposition\tsum.pi/filter.sites\ttajima's D\tsum.snp\tfilter.site\n";my $window_num = int ( $chrlen / $step ); #the number of stepsfor ( my $i=0; $i<=($window_num-$cout); $i++ ) { my $sum_pi = 0; my $sum_snp = 0; my $filter_site = 0; my $end = $i + $cout - 1; for ( my $aa=$i; $aa<=$end; $aa++ ){ if ( !defined($pi{$aa}) ) { $pi{$aa} = 0; } if ( !defined($snp{$aa}) ) { $snp{$aa} = 0; } if ( !defined($filter{$aa}) ) { $filter{$aa} = 0; } $sum_pi += $pi{$aa}; $sum_snp += $snp{$aa}; $filter_site += $filter{$aa}; } my $d = tajima( $sample,$sum_pi,$sum_snp ); my $ID = ($i + 1) * $step; my $mean_pi = 0; my $theta = 0; if ( $filter_site ) { $mean_pi = $sum_pi / $filter_site; } print BB "$chr_name\t$ID\t$mean_pi\t$d\t$sum_snp\t$filter_site\n";}close BB;sub pi { my $a = $_[0]; my $b = $_[1]; if ( $a == 0 || $b == 0 ) { return 0; } my $pi = ( 2 * $a * $b ) / ( ($a+$b) * ($a+$b-1) ); return $pi;}sub tajima { my $n = $_[0]; my $pi = $_[1]; my $t = $_[2]; my $a1; my $a2; for ( my $i=1; $i<$n; $i++ ){ $a1 += (1 / $i); $a2 += (1 / ($i*$i)); } my $b1 = ($n + 1) / ( 3 * ($n-1) ); my $b2 = 2 * ($n*$n + $n + 3) / (9 * $n * ($n-1)); my $c1 = $b1 - (1 / $a1); my $c2 = $b2 - ($n + 2) / ($a1 * $n) + $a2 / ($a1*$a1); my $e1 = $c1 / $a1; my $e2 = $c2 / ($a1*$a1 + $a2); if ( $t == 0) { return "NA"; next; } my $d = ( $pi - ($t/$a1) ) / sqrt ( $e1 * $t + $e2 * $t * ($t-1) ); return $d;}

概述#!/usr/bin/perluse strict;use warnings;=c---------------------------------------this perl script is edited to compute tajima's Dthe origin file path is :/ifs5/ST_COMG/USER/yanzengli/tajima

#!/usr/bin/perluse strict;use warnings;=c---------------------------------------this perl script is edited to compute tajima's Dthe origin file path is :/ifs5/ST_COMG/USER/yanZengli/tajima/data/ChrB01/ChrB01.bb.snp/ifs5/ST_COMG/USER/yanZengli/tajima/data/ChrB01/ChrB01.bb.tajima/ifs5/ST_COMG/USER/yanZengli/tajima/data/filter.site.position/ChrB01.filter.comm.gzedited by bangemantou (yanZengli@genomics.org.cn) 2012-03-29 pm;=cutdIE "\nUsage: compute tajima's D;\ncomands:\nperl chr_name position reference_base ancestral_base major minor major_# minor_# HWE genotypes snp.data tajima.outfile filter.site.data;\n\n" if (@ARGV != 3);my $snpdb = shift;my $tajima_outfile = shift;my $filter_position = shift;open AA,$snpdb or dIE $!;open BB,">",$tajima_outfile or dIE $!;open (CC,"gzip -dc $filter_position|" ) or dIE $!;my $window = 50000;my $bin = 0.1;my $cout = int ( 1 / $bin );my $step = $window * $bin;my $sample = 0;my %filter = ();my %pi = ();my %snp = ();while ( <CC> ) {        my @tt = split /\s+/;        my $k = int ( $tt[1] / $step );        $filter{$k} ++;}close CC;my $chr_name;my $chrlen = 0;while ( <AA> ) {        chomp;        my @tt = split;        my @line = @tt;        $chr_name = $line[0];        $chrlen = $tt[1];        $sample = ($tt[6]+$tt[7]) / 2;        my $k = int ( $tt[1] / $step );        $pi{$k} += pi ( $line[6],$line[7] );        if ( ($line[6]*$line[7]) != 0 ) { $snp{$k}++; }}close AA;print BB " chrname\tposition\tsum.pi/filter.sites\ttajima's D\tsum.snp\tfilter.site\n";my $window_num = int ( $chrlen / $step );       #the number of stepsfor ( my $i=0; $i<=($window_num-$cout); $i++ ) {        my $sum_pi = 0;        my $sum_snp = 0;        my $filter_site = 0;        my $end = $i + $cout - 1;        for ( my $aa=$i; $aa<=$end; $aa++ ){                if ( !defined($pi{$aa}) ) { $pi{$aa} = 0; }                if ( !defined($snp{$aa}) ) { $snp{$aa} = 0; }                if ( !defined($filter{$aa}) ) { $filter{$aa} = 0; }                $sum_pi += $pi{$aa};                $sum_snp += $snp{$aa};                $filter_site += $filter{$aa};        }        my $d = tajima( $sample,$sum_pi,$sum_snp );        my $ID = ($i + 1) * $step;        my $mean_pi = 0;        my $theta = 0;        if ( $filter_site ) { $mean_pi = $sum_pi / $filter_site; }        print BB "$chr_name\t$ID\t$mean_pi\t$d\t$sum_snp\t$filter_site\n";}close BB;sub pi {        my $a = $_[0];        my $b = $_[1];        if ( $a == 0 || $b == 0 ) { return 0; }        my $pi = ( 2 * $a * $b ) / ( ($a+$b) * ($a+$b-1) );        return $pi;}sub tajima {        my $n = $_[0];        my $pi = $_[1];        my $t = $_[2];        my $a1;        my $a2;        for ( my $i=1; $i<$n; $i++ ){                $a1 += (1 / $i);                $a2 += (1 / ($i*$i));        }        my $b1 = ($n + 1) / ( 3 * ($n-1) );        my $b2 = 2 * ($n*$n + $n + 3) / (9 * $n * ($n-1));        my $c1 = $b1 - (1 / $a1);        my $c2 = $b2 - ($n + 2) / ($a1 * $n) + $a2 / ($a1*$a1);        my $e1 = $c1 / $a1;        my $e2 = $c2 / ($a1*$a1 + $a2);        if ( $t == 0) { return "NA"; next; }        my $d = ( $pi - ($t/$a1) ) / sqrt ( $e1 * $t + $e2 * $t * ($t-1) );        return $d;}

说明：

1、 snp.data format

ChrB01 945 A A G A 6 30 0.166728 GG/0.000003 GG/0.000198 AG/1.000000 GG/0.000000

ChrB01 946 G G T G 6 30 0.166734 TT/0.000003 TT/0.000394 GT/1.000000 TT/0.000000

ChrB01 1660 G G A G 5 31 0.138889 AA/0.000000 AA/0.000000 AG/1.000000 AA/0.000000

this is the standard format of snp.

2、 filter.site.data

ChrB01 190

ChrB01 191

ChrB01 192

ChrB01 193

3、the algorithm is from tajima's paper.

[] Fumio Tajima,Statistical Method for Testing the Neutral Mutation Hypothesis by DNA polymorphism

总结

以上是内存溢出为你收集整理的the perl script of Tajima's D (backups)全部内容，希望文章能够帮你解决the perl script of Tajima's D (backups)所遇到的程序开发问题。

如果觉得内存溢出网站内容还不错，欢迎将内存溢出网站推荐给程序员好友。

欢迎分享，转载请注明来源：内存溢出

原文地址: http://outofmemory.cn/langs/1284424.html

the perl script of Tajima's D (backups)

发表评论

评论列表（0条）

the perl script of Tajima&#39;s D (backups)

发表评论

评论列表（0条）

the perl script of Tajima's D (backups)