forked from z0on/2bRAD_denovo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
vcf2dadi.pl
executable file
·128 lines (117 loc) · 2.73 KB
/
vcf2dadi.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env perl
use strict;
use warnings;
#use Bio::SeqIO;
my $usage="
Usage: perl vcf2dadi.pl <vcf file> <list file>
Genome file is in fasta format;
List file gives population designations, like this:
sample1 population1
sample2 population1
sample3 population2
";
if (!$ARGV[1]) { die $usage;}
my ($VCF,$list)=@ARGV;
my %list;
open(IN,"< $list")||die"$!";
while (<IN>) {
chomp;
next if(/^\#/);
my @a=split(/\s+/);
$list{$a[0]}=$a[1];
}
close IN;
my %vcf;
my %pop;
my %record;
my %crec;
my @chroms;
open(IN,"< $VCF");
while (<IN>) {
chomp;
next if(/^##/);
if(/^#/){
my @a=split(/\s+/);
for(my $i=9;$i<@a;$i++){
next if(!exists $list{$a[$i]});
# die"$a[$i] does not exists in $list\n" if(!exists $list{$a[$i]});
$record{$i}=$list{$a[$i]};
}
next;
}
my @a=split(/\s+/);
my ($chr,$pos,$ref,$alt)=($a[0],$a[1],$a[3],$a[4]);
if (!$crec{$chr}) {
push @chroms, $chr;
$crec{$chr}=1;
#print "@chroms\n-----\n";
}
next if($alt=~/,/);
$vcf{$chr}{$pos}{ref}=$ref;
$vcf{$chr}{$pos}{alt}=$alt;
# print "--------------\n$ref\t$alt\n";
foreach my $i(keys %record){
my $indv=$record{$i};
$pop{$indv}=1;
# print "$indv\t";
my $geno=$a[$i];
# print "$geno\t";
$geno=~/^(.)[\/\|](.)/;
my $tempa=$1;
my $tempb=$2;
# print "$tempa $tempb\n";
my ($a1,$a2)=(0,0);
if($tempa eq "." || $tempb eq "."){
$a1=0;
$a2=0;
}
elsif ($tempa+$tempb == 1) {
$a1=1;
$a2=1;
}
elsif ($tempa+$tempb == 2) {
$a1=0;
$a2=2;
}
elsif ($tempa+$tempb == 0) {
$a1=2;
$a2=0;
}
$vcf{$chr}{$pos}{$indv}{a1}+=$a1;
$vcf{$chr}{$pos}{$indv}{a2}+=$a2;
}
#last;
}
close IN;
$VCF=~s/\..+//;
my $outname=$VCF."_dadi.data";
open(O,"> $outname");
my $title="NAME\tOUT\tAllele1";
foreach my $pop(sort keys %pop){
$title.="\t$pop";
}
$title.="\tAllele2";
foreach my $pop(sort keys %pop){
$title.="\t$pop";
}
$title.="\tGene\tPostion\n";
print O "$title";
foreach my $id (@chroms){
#print "chrom $id...\n";
foreach my $pos (sort {$a<=>$b} keys %{$vcf{$id}}){
my $ref="A".$vcf{$id}{$pos}{ref}."A";
my $line="$ref\t$ref\t$vcf{$id}{$pos}{ref}";
foreach my $pop(sort keys %pop){
my $num=$vcf{$id}{$pos}{$pop}{a1};
$line.="\t$num";
}
$line.="\t$vcf{$id}{$pos}{alt}";
foreach my $pop(sort keys %pop){
my $num=$vcf{$id}{$pos}{$pop}{a2};
$line.="\t$num";
}
$line.="\t$id\t$pos\n";
print O "$line";
}
}
close O;