Solution: I can't find any way around this other than to make a new converter.
1. Using PLINK, recode .ped file such that each chromosome has its own .ped/.map set of files. Something like:
./plink --file mydata --chr 1 --recode --out malc1;
./plink --file mydata --chr 2 --recode --out malc2;
2. Use the following script to create fastPHASE input files. Enter your values for 'nchr', 'iroot', and 'oroot' after the function dbl().
dbl() {
i=$1;
f="$infile"".ped";
nrow=$(wc -l "$f" | awk '{print $1}');
echo "splitting row $i/$nrow";
# split lines containing genotypes, by allele, for each individual
#odd alleles
odd=$(sed -n $i"p" "$f" | tr ' ' '\n' | tail -n +7 | sed -n 'p;n' | tr '\n' ' ' | sed 's/ $//g');
#even alleles
even=$(sed -n $i"p" "$f" | tr ' ' '\n' | tail -n +7 | sed -n 'g;n;p' | tr '\n' ' ' | sed 's/ $//g');
echo "$odd" > "$i.tmp";
echo "$even" >> "$i.tmp";
}
export -f dbl;
nchr=17; #number of chromosomes
iroot="malc"; #root string of input files for each chromosome
oroot="chr"; #root string for output files, in fastPHASE format
for ((i=1;i<=$nchr;i++));
do echo "chr"$i;
export infile="$iroot""$i";
h1=$(wc -l "$infile".ped | awk '{print $1}'); #n samples for fastPHASE header line 1
h2a=$(head -1 "$infile".ped | awk '{print NF}');
h2=$((($h2a-6)/2)); #n loci for fastPHASE header line 2
h3a=$(awk '{print $4}' "$infile".map);
h3=$(echo "P $h3a" | tr "\n" " "); #locus positions for fastPHASE header line 3
s=$(awk '{print $2}' "$infile".ped);
#split single line per sample plink format into two line per sample fastPHASE format
seq 1 $h1 | parallel --env dbl --env infile dbl;
#Reassemble into a single file:
outfile="$oroot""$i".inp;
echo "$h1" > "$outfile";
echo "$h2" >> "$outfile";
echo "$h3" >> "$outfile";
for ((j=1;j<=$h1;j++));
do sample=$(echo "$s" | sed -n $j"p")" ";
echo "writing chr $i sample $sample $j/$h1";
echo '>'"$sample" >> "$outfile";
head -1 "$j.tmp" | sed 's/0/?/g' >> "$outfile";
tail -n +2 "$j.tmp" | sed 's/0/?/g' >> "$outfile";
rm "$j.tmp";
done;
done;
3. Phase away.
dbl() {
ReplyDeletei=1;
f=final4_"$i".ped;
nrow=$(wc -l "$f" | awk '{print $1}');
echo "splitting row $i/$nrow";
# split lines containing genotypes, by allele, for each individual
#odd alleles
odd=$(sed -n $i"p" "$f" | tr ' ' '\n' | tail -n +7 | sed -n 'p;n' | tr '\n' ' ' | sed 's/ $//g');
#even alleles
even=$(sed -n $i"p" "$f" | tr ' ' '\n' | tail -n +7 | sed -n 'g;n;p' | tr '\n' ' ' | sed 's/ $//g');
echo "$odd" > "$i.tmp";
echo "$even" >> "$i.tmp";
}
export -f dbl;
nchr=4; #number of chromosomes
iroot="/home/cuser/Ankita-2017/shapeit/shapeit.v2.904.3.10.0-693.11.6.el7.x86_64/final4_"; #root string of input files for each chromosome
oroot="/home/cuser/Ankita-2017/shapeit/shapeit.v2.904.3.10.0-693.11.6.el7.x86_64/final4_"; #root string for output files, in fastPHASE format
for ((i=1;i<=$nchr;i++)); do
echo "chr"$i;
done
export infile="$iroot""$i";
h1=$(wc -l final4_"$i".ped | awk '{print $1}'); #n samples for fastPHASE header line 1
h2a=$(head -1 final4_"$i".ped | awk '{print NF}');
h2=$((($h2a-6)/2)); #n loci for fastPHASE header line 2
h3a=$(awk '{print $4}' final4_"$i".map);
h3=$(echo "P $h3a" | tr "\n" " "); #locus positions for fastPHASE header line 3
s=$(awk '{print $2}' final4_"$i".ped);
#split single line per sample plink format into two line per sample fastPHASE format
seq 1 $h1 | parallel --env dbl --env final4_"$i".ped dbl;
#seq 1..4 $h1 | dbl final4_"$i".ped dbl;
#Reassemble into a single file:
outfile="$oroot""$i".inp;
echo "$h1" > "$outfile";
echo "$h2" >> "$outfile";
echo "$h3" >> "$outfile";
for ((j=1;j<=$h1;j++));
do sample=$(echo "$s" | sed -n $j"p")" ";
echo "writing chr $i sample $sample $j/$h1";
echo '>'"$sample" >> "$outfile";
head -1 "$j.tmp" | sed 's/0/?/g' >> "$outfile";
tail -n +2 "$j.tmp" | sed 's/0/?/g' >> "$outfile";
rm "$j.tmp";
done;
done;
####I ran the above code and getting the following error.
writing chr 5 sample SG1-3 1/165
writing chr 5 sample SG1-14 2/165
head: cannot open '2.tmp' for reading: No such file or directory
tail: cannot open '2.tmp' for reading: No such file or directory
rm: cannot remove '2.tmp': No such file or directory
can you please help