Once you have cleaned paired-end reads, map them to a reference using BWA.

map reads
#Set up: start an interactive session and make a working copy of the exercise files.
#start idev session if you haven't
#(skip this step if you are already inside an idev session)
idev

#copy over the exercise directory
#NOTE(review): `cds` is not a standard shell command; presumably a site-provided
#shortcut that changes to your scratch directory -- confirm on your system
cds
cd rad_intro
#recursively copy the exercise folder into the current directory (rad_intro)
cp -r /work/02260/grovesd/lonestar/intro_to_rad_2017/mapping/mapping_ddRAD_bwa .
#the commands that follow use relative file names, so run them from this directory
cd mapping_ddRAD_bwa

#look at sample fastqs
ls -l *.fq

#we have 3 sets of paired end reads
#(samples A, B and C, each with an _r1.fq and an _r2.fq file)

#check our reference
#(we're using just chromosome3 to keep things fast)
#press q to leave less when you are done looking
less stickleback_chrom3.fasta

#load the BWA module
#(makes the bwa command available in this session)
module load bwa

#build a BWA index for our reference
#(the mapping step below expects an indexed reference, so do this before mapping)
bwa index stickleback_chrom3.fasta

#look at the documentation for bwa mem
#(running bwa mem with no arguments prints its usage/options summary)
bwa mem


#map each sample's paired-end reads to the indexed reference
#(one SAM file is written per sample: sampleA.sam, sampleB.sam, sampleC.sam)
for sample in sampleA sampleB sampleC; do
  bwa mem stickleback_chrom3.fasta "${sample}_r1.fq" "${sample}_r2.fq" > "${sample}.sam"
done

#list the SAM files that were produced
ls -l *.sam


#SAM files are plain text: easy to read, but they take up a lot of space.
#BAM files hold the same information, take up less space and are faster to read.
#So convert each SAM file to BAM and sort it, using samtools.

#make the samtools command available
module load samtools

#for each sample: convert SAM to BAM, then sort, writing <sample>.sorted.bam
for sample in sampleA sampleB sampleC; do
  samtools view -b "${sample}.sam" | samtools sort -o "${sample}.sorted.bam"
done

#compare the file sizes of the SAMs and the BAMs
ls -lh *.sam
ls -lh *.bam


#The mapping, conversion and sorting steps can also be chained into a single
#pipeline, which skips writing the intermediate SAM file, e.g.:
bwa mem stickleback_chrom3.fasta sampleA_r1.fq sampleA_r2.fq \
  | samtools view -b \
  | samtools sort -o sampleA.sorted.bam

  • No labels