BIRLA INSTITUTE OF SCIENTIFIC RESEARCH, JAIPUR
Day2 Exercise - Session II
Next Generation Sequencing Exercises
Go Back
GATK Pipelines
-
Instructions for deploying the GATK practice pipeline for variant calling
software/FastQC/fastqc -o sample1 -f fastq sample1/sample1.fastq >& sample1/fastqc_sample1.log
software/bwa/bwa mem -R '@RG\tID:AA\tSM:AA\tLB:AA\tPL:IONTORRENT' genome/hg19/hg19index/bwa/hg19.fasta sample1/sample1.fastq > sample1/bwa_sample1.sam 2> sample1/bwa_sample1.log
software/samtools/samtools view -bS sample1/bwa_sample1.sam > sample1/bwa_sample1.bam 2> sample1/bwa_sample1_samtobam.log
###pre
cp sample1/bwa_sample1.bam sample1/sample1_merged.bam
java -Xmx16g -jar software/picard/picard.jar SortSam I=sample1/sample1_merged.bam O=sample1/sample1_merged.sorted.bam SO=coordinate TMP_DIR=sample1/tmp >& sample1/sample1_merged.sorted.log
software/samtools/samtools index sample1/sample1_merged.sorted.bam >& sample1/sample1_merged.sorted.bam.index.log
java -Xmx4g -jar software/picard/picard.jar MarkDuplicates I=sample1/sample1_merged.sorted.bam O=sample1/sample1_merged.sorted.markdup.bam REMOVE_DUPLICATES=true AS=true METRICS_FILE=sample1/sample1_merged.sorted.markdup.metrics VALIDATION_STRINGENCY=SILENT >& sample1/sample1_merged.sorted.markeddup.log
software/samtools/samtools index sample1/sample1_merged.sorted.markdup.bam >& sample1/sample1_merged.sorted.markdup.index.log
java -Xmx4g -jar software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T RealignerTargetCreator -I sample1/sample1_merged.sorted.markdup.bam -nt 1 -known genome/Mills_and_1000G_gold_standard.indels.hg19.vcf -known genome/1000G_phase1.indels.hg19.vcf -o sample1/sample1_merged.sorted.markdup.bam.RealignerTargetCreator.list -log sample1/sample1_merged.sorted.markdup.bam.RealignerTargetCreator.log
java -Xmx4g -jar software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T IndelRealigner -I sample1/sample1_merged.sorted.markdup.bam -targetIntervals sample1/sample1_merged.sorted.markdup.bam.RealignerTargetCreator.list -known genome/Mills_and_1000G_gold_standard.indels.hg19.vcf -known genome/1000G_phase1.indels.hg19.vcf -log sample1/sample1_merged.sorted.markdup.bam.indelrealigner.log -o sample1/sample1_merged.sorted.markdup.realigned.bam
java -Xmx4g -jar software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T BaseRecalibrator -log sample1/sample1_merged.sorted.markdup.bam.indelrealigner.baserecalibrator.log -knownSites genome/dbSNP_138.hg19.vcf -knownSites genome/Mills_and_1000G_gold_standard.indels.hg19.vcf -knownSites genome/1000G_phase1.indels.hg19.vcf -I sample1/sample1_merged.sorted.markdup.realigned.bam -o sample1/sample1_merged.sorted.markdup.realigned.recal.table
java -Xmx4g -jar software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T PrintReads -log sample1/sample1_merged.sorted.markdup.realigned.recal.table.printreads.log -I sample1/sample1_merged.sorted.markdup.realigned.bam -BQSR sample1/sample1_merged.sorted.markdup.realigned.recal.table -o sample1/sample1_merged.sorted.markdup.realigned.recal.bam
java -Xmx4g -jar software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T HaplotypeCaller -stand_call_conf 30 -stand_emit_conf 10 -minPruning 10 -I sample1/sample1_merged.sorted.markdup.realigned.recal.bam --dbsnp genome/dbSNP_138.hg19.vcf -o Variant/allsamples.GATK.haplotypecaller.raw.vcf >& Variant/HaplotypeCaller.GATK.vcf.log
java -Xmx4g -jar software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T VariantFiltration -o Variant/allsamples.GATK.haplotypecaller.var.flt.vcf --variant Variant/allsamples.GATK.haplotypecaller.raw.vcf --clusterSize 3 --clusterWindowSize 10 --filterExpression "MQ0 >= 4 && ((MQ0 / (1.0 * DP)) > 0.1)" --filterName "HARD_TO_VALIDATE" --filterExpression "DP < 6" --filterName "LowCoverage" --filterExpression "QUAL < 30.0" --filterName "VeryLowQual" --filterExpression "QUAL >= 30.0 && QUAL < 50.0" --filterName "LowQual" --filterExpression "QD < 1.5" --filterName "LowQD" --filterExpression "SB > -10.0" --filterName "StrandBias" >& Variant/allsamples.GATK.haplotypecaller.VariantFiltration.log
java -Xmx4g -jar software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T SelectVariants -o Variant/allsamples.haplotypecaller.var.flt.excludeFiltered.vcf --variant Variant/allsamples.GATK.haplotypecaller.var.flt.vcf --excludeFiltered >& Variant/allsamples.haplotypecaller.var.flt.vcf.excludeFiltered.log
software/annovar/convert2annovar.pl -format vcf4old --includeinfo Variant/allsamples.haplotypecaller.var.flt.excludeFiltered.vcf > Variant/allsamples.haplotypecaller.var.flt.excludeFiltered.vcf.annovar 2> Variant/allsamples.GATK.haplotypecaller.var.flt.excludeFiltered.vcf.annovar.log
software/annovar/summarize_annovar.pl --buildver hg19 --verdbsnp 138 Variant/allsamples.haplotypecaller.var.flt.excludeFiltered.vcf.annovar software/annovar/humandb/ --outfile Variant/allsamples.GATK.haplotypecaller.var.flt.excludeFiltered.vcf.annovar.annotation >& Variant/allsamples.GATK.haplotypecaller.var.flt.excludeFiltered.vcf.annovar.annotation.log
software/samtools/samtools mpileup -SDug -f genome/hg19/hg19index/bwa/hg19.fasta sample1/sample1_merged.sorted.markdup.realigned.recal.bam | software/bin/bcftools view -bvcg - > Variant/allsamples.samtools.var.raw.bcf 2> Variant/allsamples.samtools.var.raw.bcf.log
software/bcftools/bcftools view Variant/allsamples.samtools.var.raw.bcf | vcfutils.pl varFilter -D1000 > Variant/allsamples.samtools.var.raw.vcf 2> Variant/allsamples.samtools.var.raw.vcf.log
java -Xmx4g -jar software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T VariantFiltration -o Variant/allsamples.samtools.var.flt.vcf --variant Variant/allsamples.samtools.var.raw.vcf --clusterSize 3 --clusterWindowSize 10 --filterExpression "MQ0 >= 4 && ((MQ0 / (1.0 * DP)) > 0.1)" --filterName "HARD_TO_VALIDATE" --filterExpression "DP < 6" --filterName "LowCoverage" --filterExpression "QUAL < 30.0" --filterName "VeryLowQual" --filterExpression "QUAL >= 30.0 && QUAL < 50.0" --filterName "LowQual" --filterExpression "QD < 1.5" --filterName "LowQD" --filterExpression "SB > -10.0" --filterName "StrandBias" >& Variant/allsamples.samtools.var.flt.vcf.log
java -Xmx4g -jar software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T SelectVariants -o Variant/allsamples.samtools.var.flt.excludeFiltered.vcf --variant Variant/allsamples.samtools.var.flt.vcf --excludeFiltered >& Variant/allsamples.samtools.var.flt.vcf.excludeFiltered.log
software/annovar/convert2annovar.pl -format vcf4old --includeinfo Variant/allsamples.samtools.var.flt.excludeFiltered.vcf > Variant/allsamples.samtools.var.flt.excludeFiltered.vcf.annovar 2> Variant/allsamples.samtools.var.flt.excludeFiltered.vcf.annovar.log
software/annovar/summarize_annovar.pl --buildver hg19 --verdbsnp 138 Variant/allsamples.samtools.var.flt.excludeFiltered.vcf.annovar software/annovar/humandb/ --outfile Variant/allsamples.samtools.var.flt.excludeFiltered.vcf.annovar.annotation >& Variant/allsamples.samtools.var.flt.excludeFiltered.vcf.annovar.annotation.log
java -Xmx4g -jar software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T UnifiedGenotyper -stand_call_conf 30 -stand_emit_conf 10 -glm BOTH -out_mode EMIT_VARIANTS_ONLY -I sample1/sample1_merged.sorted.markdup.realigned.recal.bam --dbsnp genome/dbSNP_138.hg19.vcf -o Variant/allsamples.GATK.unifiedgenotyper.raw.vcf >& Variant/UnifiedGenotyper.GATK.vcf.log
java -Xmx4g -jar software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T VariantFiltration -o Variant/allsamples.GATK.unifiedgenotyper.var.flt.vcf --variant Variant/allsamples.GATK.unifiedgenotyper.raw.vcf --clusterSize 3 --clusterWindowSize 10 --filterExpression "MQ0 >= 4 && ((MQ0 / (1.0 * DP)) > 0.1)" --filterName "HARD_TO_VALIDATE" --filterExpression "DP < 6" --filterName "LowCoverage" --filterExpression "QUAL < 30.0" --filterName "VeryLowQual" --filterExpression "QUAL >= 30.0 && QUAL < 50.0" --filterName "LowQual" --filterExpression "QD < 1.5" --filterName "LowQD" --filterExpression "SB > -10.0" --filterName "StrandBias" >& Variant/allsamples.GATK.unifiedgenotyper.VariantFiltration.log
java -Xmx4g -jar software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T SelectVariants -o Variant/allsamples.unifiedgenotyper.var.flt.excludeFiltered.vcf --variant Variant/allsamples.GATK.unifiedgenotyper.var.flt.vcf --excludeFiltered >& Variant/allsamples.unifiedgenotyper.var.flt.vcf.excludeFiltered.log
software/annovar/convert2annovar.pl -format vcf4old --includeinfo Variant/allsamples.unifiedgenotyper.var.flt.excludeFiltered.vcf > Variant/allsamples.GATK.unifiedgenotyper.var.flt.excludeFiltered.vcf.annovar 2> Variant/allsamples.GATK.unifiedgenotyper.var.excludeFiltered.flt.vcf.annovar.log
software/annovar/summarize_annovar.pl --buildver hg19 --verdbsnp 138 Variant/allsamples.GATK.unifiedgenotyper.var.flt.excludeFiltered.vcf.annovar software/annovar/humandb/ --outfile Variant/allsamples.GATK.unifiedgenotyper.var.flt.excludeFiltered.vcf.annovar.annotation >& Variant/allsamples.GATK.unifiedgenotyper.var.flt.excludeFiltered.vcf.annovar.annotation.log
###hypo
####samtools
#######unified
************************************************************************