Announcement

BIRLA INSTITUTE OF SCIENTIFIC RESEARCH, JAIPUR




Day2 Exercise - Session II
Next Generation Sequencing Exercises

Go Back

GATK Pipelines

    Instructions for deploying the GATK practice pipeline for variant calling
  1. software/FastQC/fastqc -o  sample1 -f fastq  sample1/sample1.fastq >& sample1/fastqc_sample1.log
  2. software/bwa/bwa mem -R '@RG\tID:AA\tSM:AA\tLB:AA\tPL:IONTORRENT' genome/hg19/hg19index/bwa/hg19.fasta  sample1/sample1.fastq > sample1/bwa_sample1.sam 2>  sample1/bwa_sample1.log 
  3. software/samtools/samtools view -bS  sample1/bwa_sample1.sam > sample1/bwa_sample1.bam 2> sample1/bwa_sample1_samtobam.log 
  4. ###pre
  5.  cp  sample1/bwa_sample1.bam  sample1/sample1_merged.bam 
  6.  java -Xmx16g -jar  software/picard/picard.jar SortSam I=sample1/sample1_merged.bam O=sample1/sample1_merged.sorted.bam SO=coordinate TMP_DIR=sample1/tmp >&  sample1/sample1_merged.sorted.log
  7.  software/samtools/samtools index  sample1/sample1_merged.sorted.bam >& sample1/sample1_merged.sorted.bam.index.log
  8.  java -Xmx4g -jar  software/picard/picard.jar MarkDuplicates I=sample1/sample1_merged.sorted.bam O=sample1/sample1_merged.sorted.markdup.bam REMOVE_DUPLICATES=true AS=true METRICS_FILE=sample1/sample1_merged.sorted.markdup.metrics VALIDATION_STRINGENCY=SILENT >&  sample1/sample1_merged.sorted.markeddup.log
  9.  software/samtools/samtools index  sample1/sample1_merged.sorted.markdup.bam >& sample1/sample1_merged.sorted.markdup.index.log
  10.  java -Xmx4g -jar  software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T RealignerTargetCreator -I  sample1/sample1_merged.sorted.markdup.bam -nt 1 -known  genome/Mills_and_1000G_gold_standard.indels.hg19.vcf -known  genome/1000G_phase1.indels.hg19.vcf -o  sample1/sample1_merged.sorted.markdup.bam.RealignerTargetCreator.list -log  sample1/sample1_merged.sorted.markdup.bam.RealignerTargetCreator.log
  11.  java -Xmx4g -jar  software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T IndelRealigner -I  sample1/sample1_merged.sorted.markdup.bam -targetIntervals  sample1/sample1_merged.sorted.markdup.bam.RealignerTargetCreator.list -known  genome/Mills_and_1000G_gold_standard.indels.hg19.vcf -known  genome/1000G_phase1.indels.hg19.vcf -log  sample1/sample1_merged.sorted.markdup.bam.indelrealigner.log -o  sample1/sample1_merged.sorted.markdup.realigned.bam
  12.  java -Xmx4g -jar  software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T BaseRecalibrator -log  sample1/sample1_merged.sorted.markdup.bam.indelrealigner.baserecalibrator.log -knownSites  genome/dbSNP_138.hg19.vcf -knownSites  genome/Mills_and_1000G_gold_standard.indels.hg19.vcf -knownSites  genome/1000G_phase1.indels.hg19.vcf -I  sample1/sample1_merged.sorted.markdup.realigned.bam -o  sample1/sample1_merged.sorted.markdup.realigned.recal.table
  13.  java -Xmx4g -jar  software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T PrintReads -log  sample1/sample1_merged.sorted.markdup.realigned.recal.table.printreads.log -I  sample1/sample1_merged.sorted.markdup.realigned.bam -BQSR  sample1/sample1_merged.sorted.markdup.realigned.recal.table -o  sample1/sample1_merged.sorted.markdup.realigned.recal.bam
  14. ###haplotypecaller

  15.  java -Xmx4g -jar  software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T HaplotypeCaller -stand_call_conf 30 -stand_emit_conf 10 -minPruning 10 -I sample1/sample1_merged.sorted.markdup.realigned.recal.bam --dbsnp  genome/dbSNP_138.hg19.vcf -o  Variant/allsamples.GATK.haplotypecaller.raw.vcf >&  Variant/HaplotypeCaller.GATK.vcf.log
  16.  java -Xmx4g -jar  software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T VariantFiltration -o  Variant/allsamples.GATK.haplotypecaller.var.flt.vcf --variant  Variant/allsamples.GATK.haplotypecaller.raw.vcf --clusterSize 3 --clusterWindowSize 10 --filterExpression "MQ0 >= 4 && ((MQ0 / (1.0 * DP)) > 0.1)" --filterName "HARD_TO_VALIDATE" --filterExpression "DP < 6" --filterName "LowCoverage" --filterExpression "QUAL < 30.0" --filterName "VeryLowQual" --filterExpression "QUAL >= 30.0 && QUAL < 50.0" --filterName "LowQual" --filterExpression "QD < 1.5" --filterName "LowQD" --filterExpression "SB > -10.0" --filterName "StrandBias" >&  Variant/allsamples.GATK.haplotypecaller.VariantFiltration.log
  17.  java -Xmx4g -jar  software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T SelectVariants -o  Variant/allsamples.haplotypecaller.var.flt.excludeFiltered.vcf --variant  Variant/allsamples.GATK.haplotypecaller.var.flt.vcf --excludeFiltered >&  Variant/allsamples.haplotypecaller.var.flt.vcf.excludeFiltered.log
  18.  software/annovar/convert2annovar.pl -format vcf4old --includeinfo  Variant/allsamples.haplotypecaller.var.flt.excludeFiltered.vcf >  Variant/allsamples.haplotypecaller.var.flt.excludeFiltered.vcf.annovar 2> Variant/allsamples.GATK.haplotypecaller.var.flt.excludeFiltered.vcf.annovar.log
  19.  software/annovar/summarize_annovar.pl --buildver hg19 --verdbsnp 138 Variant/allsamples.haplotypecaller.var.flt.excludeFiltered.vcf.annovar  software/annovar/humandb/ --outfile  Variant/allsamples.GATK.haplotypecaller.var.flt.excludeFiltered.vcf.annovar.annotation >&  Variant/allsamples.GATK.haplotypecaller.var.flt.excludeFiltered.vcf.annovar.annotation.log
  20. ####samtools

  21.  software/samtools/samtools mpileup -SDug -f genome/hg19/hg19index/bwa/hg19.fasta sample1/sample1_merged.sorted.markdup.realigned.recal.bam |  software/bin/bcftools view -bvcg - >  Variant/allsamples.samtools.var.raw.bcf 2> Variant/allsamples.samtools.var.raw.bcf.log
  22.  software/bcftools/bcftools view  Variant/allsamples.samtools.var.raw.bcf | vcfutils.pl varFilter -D1000 >  Variant/allsamples.samtools.var.raw.vcf 2> Variant/allsamples.samtools.var.raw.vcf.log
  23.  java -Xmx4g -jar  software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T VariantFiltration -o  Variant/allsamples.samtools.var.flt.vcf --variant  Variant/allsamples.samtools.var.raw.vcf --clusterSize 3 --clusterWindowSize 10 --filterExpression "MQ0 >= 4 && ((MQ0 / (1.0 * DP)) > 0.1)" --filterName "HARD_TO_VALIDATE" --filterExpression "DP < 6" --filterName "LowCoverage" --filterExpression "QUAL < 30.0" --filterName "VeryLowQual" --filterExpression "QUAL >= 30.0 && QUAL < 50.0" --filterName "LowQual" --filterExpression "QD < 1.5" --filterName "LowQD" --filterExpression "SB > -10.0" --filterName "StrandBias" >&  Variant/allsamples.samtools.var.flt.vcf.log
  24.  java -Xmx4g -jar  software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T SelectVariants -o  Variant/allsamples.samtools.var.flt.excludeFiltered.vcf --variant  Variant/allsamples.samtools.var.flt.vcf --excludeFiltered >&  Variant/allsamples.samtools.var.flt.vcf.excludeFiltered.log
  25.  software/annovar/convert2annovar.pl -format vcf4old --includeinfo Variant/allsamples.samtools.var.flt.excludeFiltered.vcf  >  Variant/allsamples.samtools.var.flt.excludeFiltered.vcf.annovar 2> Variant/allsamples.samtools.var.flt.excludeFiltered.vcf.annovar.log
  26.  software/annovar/summarize_annovar.pl --buildver hg19 --verdbsnp 138 Variant/allsamples.samtools.var.flt.excludeFiltered.vcf.annovar  software/annovar/humandb/ --outfile  Variant/allsamples.samtools.var.flt.excludeFiltered.vcf.annovar.annotation >&  Variant/allsamples.samtools.var.flt.excludeFiltered.vcf.annovar.annotation.log
  27. #######unified

  28.  java -Xmx4g -jar  software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T UnifiedGenotyper -stand_call_conf 30 -stand_emit_conf 10 -glm BOTH -out_mode EMIT_VARIANTS_ONLY -I  sample1/sample1_merged.sorted.markdup.realigned.recal.bam --dbsnp  genome/dbSNP_138.hg19.vcf -o  Variant/allsamples.GATK.unifiedgenotyper.raw.vcf >&  Variant/UnifiedGenotyper.GATK.vcf.log
  29.  java -Xmx4g -jar  software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T VariantFiltration -o  Variant/allsamples.GATK.unifiedgenotyper.var.flt.vcf --variant  Variant/allsamples.GATK.unifiedgenotyper.raw.vcf --clusterSize 3 --clusterWindowSize 10 --filterExpression "MQ0 >= 4 && ((MQ0 / (1.0 * DP)) > 0.1)" --filterName "HARD_TO_VALIDATE" --filterExpression "DP < 6" --filterName "LowCoverage" --filterExpression "QUAL < 30.0" --filterName "VeryLowQual" --filterExpression "QUAL >= 30.0 && QUAL < 50.0" --filterName "LowQual" --filterExpression "QD < 1.5" --filterName "LowQD" --filterExpression "SB > -10.0" --filterName "StrandBias" >&  Variant/allsamples.GATK.unifiedgenotyper.VariantFiltration.log
  30.  java -Xmx4g -jar  software/GenomeAnalysisTK/GenomeAnalysisTK.jar -R genome/hg19/hg19index/bwa/hg19.fasta -T SelectVariants -o  Variant/allsamples.unifiedgenotyper.var.flt.excludeFiltered.vcf --variant  Variant/allsamples.GATK.unifiedgenotyper.var.flt.vcf --excludeFiltered >&  Variant/allsamples.unifiedgenotyper.var.flt.vcf.excludeFiltered.log
  31.  software/annovar/convert2annovar.pl -format vcf4old --includeinfo  Variant/allsamples.unifiedgenotyper.var.flt.excludeFiltered.vcf >  Variant/allsamples.GATK.unifiedgenotyper.var.flt.excludeFiltered.vcf.annovar 2> Variant/allsamples.GATK.unifiedgenotyper.var.excludeFiltered.flt.vcf.annovar.log
  32.  software/annovar/summarize_annovar.pl --buildver hg19 --verdbsnp 138  Variant/allsamples.GATK.unifiedgenotyper.var.flt.excludeFiltered.vcf.annovar  software/annovar/humandb/ --outfile  Variant/allsamples.GATK.unifiedgenotyper.var.flt.excludeFiltered.vcf.annovar.annotation >&  Variant/allsamples.GATK.unifiedgenotyper.var.flt.excludeFiltered.vcf.annovar.annotation.log

************************************************************************


Go Back