Source code for iCallSV.dellyVcf2targetSeqView

"""
dellyVcf2targetSeqView
~~~~~~~~~~~~~~~~~~~~~~

:Description: Convert VCF to targetSeqView

"""

'''
Created on Mar 19, 2015
Description: Convert VCF to targetSeqView
#Example:
SampleDesc    Chr1    Start1    End1    LeftSideSegDup    Chr2    Start2    End2    RightSideSeqDup    ValidationStatus    Sample    SplitsSample
Ramos    15    22462315    22462465    TRUE    14    106467050    106467150    TRUE    Failed PCR    1320KB0009MultipleAlnsort.bam    1320KB0009.bam
::Input::
sampleName: Name of the sample that has the structural abberations
sampleBamName: Name of the bam file.
sampleSplitBaName: Name of the split bam file (Use bam file if you dont have split bam file)
vcfFile: Input Delly VCF file for the conversion
outputDir: Directory to write the output file
outputFileName: Name of the output File
::Output::
outputFile: TargetSeqView format text file for a given vcf file.
@author: Ronak H Shah
'''

import vcf
import checkparameters as cp
import logging
import coloredlogs

logger = logging.getLogger('iCallSV.dellyVcf2targetSeqView')
coloredlogs.install(level='DEBUG')

[docs]def Convert2targetSeqView( sampleName, sampleBamName, sampleSplitBamName, vcfFile, outputDir, outputFileName): """This ``converts`` the Delly Vcf file having tumor normal, to tab-delimited format for input to targetSeqView :param str sampleName: str for the name of the sample being analyzed :param str sampleBamName: str for the pair-end reads bam file :param str sampleSplitBamName: str for the split reads bam file :param str vcfFile: str of vcf file to be converted :param str outputDir: str for the output directory :param str outputFileName: str for the output File :return: A str name of tab-delimited file :rtype: str """ logger.info("Convert2targetSeqView: Will convert vcf to targetSeqVie format") cp.checkFile(vcfFile) cp.checkDir(outputDir) logger.info( "Convert2targetSeqView: All Input Parameters look good. Lets convert to tab-delimited file") vcf_reader = vcf.Reader(open(vcfFile, 'r')) outputFile = outputDir + "/" + outputFileName outputHandle = open(outputFile, "w") outputHandle.write( "SampleDesc\tChr1\tStart1\tEnd1\tLeftSideSegDup\tChr2\tStart2\tEnd2\tRightSideSeqDup\tValidationStatus\tSample\tSplitsSample\n") for record in vcf_reader: (chrom1, start1, start2, ciEndNeg, ciEndPos, ciPosNeg, ciPosPos, chrom2, contype, str1, str2) = (None for i in range(11)) chrom1 = record.CHROM start1 = record.POS if("END" in record.INFO): start2 = record.INFO['END'] if("CHR2" in record.INFO): chrom2 = record.INFO['CHR2'] if("CT" in record.INFO): contype = record.INFO['CT'] (startCT, endCT) = contype.split("to") if("CIEND" in record.INFO): ciEndNeg, ciEndPos = record.INFO['CIEND'] if(abs(ciEndNeg) < 50): ciEndNeg = 50 if(abs(ciEndPos) < 50): ciEndNeg = 50 if("CIPOS" in record.INFO): ciPosNeg, ciPosPos = record.INFO['CIPOS'] if(abs(ciPosNeg) < 50): ciPosNeg = 50 if(abs(ciPosPos) < 50): ciPosNeg = 50 outputHandle.write( sampleName + "\t" + str(chrom1) + "\t" + str(int(start1) - abs(int(ciPosNeg))) + "\t" + str(int(start1) + int(ciPosPos)) + "\tFALSE\t" + str(chrom2) + "\t" + str(int(start2) - abs(int(ciEndNeg))) + "\t" + str(int(start2) + int(ciEndPos)) + "\tFALSE\tFailed PCR\t" + str(sampleBamName) + "\t" + str(sampleSplitBamName) + "\n") outputHandle.close() logger.info("Convert2targetSeqView: Finished conversion of Vcf file to targetSeqView file format.") logger.info("Convert2targetSeqView: Output can be found: %s", outputFile) return(outputFile)