# Source code for iCallSV.dellyVcf2Tab
"""
dellyVcf2Tab
~~~~~~~~~~~~
:Description: This module converts the Delly Vcf file having tumor normal, to tab-delimited format for input to iAnnotateSV
"""
'''
Created on Mar 18, 2015
Description: This module converts the Delly Vcf file having tumor normal, to tab-delimited format for input to iAnnotateSV
@author: Ronak H Shah
::Input::
vcfFile: Input vcf file to convert
outputFileName: Name of the output file
OutputDir: Directory for output file
::Output::
outputFile: Tab-delimited file containing:
chr1: The chromosome name for the first break point [1,2,3,4,5,6,7 etc..],
pos1: The chromosome location for the first break point [1-based],
str1: The read direction for the first break point [0=top/plus/reference, 1=bottom/minus/complement],
chr2: The chromosome name for the second break point [1,2,3,4,5,6,7 etc..],
pos2: The chromosome location for the second break point [1-based],
str2: The read direction for the second break point [0=top/plus/reference, 1=bottom/minus/complement],
'''
import os
import vcf
import checkparameters as cp
import logging
import coloredlogs
# Module-level logger used by vcf2tab for its progress messages.
logger = logging.getLogger('iCallSV.dellyVcf2Tab')
# NOTE: this runs at import time, so importing the module configures
# coloredlogs at DEBUG level as a side effect.
coloredlogs.install(level='DEBUG')
def _strands_from_ct(contype):
    """Translate a connection type (CT) value into a pair of strand flags.

    :param str contype: connection type of the form ``<start>to<end>``, e.g. ``3to5``
    :return: ``(str1, str2)`` where an end of ``3`` maps to ``0`` and an end of ``5``
        maps to ``1``; ``(None, None)`` when the value cannot be interpreted
    :rtype: tuple
    """
    end_to_strand = {3: 0, 5: 1}
    try:
        # Both the two-way unpack and int() raise ValueError on malformed input.
        (startCT, endCT) = contype.split("to")
        startCT = int(startCT)
        endCT = int(endCT)
    except ValueError:
        return (None, None)
    if startCT in end_to_strand and endCT in end_to_strand:
        return (end_to_strand[startCT], end_to_strand[endCT])
    return (None, None)


def vcf2tab(vcfFile, outputDir, verbose):
    """This ``converts`` the Delly Vcf file having tumor normal, to tab-delimited format for input to iAnnotateSV

    The output file is named after the input vcf file with its extension
    replaced by ``.tab`` and is written into ``outputDir``. It starts with a
    header line (``chr1 pos1 str1 chr2 pos2 str2``) followed by one line per
    vcf record. Fields that are missing from a record's INFO column (``END``,
    ``CHR2``, ``CT``) are written as ``None``. A ``CT`` value that cannot be
    interpreted also yields ``None`` for both strands instead of aborting the
    conversion.

    :param str vcfFile: str of vcf file to be converted
    :param str outputDir: str for the output directory
    :param bool verbose: a boolean; when true, progress messages are logged
    :return: A str name of tab-delimited file
    :rtype: str
    """
    cp.checkFile(vcfFile)
    cp.checkDir(outputDir)
    if verbose:
        logger.info("dellyVcf2Tab: All Input Parameters look good. Lets convert to tab-delimited file")
    outputFileName = os.path.splitext(os.path.basename(vcfFile))[0] + ".tab"
    outputFile = os.path.join(outputDir, outputFileName)
    # Context managers guarantee both handles are closed even if parsing or
    # writing fails part-way through. The reader is created before the output
    # file is opened so a vcf that cannot be parsed does not leave an empty
    # output file behind.
    with open(vcfFile, 'r') as vcfHandle:
        vcf_reader = vcf.Reader(vcfHandle)
        with open(outputFile, "w") as outputHandle:
            outputHandle.write("chr1\tpos1\tstr1\tchr2\tpos2\tstr2\n")
            for record in vcf_reader:
                chrom1 = record.CHROM
                start1 = record.POS
                # Absent INFO keys fall back to None (written out as "None").
                start2 = record.INFO.get('END')
                chrom2 = record.INFO.get('CHR2')
                str1 = None
                str2 = None
                if "CT" in record.INFO:
                    contype = record.INFO['CT']
                    (str1, str2) = _strands_from_ct(contype)
                    if str1 is None and verbose:
                        logger.info(
                            "dellyVcf2Tab: The connection type (CT) given in the vcf file is incorrect.CT: %s",
                            contype)
                fields = (chrom1, start1, str1, chrom2, start2, str2)
                outputHandle.write("\t".join(str(field) for field in fields) + "\n")
    if verbose:
        logger.info("dellyVcf2Tab: Finished conversion of Vcf file to tab-delimited file")
        logger.info("dellyVcf2Tab: Output can be found: %s", outputFile)
    return outputFile