#!/usr/bin/env python
"""libPoMo.fasta
================
This module provides functions to read, write and access fasta files.
Objects
-------
Classes:
- :class:`FaStream`, fasta file sequence stream object
- :class:`MFaStream`, multiple alignment fasta file sequence stream object
- :class:`FaSeq`, fasta file sequence object
- :class:`MFaStrFilterProps`, define multiple fasta file filter preferences
Exception Classes:
- :class:`NotAFastaFileError`
Functions:
- :func:`filter_mfa_str()`, filter a given :class:`MFaStream`
according to the filters defined in :class:`MFaStrFilterProps`
- :func:`init_seq()`, initialize fasta sequence stream from file
- :func:`open_seq()`, open fasta file
- :func:`save_as_vcf()`, save a given :class:`FaSeq` in variant call
format (VCF)
- :func:`read_seq_from_fo()`, read a single sequence from file object
- :func:`read_align_from_fo()`, read an alignment from file object
----
"""
__docformat__ = 'restructuredtext'
import libPoMo.seqbase as sb
import libPoMo.vcf as vcf
import sys
import re
class NotAFastaFileError(sb.SequenceDataError):
    """Signal that a given file is not a valid fasta file."""
def read_seq_from_fo(line, fo, getAlignEndFlag=False):
    """Read a single fasta sequence.

    Read a single fasta sequence from file object *fo* and save it to
    a new :class:`Seq <libPoMo.seqbase.Seq>` sequence object.  Return
    the header line of the next fasta sequence and the newly created
    sequence.  If no new sequence is found, the next header line will
    be set to None.

    :param str line: Header line of the sequence.
    :param fo fo: File object of the fasta file, positioned right
      after the header line.
    :param Boolean getAlignEndFlag: If set to true, an additional
      Boolean value that specifies if a multiple sequence alignment
      ends, is returned.

    :rtype: (str, Seq) | (str, Seq, Boolean)

    """
    def get_sp_name_and_description(fa_header_line):
        """Extract species name and description.

        Extract species name and description from a fasta file header
        line `fa_header_line`.  The name is the first whitespace
        separated field without its leading character (the '>'); the
        description is the remainder of the line or an empty string.

        """
        fields = fa_header_line.rstrip().split(maxsplit=1)
        name = fields[0][1:]
        description = fields[1] if len(fields) > 1 else ""
        return (name, description)

    def fill_seq_from_fo(line, fo, seq):
        """Fill `seq` with the next sequence read from `fo`.

        Returns the next header line and a flag that is set to true
        if the end of an alignment is reached (a line only contains a
        newline character).  If no new sequence is found, the next
        header line will be set to None.

        :param str line: Header line of the sequence.
        :param fo fo: File object of the fasta file.
        :param Seq seq: The sequence that will be filled.

        """
        (seq.name, seq.descr) = get_sp_name_and_description(line)
        chunks = []
        alignEndFl = False
        # Stays None unless a further header is actually read.  Do not
        # reuse `line` for this: if `fo` is already exhausted the loop
        # body never runs and the header that was just consumed would
        # be reported as the next one.
        nextHeaderLine = None
        for currentLine in fo:
            if currentLine == '\n':
                # Newline found, end of alignment.
                alignEndFl = True
            elif currentLine[0] == '>':
                # New species found in line.
                nextHeaderLine = currentLine
                break
            else:
                chunks.append(currentLine.rstrip())
        data = "".join(chunks)
        seq.data = data
        seq.dataLen = len(data)
        return (nextHeaderLine, alignEndFl)

    seq = sb.Seq()
    (newHeaderLine, alignEndFl) = fill_seq_from_fo(line, fo, seq)
    if getAlignEndFlag is False:
        return (newHeaderLine, seq)
    return (newHeaderLine, seq, alignEndFl)
class FaStream():
    """Hold one sequence of a fasta file at a time.

    Only the sequence of a single species / individual / chromosome is
    kept; :func:`read_next_seq` replaces it with the following one, if
    there is any.  This keeps memory usage low for huge files.

    This object is usually initialized with :func:`init_seq`.

    :param str name: Name of the stream.
    :param Seq firstSeq: First sequence (:class:`Seq
      <libPoMo.seqbase.Seq>` object) to be saved.
    :param str nextHL: Next header line.
    :param fo faFileObject: File object associated with the stream.

    :ivar str name: Stream name.
    :ivar Seq seq: Saved sequence (:class:`Seq
      <libPoMo.seqbase.Seq>` object)
    :ivar str nextHeaderLine: Next header line.
    :ivar fo fo: File object that points to the start of the data of
      the next sequence.

    """

    def __init__(self, name, firstSeq, nextHL, faFileObject):
        """Store the stream state on the new `FaStream` object."""
        self.fo = faFileObject
        self.nextHeaderLine = nextHL
        self.seq = firstSeq
        self.name = name

    def print_info(self, maxB=50):
        """Print sequence information.

        Print the associated file object, the next header line, the
        header of the currently stored sequence, its length and at
        most `maxB` of its bases (defaults to 50).

        """
        current = self.seq
        print("Associated file object:", self.fo)
        print("Next header line:", self.nextHeaderLine)
        print("Saved Sequence:")
        current.print_fa_header()
        print("Printing", maxB, "out of a total of",
              current.dataLen, "bases.")
        print(current.data[:maxB])

    def read_next_seq(self):
        """Read next fasta sequence in file.

        The return value is the name of the next sequence or None if
        no next sequence is found.

        """
        header = self.nextHeaderLine
        if header is None:
            return None
        # The previously stored sequence is purged before being replaced.
        self.seq.purge()
        (self.nextHeaderLine, self.seq) = read_seq_from_fo(header, self.fo)
        return self.seq.name

    def close(self):
        """Close the linked file."""
        self.fo.close()
def read_align_from_fo(line, fo):
    """Read a single fasta alignment.

    Read a single fasta alignment from file object *fo* and save it to
    new :class:`Seq <libPoMo.seqbase.Seq>` sequence objects.  Return
    the header line of the next fasta alignment and the newly created
    sequences in a list.  If no new alignment is found, the next header
    line will be set to None.

    Sequences are read until either the alignment end flag reported by
    :func:`read_seq_from_fo` is set or no further header line is
    available.  `set_rc()` is called on every sequence that is read.

    :param str line: Header line of the sequence.  If it is None, no
      sequence is read and ``(None, [])`` is returned.
    :param fo fo: File object of the fasta file.

    :rtype: (str, [Seq])

    """
    seqL = []
    headerLn = line
    alignEndFl = False
    while headerLn is not None and alignEndFl is not True:
        (headerLn, seq, alignEndFl) = read_seq_from_fo(
            headerLn, fo, getAlignEndFlag=True)
        seq.set_rc()
        seqL.append(seq)
    # `headerLn` always holds the most recent "next header" value, and it
    # is also defined when the loop body never ran.
    return (headerLn, seqL)
class MFaStream():
    """Store a multiple alignment fasta file sequence stream.

    The sequences of one gene / alignment are saved for all species /
    individuals / chromosomes.  Functions are provided to read in the
    next gene / alignment in the file, if there is any.  This saves
    memory if files are huge.

    Initialization of an :class:`MFaStream` opens the given fasta
    file, checks if it is in fasta format and reads the first
    alignment.  The end of an alignment is reached when a line only
    contains the newline character.  This object can later be used to
    parse the whole multiple alignment fasta file.

    Alignments can be filtered with :func:`filter_mfa_str()`.

    :param str faFileName: File name of the multiple alignment fasta file.
    :param int maxskip: Only look *maxskip* lines for the start of a
      sequence (defaults to 50).
    :param str name: Set the name of the stream to *name*, otherwise
      set it to the stripped filename.

    :ivar str name: Stream name.
    :ivar [Seq] seqL: Saved sequences (:class:`Seq
      <libPoMo.seqbase.Seq>` objects) in a list.
    :ivar int nSpecies: Number of saved sequences / species in the alignment.
    :ivar str nextHeaderLine: Next header line.
    :ivar fo fo: File object that points to the start of the data of
      the next sequence.

    Please close the associated file object with
    :func:`MFaStream.close` when you don't need it anymore.

    """

    def __init__(self, faFileName, maxskip=50, name=None):
        """Open a fasta file and initialize :class:`MFaStream`.

        :raises NotAFastaFileError: If the end of the file is reached
          before a header line is found, or if no header line is found
          within *maxskip* lines.

        """
        faFile = sb.gz_open(faFileName)
        try:
            if name is None:
                name = sb.stripFName(faFileName)
            # Find the start of the first sequence.
            headerFound = False
            for _ in range(maxskip):
                line = faFile.readline()
                if line == '':
                    raise NotAFastaFileError("File contains no data.")
                if line[0] == '>':
                    # species name found in line
                    headerFound = True
                    break
            if not headerFound:
                # `maxskip` is an int; it has to be converted before it
                # can be concatenated into the message.
                raise NotAFastaFileError(
                    "Didn't find a species header within " +
                    str(maxskip) + " lines.")
            (nextHL, seqL) = read_align_from_fo(line, faFile)
        except Exception:
            # Do not leak the file handle if initialization fails.
            faFile.close()
            raise
        if nextHL is not None:
            nextHL = nextHL.rstrip()
        self.name = name
        self.seqL = seqL
        self.nSpecies = len(self.seqL)
        self.nextHeaderLine = nextHL
        self.fo = faFile

    def print_info(self, maxB=50):
        """Print sequence information.

        Print information about this MFaStream object: the associated
        file object, the next header line and, for every stored
        sequence, its header, its length and a maximum of `maxB` bases
        (defaults to 50).

        """
        print("Associated file object:", self.fo)
        print("Next header line:", self.nextHeaderLine)
        print("Saved Sequences:")
        for i in range(self.nSpecies):
            seq = self.seqL[i]
            seq.print_fa_header()
            if seq.get_rc() is True:
                print("Sequence is reversed and complemented.")
            print("Printing", maxB, "out of a total of",
                  seq.dataLen, "bases.")
            print(seq.data[0:maxB])

    def read_next_align(self):
        """Read next alignment in fasta file.

        The return value is the name of the first sequence of the
        newly saved alignment or None if no next alignment is found.

        """
        if self.nextHeaderLine is None:
            return None
        (self.nextHeaderLine, self.seqL) = read_align_from_fo(
            self.nextHeaderLine, self.fo)
        self.nSpecies = len(self.seqL)
        return self.seqL[0].name

    def orient(self, firstOnly=False):
        """Orient all sequences of the alignment to be in forward direction.

        Every considered sequence whose `get_rc()` is True is passed
        through `rev_comp()`.

        :param Boolean firstOnly: If true, orient the first sequence only.

        :raises ValueError: If *firstOnly* is neither True nor False.

        """
        if firstOnly is False:
            nSeqs = self.nSpecies
        elif firstOnly is True:
            nSeqs = 1
        else:
            raise ValueError("`firstOnly` must be True or False.")
        for i in range(nSeqs):
            if self.seqL[i].get_rc() is True:
                self.seqL[i].rev_comp()

    def print_msa(self, fo=sys.stdout):
        """Print multiple sequence alignment at point.

        The header and the data of every stored sequence are printed,
        followed by a line break that terminates the alignment.

        :param fileObject fo: Print to file object fo.  Defaults to
          stdout.

        """
        for s in self.seqL:
            s.print_fa_header(fo=fo)
            s.print_data(fo=fo)
        print('\n', file=fo)

    def close(self):
        """Close the linked file object."""
        self.fo.close()
class MFaStrFilterProps():
    """Filter preferences for multiple fasta alignments.

    Describes which filters :func:`filter_mfa_str` applies to an
    :class:`MFaStream` and with which thresholds.

    By default, all filters are applied (all `check_*` variables are
    set to True).

    :param int nSpecies: Number of species that are aligned.

    :ivar Boolean check_all_aligned: Check if all treated species
      are available in the alignment (`nSpecies` gives the number
      of species, given to the object upon initialization).
    :ivar Boolean check_divergence: Check if the divergence of the
      reference genome (the first sequence in the alignment) is lower
      than `maxDiv` (defaults to 10 percent).
    :ivar Boolean check_start_codons: Check if all start codons
      are conserved.
    :ivar Boolean check_stop_codons: Check if all stop codons are
      conserved.
    :ivar Boolean check_frame_shifting_gaps: Check that there
      are no frame-shifting gaps.
    :ivar Boolean check_for_long_gaps: Check if no gap is longer
      than `maxGapLength` (defaults to 30) bases.
    :ivar Boolean check_nonsense_codon: Check if there is no
      premature stop codon.
    :ivar Boolean check_exon_length: Check that the exon is
      longer than `minExonLen` (defaults to 21).
    :ivar Boolean check_exon_numbers: Check if exon numbers match
      for all sequences in the alignment.

    """

    def __init__(self, nSpecies):
        """Enable every filter and set the default thresholds."""
        # Thresholds and sizes.
        self.nSpecies = nSpecies
        self.maxDiv = 0.1
        self.maxGapLength = 30
        self.minExonLen = 21
        # Filter switches; everything is on by default.
        for switch in ("check_all_aligned",
                       "check_divergence",
                       "check_start_codons",
                       "check_stop_codons",
                       "check_frame_shifting_gaps",
                       "check_for_long_gaps",
                       "check_nonsense_codon",
                       "check_exon_length",
                       "check_exon_numbers"):
            setattr(self, switch, True)
def filter_mfa_str(mfaStr, fp, verb=None):
    """Check multiple sequence alignment of an MFaStream.

    Multiple sequence alignments usually include alignments that are
    not apt for analysis.  These low quality alignments need to be
    filtered out of the original multiple sequence alignment fasta
    file.  If `verb` is unset from None, information about any
    possible rejection is printed to the standard output.

    The enabled filters are applied in a fixed order and the function
    returns as soon as one of them rejects the alignment.

    :param MFaStream mfaStr: :class:`MFaStream` object to check.
    :param MFaStrFilterProps fp: :class:`MFaStrFilterProps`; Properties
      of the filter to be applied.
    :param Boolean verb: Verbosity.

    :rtype: Boolean, True if all filters have been passed.

    """
    # Define start and stop codon regex strings
    startCodon = r"(atg)"
    stopCodons = r"(tag|taa|tga)"
    # Define regex pattern for indel
    indel = r'-'

    def reject(name, reason):
        """Report the rejection if verbose; always return False."""
        if verb is not None:
            print(name, "rejection;", reason)
        return False

    def check_all_aligned():
        if len(mfaStr.seqL) == fp.nSpecies:
            return True
        # The name is only looked up when it is printed, so an empty
        # alignment is rejected without an IndexError in quiet mode.
        if verb is not None:
            print(mfaStr.seqL[0].name, "rejection;",
                  "Not all species are aligned.")
        return False

    def check_divergence():
        s0Data = mfaStr.seqL[0].data
        for s in mfaStr.seqL[1:]:
            sData = s.data
            counts = 0
            for i in range(len(s0Data)):
                if s0Data[i].lower() != sData[i].lower():
                    counts += 1
            if (counts / len(s0Data)) > fp.maxDiv:
                return reject(mfaStr.seqL[0].name,
                              "Sequences are too diverged.")
        return True

    def check_start_codons():
        pattern = r'^' + startCodon
        for s in mfaStr.seqL:
            (nEx, nExTot) = s.get_exon_nr()
            # Only the first exon is tested for a start codon.
            if nEx == 1:
                if re.search(pattern, s.data, re.I) is None:
                    return reject(s.name,
                                  "Start codons are not preserved.")
        return True

    def check_stop_codons():
        pattern = stopCodons + r'$'
        for s in mfaStr.seqL:
            (nEx, nExTot) = s.get_exon_nr()
            # Only the last exon is tested for a stop codon.
            if nEx == nExTot:
                if re.search(pattern, s.data, re.I) is None:
                    return reject(s.name,
                                  "Stop codons are not preserved.")
        return True

    def check_frame_shifting_gaps():
        pattern = indel + r'+'
        for s in mfaStr.seqL:
            for m in re.finditer(pattern, s.data, re.I):
                # A gap has been found.  Check for frame shift.
                if ((m.end() - m.start()) % 3) != 0:
                    return reject(s.name, "Frame-shifting gap.")
        return True

    def check_for_long_gaps():
        pattern = indel + r'{' + repr(fp.maxGapLength + 1) + r',}'
        for s in mfaStr.seqL:
            if re.search(pattern, s.data, re.I) is not None:
                return reject(s.name, "A gap is too long.")
        return True

    def check_nonsense_codon():
        # Stop codon that is not located at the very end of the data.
        pattern = r'(' + stopCodons + r')' + r'(?!$)'
        for s in mfaStr.seqL:
            inFr = None
            # Every occurrence has to be inspected; looking at the first
            # one only would let an out-of-frame hit hide a later
            # in-frame premature stop codon.
            for m in re.finditer(pattern, s.data, re.I):
                if inFr is None:
                    inFr = s.get_in_frame()
                # Stop codon pattern has been found.  Check if frame
                # is not shifted.
                if (m.start() + inFr) % 3 == 0:
                    return reject(s.name,
                                  "A nonsense codon has been found.")
        return True

    def check_exon_length():
        if len(mfaStr.seqL[0].data) < fp.minExonLen:
            return reject(mfaStr.seqL[0].name, "Exon is too short.")
        return True

    def check_exon_numbers():
        nExTotL = [s.get_exon_nr()[1] for s in mfaStr.seqL]
        if any(nExTot != nExTotL[0] for nExTot in nExTotL):
            return reject(mfaStr.seqL[0].name,
                          "Exon numbers do not match.")
        return True

    if fp.check_all_aligned and not check_all_aligned():
        return False
    if fp.check_for_long_gaps and not check_for_long_gaps():
        return False
    # Divergence is only defined if there is something to compare to.
    if (fp.nSpecies > 1) and fp.check_divergence:
        if not check_divergence():
            return False
    if fp.check_start_codons and not check_start_codons():
        return False
    if fp.check_stop_codons and not check_stop_codons():
        return False
    if fp.check_frame_shifting_gaps and not check_frame_shifting_gaps():
        return False
    if fp.check_nonsense_codon and not check_nonsense_codon():
        return False
    if fp.check_exon_length and not check_exon_length():
        return False
    if fp.check_exon_numbers and not check_exon_numbers():
        return False
    return True
class FaSeq():
    """Store sequence data retrieved from a fasta file.

    :ivar str name: Name of the `FaSeq` object.
    :ivar [Seq] seqL: List of :class:`Seq <libPoMo.seqbase.Seq>`
      objects that store the actual sequence data.
    :ivar dict seqD: Dictionary for the stored sequences; it is
      initialized empty.
    :ivar int nSpecies: Number of saved species / individuals /
      chromosomes.

    """

    def __init__(self):
        """Create an empty `FaSeq` object."""
        self.name = ""
        self.seqL = []
        self.seqD = {}
        self.nSpecies = 0

    def print_info(self, maxB=50):
        """Print fasta sequence information.

        Print fasta sequence identifier, species names, the length of
        the sequence and a maximum of `maxB` bases (defaults to 50).

        """
        print("Sequence identifier:", self.name)
        for i in range(self.nSpecies):
            seq = self.seqL[i]
            seq.print_fa_header()
            print("Printing", maxB, "out of a total of",
                  seq.dataLen, "bases.")
            print(seq.data[0:maxB])

    def get_seq_names(self):
        """Return a list with sequence names."""
        return [self.seqL[i].name for i in range(self.nSpecies)]

    def get_seq_by_id(self, i):
        """Return sequence number `i` as `Seq` object.

        The stored object itself is returned, not a copy.

        """
        return self.seqL[i]

    def get_seq_base(self, seq, pos):
        """Return base at 1-based position `pos` in sequence with name
        `seq`.

        :raises SequenceDataError: If no sequence is named `seq` or if
          `pos` is larger than the length of the sequence.

        """
        names = self.get_seq_names()
        try:
            i = names.index(seq)
        except ValueError:
            raise sb.SequenceDataError("Sequence name not found.")
        if pos > self.seqL[i].dataLen:
            raise sb.SequenceDataError("Position out of range.")
        return self.seqL[i].get_base(pos)

    def get_distance(self):
        """Number of segregating bases.

        Count the positions at which at least one of the stored
        sequences differs from the first sequence.

        """
        count = 0
        reference = self.seqL[0]
        # NOTE(review): `get_base` is called with 0..dataLen-1 here,
        # whereas `get_seq_base` documents 1-based positions -- confirm
        # that `Seq.get_base` handles position 0 as intended.
        for i in range(reference.dataLen):
            base = reference.get_base(i)
            for s in range(self.nSpecies):
                if base != self.seqL[s].get_base(i):
                    count += 1
                    break
        return count
def init_seq(faFileName, maxskip=50, name=None):
    """Open a fasta file and initialize an :class:`FaStream`.

    This function tries to open the given fasta file, checks if it is
    in fasta format and reads the first sequence.  It returns an
    :class:`FaStream` object.  This object can later be used to parse
    the whole fasta file.

    Please close the associated file object with
    :func:`FaStream.close` when you don't need it anymore.  If
    initialization fails, the file is closed before the exception is
    propagated.

    :param str faFileName: File name of the fasta file.
    :param int maxskip: Only look *maxskip* lines for the start of a
      sequence (defaults to 50).
    :param str name: Set the name of the sequence to *name*, otherwise
      set it to the stripped filename.

    :raises NotAFastaFileError: If the end of the file is reached
      before a header line is found, or if no header line is found
      within *maxskip* lines.

    """
    faFile = sb.gz_open(faFileName)
    try:
        if name is None:
            name = sb.stripFName(faFileName)
        # Find the start of the first sequence.
        headerFound = False
        for _ in range(maxskip):
            line = faFile.readline()
            if line == '':
                raise NotAFastaFileError("File contains no data.")
            if line[0] == '>':
                # species name found in line
                headerFound = True
                break
        if not headerFound:
            # `maxskip` is an int; convert it before concatenation.
            raise NotAFastaFileError(
                "Didn't find a species header within " +
                str(maxskip) + " lines.")
        (nextHL, seq) = read_seq_from_fo(line, faFile)
    except Exception:
        # Do not leak the file handle if initialization fails.
        faFile.close()
        raise
    # There is no next header line if the file holds a single sequence.
    if nextHL is not None:
        nextHL = nextHL.rstrip()
    return FaStream(name, seq, nextHL, faFile)
def open_seq(faFileName, maxskip=50, name=None):
    """Open and read a fasta file.

    This function tries to open the given fasta file, checks if it is
    in fasta format and reads the sequence(s).  It returns an
    :class:`FaSeq` object whose `seqL` lists the sequences in file
    order and whose `seqD` maps the sequence names to the sequences.
    The file is closed before the function returns or raises.

    :param str faFileName: Name of the fasta file.
    :param int maxskip: Only look *maxskip* lines for the start of a sequence
      (defaults to 50).
    :param str name: Set the name of the sequence to *name* otherwise
      set it to the stripped filename.

    :raises NotAFastaFileError: If the end of the file is reached
      before a header line is found, or if no header line is found
      within *maxskip* lines.
    :raises SequenceDataError: If a sequence name or its data is
      missing or if the sequence names are not unique.

    """
    def test_sequence(faSequence):
        """Test that all sequences are named, filled and unique."""
        names = []
        for i in range(faSequence.nSpecies):
            seq = faSequence.seqL[i]
            names.append(seq.name)
            if seq.name == '' or seq.data == '':
                raise sb.SequenceDataError("Sequence name or data is missing.")
        if faSequence.nSpecies > len(set(names)):
            raise sb.SequenceDataError("Sequence names are not unique.")

    fastaSeq = FaSeq()
    faFile = sb.gz_open(faFileName)
    try:
        if name is not None:
            fastaSeq.name = name
        else:
            fastaSeq.name = sb.stripFName(faFileName)
        # Find the start of the first sequence.
        headerFound = False
        for _ in range(maxskip):
            line = faFile.readline()
            if line == '':
                raise NotAFastaFileError("File contains no data.")
            if line[0] == '>':
                # species name found in line
                headerFound = True
                break
        if not headerFound:
            # `maxskip` is an int; convert it before concatenation.
            raise NotAFastaFileError(
                "Didn't find a species header within " +
                str(maxskip) + " lines.")
        # Read sequence after sequence until no further header is found.
        while line is not None:
            (line, seq) = read_seq_from_fo(line, faFile)
            fastaSeq.seqL.append(seq)
            fastaSeq.nSpecies += 1
    finally:
        # Close the file regardless of whether reading succeeded.
        faFile.close()
    test_sequence(fastaSeq)
    for s in fastaSeq.seqL:
        fastaSeq.seqD[s.name] = s
    return fastaSeq
def save_as_vcf(faSeq, ref, VCFFileName):
    """Save the given :class:`FaSeq` in VCF format.

    In general, we want to convert a fasta file with various
    individuals with the help of a reference that contains one
    sequence to a VCF file that contains all the SNPs.  This can be
    done with this function.  Until now it is not possible to do this
    conversion for several chromosomes for each individual in one run.
    Still, the conversion can be done chromosome by chromosome.

    This function saves the SNPs of *faSeq*, a given :class:`FaSeq`
    (fasta sequence) object in VCF format to the file *VCFFileName*.
    The reference genome *ref*, to which *faSeq* is compared, needs
    to be passed as a :class:`Seq <libPoMo.seqbase.Seq>` object.

    The function compares all sequences in *faSeq* to the sequence
    given in *ref*.  The names of the individuals in the saved VCF
    file will be the sequence names of the *faSeq* object.  Only
    positions at which at least one individual differs from the
    reference are written.

    ::

      #CHROM = sequence name of the reference
      POS    = position relative to reference
      ID     = .
      REF    = base of reference
      ALT    = SNP (e.g. 'C' or 'G,T' if 2 different SNPs are present)
      QUAL   = .
      FILTER = .
      INFO   = .
      FORMAT = GT

    :param FaSeq faSeq: :class:`FaSeq` object to be converted.
    :param Seq ref: :class:`Seq <libPoMo.seqbase.Seq>` object of the
      reference sequence.
    :param str VCFFileName: Name of the VCF output file.

    :raises SequenceDataError: If *faSeq* is not an :class:`FaSeq`, if
      *ref* is not a `Seq`, if *faSeq* holds no sequences or if a
      sequence and the reference differ in length.

    """
    def get_altBases_string(sAltBases):
        """Return the comma separated ALT bases string."""
        return ','.join(sAltBases)

    def get_indiv_string(indivData, sAltBases):
        """Return the string of the individual data.

        Every base in `indivData` is translated to '0' if it is not an
        alternative base and to the 1-based index of the base in the
        sorted list of alternative bases `sAltBases` otherwise.

        E.g.:
        REF = A
        ALT = C,G
        individual i1 has A
        individual i2 has C
        individual i3 has G

        Then the string should look like:
        '0\\t1\\t2'
        -> 0 for REF, 1 for first ALT and 2 for second ALT

        """
        altIndex = {base: str(k)
                    for (k, base) in enumerate(sAltBases, start=1)}
        return '\t'.join(altIndex.get(base, '0') for base in indivData)

    def get_vcf_line(chromName, pos,
                     refBase, altBaseString, indivString):
        """Return a VCF file line built from the given data."""
        return '\t'.join([chromName,
                          str(pos),
                          '.',            # id
                          refBase,
                          altBaseString,
                          '.',            # qual
                          '.',            # filter
                          '.',            # info
                          "GT",           # format
                          indivString])

    if (not isinstance(faSeq, FaSeq)):
        raise sb.SequenceDataError("`faSeq` is not an FaSeq object.")
    if (not isinstance(ref, sb.Seq)):
        raise sb.SequenceDataError("`ref` is not a Seq object.")
    if faSeq.nSpecies == 0:
        raise sb.SequenceDataError("`faSeq` has no saved sequences.")
    for i in range(0, faSeq.nSpecies):
        if faSeq.seqL[i].dataLen != ref.dataLen:
            raise sb.SequenceDataError(
                "Sequence " + faSeq.seqL[i].name +
                " has different length than reference.")

    VCFFile = sb.gz_open(VCFFileName, mode='w')
    try:
        print(vcf.get_header_line_string(faSeq.get_seq_names()),
              file=VCFFile)
        # loop over bases
        for i in range(0, ref.dataLen):
            refBase = ref.data[i]
            # Bases of all individuals at this position.
            indivData = [faSeq.seqL[s].data[i]
                         for s in range(0, faSeq.nSpecies)]
            altBases = {base for base in indivData if base != refBase}
            if not altBases:
                # No SNP at this position; nothing to write.
                continue
            sAltBases = sorted(altBases)
            print(
                get_vcf_line(ref.name, i+1, refBase,
                             get_altBases_string(sAltBases),
                             get_indiv_string(indivData, sAltBases)),
                file=VCFFile)
    finally:
        # Close the output file even if writing fails.
        VCFFile.close()
    return