Source code for prism.annotate

import cycler
import sys
import cleanlog

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pybedtools as pb
import prism.util as util

# Module-level logger; `run` uses it to report invalid input (mismatched bed files and names).
logger = cleanlog.ColoredLogger('annotate')

def scatter_1d(subclone_assignments, num_subclones, depths, fingerprint_fractions, annotation_names, annotation_mask, width, height, dpi):
    """Draw the annotated scatterplot for a one-dimensional PRISM analysis.

    Fingerprint fraction goes on the x axis and depth on the y axis. Every
    subclone is drawn first as a translucent cloud of points, then each
    annotation is overlaid with ``x`` markers and a legend label.

    The function only draws onto a newly created figure; it neither returns
    nor saves it.

    :param subclone_assignments: Subclone assignment of each fingerprint
        epilocus. It is compared against an integer with ``==`` and the
        result is used as a mask, so an array-like supporting element-wise
        comparison is required.
    :param int num_subclones: Number of subclones; indices
        ``0 .. num_subclones - 1`` are plotted.
    :param depths: Depths (pattern counts). Must support NumPy-style
        ``[mask, 0]`` indexing.
    :param fingerprint_fractions: Fingerprint fractions. Must support
        NumPy-style ``[mask, 0]`` indexing.
    :param list annotation_names: Names of the annotations, used as legend
        labels.
    :param annotation_mask: One mask per annotation, paired with
        ``annotation_names`` by position, selecting the annotated epiloci.
    :param float width: Figure width.
    :param float height: Figure height.
    :param int dpi: Figure DPI. Accepted but not used by this function.
    """
    figure = plt.figure(figsize=(width, height))
    axes = figure.add_subplot(111)

    axes.grid()
    axes.set_axisbelow(True)
    axes.set_xlabel('Fraction of fingerprint')
    axes.set_ylabel('Depth')

    # Background layer: one translucent, outline-free cloud per subclone.
    cloud_style = dict(s=60, alpha=0.2, linewidth=0)
    for index in range(num_subclones):
        members = (subclone_assignments == index)
        axes.scatter(
            fingerprint_fractions[members, 0],
            depths[members, 0],
            **cloud_style,
        )

    # Restart the property (colour) cycle before drawing the annotation layer.
    plt.gca().set_prop_cycle(None)

    # Foreground layer: annotated epiloci as labelled crosses.
    cross_style = dict(marker='x', s=60, lw=1.33)
    for label, selected in zip(annotation_names, annotation_mask):
        axes.scatter(
            fingerprint_fractions[selected, 0],
            depths[selected, 0],
            label=label,
            **cross_style,
        )
def scatter_2d(subclone_assignments, num_subclones, fingerprint_fractions, annotation_names, annotation_mask, width, height, dpi):
    """Draw the annotated scatterplot for a two-dimensional PRISM analysis.

    The first fingerprint-fraction column goes on the x axis (``FF1``) and
    the second on the y axis (``FF2``). Every subclone is drawn first as a
    translucent cloud of points, then each annotation is overlaid with ``x``
    markers and a legend label.

    The function only draws onto a newly created figure; it neither returns
    nor saves it.

    :param subclone_assignments: Subclone assignment of each fingerprint
        epilocus. It is compared against an integer with ``==`` and the
        result is used as a mask, so an array-like supporting element-wise
        comparison is required.
    :param int num_subclones: Number of subclones; indices
        ``0 .. num_subclones - 1`` are plotted.
    :param fingerprint_fractions: Fingerprint fractions. Must support
        NumPy-style ``[mask, 0]`` and ``[mask, 1]`` indexing.
    :param list annotation_names: Names of the annotations, used as legend
        labels.
    :param annotation_mask: One mask per annotation, paired with
        ``annotation_names`` by position, selecting the annotated epiloci.
    :param float width: Figure width.
    :param float height: Figure height.
    :param int dpi: Figure DPI. Accepted but not used by this function.
    """
    figure = plt.figure(figsize=(width, height))
    axes = figure.add_subplot(111)

    axes.grid()
    axes.set_axisbelow(True)
    axes.set_xlabel('FF1')
    axes.set_ylabel('FF2')

    # Background layer: one translucent, outline-free cloud per subclone.
    cloud_style = dict(s=60, alpha=0.2, linewidth=0)
    for index in range(num_subclones):
        members = (subclone_assignments == index)
        axes.scatter(
            fingerprint_fractions[members, 0],
            fingerprint_fractions[members, 1],
            **cloud_style,
        )

    # Restart the property (colour) cycle before drawing the annotation layer.
    plt.gca().set_prop_cycle(None)

    # Foreground layer: annotated epiloci as labelled crosses.
    cross_style = dict(marker='x', s=60, lw=1.33)
    for label, selected in zip(annotation_names, annotation_mask):
        axes.scatter(
            fingerprint_fractions[selected, 0],
            fingerprint_fractions[selected, 1],
            label=label,
            **cross_style,
        )
def run(input_fp, output_fp, bed_fps, annotation_names, output_figure_fp=None, dpi=400, width=4, height=4, scale=1, font_family=None):
    """Annotate fingerprint epiloci of a PRISM result file with BED overlaps.

    Each data line of ``input_fp`` is written to ``output_fp`` with one extra
    tab-separated column: the comma-joined names of every annotation whose
    BED file overlaps that epilocus (an empty string when none does). The
    first line of ``input_fp`` is skipped and is not copied to the output.

    When ``output_figure_fp`` is given, an annotated scatterplot is also
    saved there.

    :param str input_fp: Path to the PRISM result file to annotate.
    :param str output_fp: Path of the annotated result file to write.
    :param list bed_fps: Paths to the BED files, one per annotation.
    :param list annotation_names: Annotation names, paired with ``bed_fps``
        by position.
    :param str output_figure_fp: Path of the figure to save, or ``None`` to
        skip plotting.
    :param int dpi: DPI used when saving the figure.
    :param float width: Figure width.
    :param float height: Figure height.
    :param scale: Forwarded to ``util.preset_rc``.
    :param font_family: Forwarded to ``util.preset_rc``.
    :raises ValueError: If ``bed_fps`` and ``annotation_names`` differ in
        length.
    """
    if len(bed_fps) != len(annotation_names):
        message = f'The number of bed files and their names should match. (Given {len(bed_fps)} bed files and {len(annotation_names)} names.)'
        logger.error(message)
        # Stop here: names and bed files are paired by position, so going on
        # with mismatched lists cannot produce a meaningful annotation.
        raise ValueError(message)

    util.preset_rc(scale=scale, font_family=font_family)

    # Read the result lines once, before the output file is opened, so the
    # same lines serve both parsing and writing (and so that annotating a
    # file in place does not truncate it before it has been read).
    with open(input_fp) as inFile:
        inFile.readline()  # Skip the first (header) line.
        result_lines = inFile.readlines()

    headers = []
    subclone_assignments = []
    depths = []
    fingerprint_fractions = []
    for line in result_lines:
        # Only header, subclone, depth and fingerprint fractions are used.
        header, _cluster, subclone, depth, _count, ffs = util.parse_result_line(line)
        headers.append(header)
        subclone_assignments.append(subclone)
        depths.append(depth)
        fingerprint_fractions.append(ffs)

    header_bed = util.prepare_header_bed(headers)

    num_annotations = len(annotation_names)
    beds = [pb.BedTool(fp) for fp in bed_fps]

    # annotation_mask[a, e] is True when epilocus e overlaps annotation a.
    annotation_mask = np.zeros((num_annotations, len(headers)), dtype=bool)
    for annotation_index, bed in enumerate(beds):
        for epiloci_index, interval in enumerate(header_bed.intersect(bed, c=True)):
            overlap_counts = int(interval.fields[-1])
            # If there's overlap with current bed file with annotations, mark as annotated.
            annotation_mask[annotation_index, epiloci_index] = (overlap_counts != 0)

    with open(output_fp, 'w') as outFile:
        for epiloci_index, line in enumerate(result_lines):
            annotation = ','.join(
                name
                for name, mask in zip(annotation_names, annotation_mask)
                if mask[epiloci_index]
            )
            print(line.strip() + '\t%s' % annotation, file=outFile)

    if output_figure_fp is not None:
        subclone_assignments = np.array(subclone_assignments)
        fingerprint_fractions = np.array(fingerprint_fractions)
        depths = np.array(depths)

        num_subclones = len(set(subclone_assignments))
        n_dim = len(fingerprint_fractions[0])

        # Only one- and two-dimensional analyses have a plotting routine;
        # for any other dimensionality nothing is drawn before saving.
        if n_dim == 1:
            scatter_1d(subclone_assignments, num_subclones, depths, fingerprint_fractions, annotation_names, annotation_mask, width, height, dpi)
        elif n_dim == 2:
            scatter_2d(subclone_assignments, num_subclones, fingerprint_fractions, annotation_names, annotation_mask, width, height, dpi)

        plt.legend()
        plt.tight_layout()
        plt.savefig(output_figure_fp, dpi=dpi)