Module emblaze.frame_colors
A helper module to compute HSL colors for each frame in an animated DR plot. The colors are chosen such that the perceptual distance between colors corresponds to the difference between the frames, with respect to some set of points of interest.
Expand source code
"""
A helper module to compute HSL colors for each frame in an animated DR plot.
The colors are chosen such that the perceptual distance between colors
corresponds to the difference between the frames, with respect to some set of
points of interest.
"""
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from colormath.color_objects import LabColor, HSLColor
from colormath.color_conversions import convert_color
import itertools
from numba.typed import List
from .utils import Field, inverse_intersection
def _clustered_ordering(distances):
"""
Returns an ordering of the items whose pairwise distances are given.
"""
clusterer = AgglomerativeClustering(n_clusters=len(distances), affinity='precomputed', linkage='average')
clusterer.fit(distances)
def walk_tree(children):
ii = itertools.count(len(distances))
children_dict = {next(ii): [x[0], x[1]] for x in children}
ii = itertools.count(len(distances))
children_list = [(next(ii), x[0], x[1]) for x in children]
stack = [children_list[-1][0]]
ordering = []
while stack:
last = stack.pop()
if last in children_dict:
l, r = children_dict[last]
if l not in children_dict:
ordering.append(l)
else:
stack.append(l)
if r not in children_dict:
ordering.append(r)
else:
stack.append(r)
else:
ordering.append(last)
return ordering
return walk_tree(clusterer.children_)
def _arrange_around_circle(distances, offset, ordering):
"""
Arrange the points represented by the given distance matrix around a circle.
The radius of the circle is a rough measure of 'clusteredness' of the data,
as measured by the average deviation from the mean normalized by the max
distance.
Args:
distances: An n x n distance matrix.
ordering: List of n indexes determining which order to lay the points in.
Returns:
An n x 2 array representing locations around a circle.
"""
# Find thetas first
thetas = [0.0]
theta_distances = distances ** 2
for i in range(1, len(ordering)):
thetas.append(thetas[-1] + theta_distances[ordering[i],ordering[i - 1]])
last_theta = thetas[-1] + theta_distances[ordering[-1], ordering[0]]
thetas = np.array(thetas) * 2 * np.pi / last_theta # scale around the circle
thetas += offset
# thetas += np.random.uniform(0.0, 2.0 * np.pi) # random offset
# Determine radius
# R = np.abs(theta_distances - np.mean(theta_distances)).mean() / np.max(theta_distances)
# absolute distance-based measure
# R = (distances.sum() / (len(distances.flatten()) - len(distances))) / max_dist
R = np.max([distances[i,j] for i in range(distances.shape[0]) for j in range(distances.shape[1]) if i != j])
R = 0.5 * np.log10(1 + 19 * R)
# Create the points
reduced = np.zeros((len(ordering), 2))
for index, theta in zip(ordering, thetas):
reduced[index] = [R * np.cos(theta), R * np.sin(theta)]
return reduced
def compute_colors(frames, ids_of_interest=None, scale_factor=1.0):
    """
    Computes HSL colors for each frame.

    Frames are compared through the neighbors of a sample of points, ordered
    with hierarchical clustering, and placed around a color wheel in the
    L*a*b* color space (at lightness 70) before being converted to HSL.

    Args:
        frames: A list of Embeddings.
        ids_of_interest: A list or array of IDs to limit distance calculation
            to. If None or empty, uses the full contents of each frame.
        scale_factor: Amount by which to scale the color wheel. Values larger
            than 1 effectively make the colors more saturated and appear more
            different.

    Returns:
        A list with one color per frame, expressed as integer tuples of
        (hue, saturation, lightness) where saturation and lightness are
        multiplied by 100. Returns an empty list if there are no frames.
    """
    n_frames = len(frames)
    if n_frames == 0:
        return []

    # Test None/length explicitly: `ids_of_interest or ...` raises for numpy
    # arrays, whose truth value is ambiguous.
    has_ids_of_interest = ids_of_interest is not None and len(ids_of_interest) > 0

    if n_frames == 1:
        # A lone frame has nothing to be compared against (and cannot be
        # clustered), so it sits at the neutral center of the color wheel.
        reduced = np.zeros((1, 2))
    else:
        if not has_ids_of_interest:
            distance_sample = frames[0].ids.tolist()
        elif isinstance(ids_of_interest, np.ndarray):
            distance_sample = ids_of_interest.tolist()
        else:
            distance_sample = list(ids_of_interest)
        if len(distance_sample) > 1000:
            distance_sample = np.random.choice(distance_sample, size=1000, replace=False).tolist()

        # Look up each frame's neighbors and build the typed ID list once,
        # instead of once per pair of frames.
        frame_neighbors = [frame.get_recent_neighbors()[distance_sample] for frame in frames]
        typed_sample = List(distance_sample)

        # First compute a distance matrix for the IDs for each frame
        outer_jaccard_distances = np.zeros((n_frames, n_frames))
        inner_jaccard_distances = np.zeros((n_frames, n_frames))
        for i, frame_1_neighbors in enumerate(frame_neighbors):
            for j, frame_2_neighbors in enumerate(frame_neighbors):
                # If the id set is the entire frame, there will be no outer
                # neighbors so we can just leave this at zero
                if has_ids_of_interest:
                    outer_jaccard_distances[i, j] = np.mean(inverse_intersection(
                        frame_1_neighbors, frame_2_neighbors, typed_sample, True))
                inner_jaccard_distances[i, j] = np.mean(inverse_intersection(
                    frame_1_neighbors, frame_2_neighbors, typed_sample, False))

        if not has_ids_of_interest:
            distances = inner_jaccard_distances
        elif len(ids_of_interest) == 1:
            distances = outer_jaccard_distances
        else:
            distances = 0.5 * (outer_jaccard_distances + inner_jaccard_distances)

        # Compute clusteredness in each frame (only used to determine offset of colors)
        neighbor_dists = [np.log(1 + frame.distances(distance_sample, distance_sample).flatten())
                          for frame in frames]
        clusteredness = np.array([np.abs(ndists - np.mean(ndists)).mean() / np.maximum(np.max(ndists), 1e-3)
                                  for ndists in neighbor_dists])

        # Compute an ordering using hierarchical clustering
        ordering_indexes = np.asarray(_clustered_ordering(distances), dtype=int)

        # Put the most cluster-y embedding first by rotating the ordering
        first_index = np.argmax(clusteredness)
        ordering_position = np.argmax(ordering_indexes == first_index)
        ordering_indexes = np.roll(ordering_indexes, -ordering_position)

        # Arrange the colors around a color wheel in the L*a*b* color space.
        offset = clusteredness[first_index]
        reduced = _arrange_around_circle(distances, offset, ordering_indexes)

    # Generate colors in L*a*b* space and convert to HSL
    colors = []
    for point in reduced:
        # The wheel's y coordinate is passed as a* and its x coordinate as b*
        lab = LabColor(70.0,
                       point[1] * 100.0 * scale_factor,
                       point[0] * 100.0 * scale_factor)
        hsl = convert_color(lab, HSLColor)
        colors.append((int(hsl.hsl_h), int(hsl.hsl_s * 100.0), int(hsl.hsl_l * 100.0)))
    return colors
Functions
def compute_colors(frames, ids_of_interest=None, scale_factor=1.0)
-
Computes HSL colors for each frame.
Args
frames
- A list of Embeddings.
ids_of_interest
- A list of IDs to limit distance calculation to. If None, uses the full contents of each frame.
scale_factor
- Amount by which to scale the color wheel. Values larger than 1 effectively make the colors more saturated and appear more different.
Returns
A list of HSL colors, expressed as tuples of (hue, saturation, lightness).
Expand source code
def compute_colors(frames, ids_of_interest=None, scale_factor=1.0):
    """
    Computes HSL colors for each frame.

    Frames are compared through the neighbors of a sample of points, ordered
    with hierarchical clustering, and placed around a color wheel in the
    L*a*b* color space (at lightness 70) before being converted to HSL.

    Args:
        frames: A list of Embeddings.
        ids_of_interest: A list or array of IDs to limit distance calculation
            to. If None or empty, uses the full contents of each frame.
        scale_factor: Amount by which to scale the color wheel. Values larger
            than 1 effectively make the colors more saturated and appear more
            different.

    Returns:
        A list with one color per frame, expressed as integer tuples of
        (hue, saturation, lightness) where saturation and lightness are
        multiplied by 100. Returns an empty list if there are no frames.
    """
    n_frames = len(frames)
    if n_frames == 0:
        return []

    # Test None/length explicitly: `ids_of_interest or ...` raises for numpy
    # arrays, whose truth value is ambiguous.
    has_ids_of_interest = ids_of_interest is not None and len(ids_of_interest) > 0

    if n_frames == 1:
        # A lone frame has nothing to be compared against (and cannot be
        # clustered), so it sits at the neutral center of the color wheel.
        reduced = np.zeros((1, 2))
    else:
        if not has_ids_of_interest:
            distance_sample = frames[0].ids.tolist()
        elif isinstance(ids_of_interest, np.ndarray):
            distance_sample = ids_of_interest.tolist()
        else:
            distance_sample = list(ids_of_interest)
        if len(distance_sample) > 1000:
            distance_sample = np.random.choice(distance_sample, size=1000, replace=False).tolist()

        # Look up each frame's neighbors and build the typed ID list once,
        # instead of once per pair of frames.
        frame_neighbors = [frame.get_recent_neighbors()[distance_sample] for frame in frames]
        typed_sample = List(distance_sample)

        # First compute a distance matrix for the IDs for each frame
        outer_jaccard_distances = np.zeros((n_frames, n_frames))
        inner_jaccard_distances = np.zeros((n_frames, n_frames))
        for i, frame_1_neighbors in enumerate(frame_neighbors):
            for j, frame_2_neighbors in enumerate(frame_neighbors):
                # If the id set is the entire frame, there will be no outer
                # neighbors so we can just leave this at zero
                if has_ids_of_interest:
                    outer_jaccard_distances[i, j] = np.mean(inverse_intersection(
                        frame_1_neighbors, frame_2_neighbors, typed_sample, True))
                inner_jaccard_distances[i, j] = np.mean(inverse_intersection(
                    frame_1_neighbors, frame_2_neighbors, typed_sample, False))

        if not has_ids_of_interest:
            distances = inner_jaccard_distances
        elif len(ids_of_interest) == 1:
            distances = outer_jaccard_distances
        else:
            distances = 0.5 * (outer_jaccard_distances + inner_jaccard_distances)

        # Compute clusteredness in each frame (only used to determine offset of colors)
        neighbor_dists = [np.log(1 + frame.distances(distance_sample, distance_sample).flatten())
                          for frame in frames]
        clusteredness = np.array([np.abs(ndists - np.mean(ndists)).mean() / np.maximum(np.max(ndists), 1e-3)
                                  for ndists in neighbor_dists])

        # Compute an ordering using hierarchical clustering
        ordering_indexes = np.asarray(_clustered_ordering(distances), dtype=int)

        # Put the most cluster-y embedding first by rotating the ordering
        first_index = np.argmax(clusteredness)
        ordering_position = np.argmax(ordering_indexes == first_index)
        ordering_indexes = np.roll(ordering_indexes, -ordering_position)

        # Arrange the colors around a color wheel in the L*a*b* color space.
        offset = clusteredness[first_index]
        reduced = _arrange_around_circle(distances, offset, ordering_indexes)

    # Generate colors in L*a*b* space and convert to HSL
    colors = []
    for point in reduced:
        # The wheel's y coordinate is passed as a* and its x coordinate as b*
        lab = LabColor(70.0,
                       point[1] * 100.0 * scale_factor,
                       point[0] * 100.0 * scale_factor)
        hsl = convert_color(lab, HSLColor)
        colors.append((int(hsl.hsl_h), int(hsl.hsl_s * 100.0), int(hsl.hsl_l * 100.0)))
    return colors