Table of Contents
1 Helper function definitions
1.1 Plotting
1.2 Organisation
2 No structure
2.1 Checks
2.2 Scan
2.3 Plots
2.3.1 Time
2.3.1.1 Trends
2.3.2 Noise level
2.3.3 Largest cluster
2.3.4 Number of clusters
3 Mono blob
3.1 Checks
4 Di blob
4.1 Checks
5 Quadro blob
5.1 Checks
6 Blobs
6.1 Checks
6.2 Scan
6.3 Plots
6.3.1 Time
6.3.2 Noise level
6.3.3 Largest cluster
6.3.4 Number of clusters
7 Moons
7.1 Checks
7.2 Scan
7.3 Plots
7.3.1 Time
7.3.2 Noise level
7.3.3 Largest cluster
7.3.4 Number of clusters
8 Mono circle
8.1 Checks
9 Circles
9.1 Checks
9.2 Scan
9.3 Plots
9.3.1 Time
9.3.2 Noise level
9.3.3 Largest cluster
9.3.4 Number of clusters
10 Varied
10.1 Checks
10.2 Scan
10.3 Plots
10.3.1 Time
10.3.2 Noise level
10.3.3 Largest cluster
10.3.4 Number of clusters
10.3.5 Evaluation
11 Aniso
11.1 Checks
11.2 Scan
11.3 Plots
11.3.1 Time
11.3.2 Noise level
11.3.3 Largest cluster
11.3.4 Number of clusters
11.3.5 Evaluation
12 Backbone dihedrals
12.1 Checks
12.2 Scan
12.3 Plots
12.3.1 Time
12.3.2 Noise level
12.3.3 Largest cluster
12.3.4 Number of clusters
12.3.5 Evaluation
12.3.5.1 Hierarchical manual
13 Peptide (TICA)
13.1 Checks
13.2 Scan
13.3 Plots
13.3.1 Time
13.3.2 Noise level
13.3.3 Largest cluster
13.3.4 Number of clusters
13.3.5 Evaluation
13.3.6 Hierarchical semi-automatic
14 Langerin (PCA)
14.1 Checks
Cluster parameter scans¶
Remember to compile cnnclustering with TRACE_CYTHON=0 if timings should be measured.
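A hypothetical sketch of such a rebuild from within the notebook; this assumes (not confirmed by the package docs) that setup.py consumes TRACE_CYTHON as an environment variable to toggle Cython line tracing:
[ ]:
import os
import subprocess

# Hypothetical rebuild step; the path to the source checkout is a placeholder.
subprocess.run(
    ["python", "setup.py", "build_ext", "--inplace"],
    cwd="path/to/CommonNNClustering",  # placeholder source directory
    env={**os.environ, "TRACE_CYTHON": "0"},
    check=True,
)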
[1]:
from collections import defaultdict
import itertools
import json
import os
import pathlib
import sys

import cnnclustering
from cnnclustering import cluster, plot
from cnnclustering import _fit, _primitive_types, _types
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.metrics import pairwise_distances
from sklearn.neighbors import KDTree
from sklearn.preprocessing import StandardScaler
from tqdm.notebook import tqdm

import helper
[2]:
import importlib
importlib.reload(helper)
[2]:
<module 'helper' from '/home/janjoswig/repo/CommonNNClustering/docsrc/benchmark/helper.py'>
[229]:
mpl.rcParams["figure.dpi"] = 300
mpl.rcParams["font.size"] = 6
mpl.rcParams["figure.figsize"] = (2, 2/1.618)
mpl.rcParams["axes.titlepad"] = 1
[4]:
print(f"{'Python':>20} : ", *sys.version.splitlines())
Python : 3.8.8 (default, Mar 11 2021, 08:58:19) [GCC 8.3.0]
[5]:
print(f"cnnclustering : ", cnnclustering.__version__)
cnnclustering : 0.4.2
Helper function definitions¶
[6]:
neighbours_sorted_recipe = {
"input_data": _types.InputDataExtNeighboursMemoryview,
"neighbours_getter": _types.NeighboursGetterExtLookup,
"neighbours": (_types.NeighboursExtVector, (5000,), {}),
"neighbour_neighbours": (_types.NeighboursExtVector, (5000,), {}),
"metric": _types.MetricExtDummy,
"similarity_checker": _types.SimilarityCheckerExtScreensorted,
"queue": _types.QueueExtFIFOQueue,
"fitter": _fit.FitterExtBFS,
}
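A recipe like this is unpacked as keyword arguments into cluster.prepare_clustering, together with a preparation hook for pre-computed neighbourhoods. A minimal sketch of the pattern used throughout the scans below (r = 0.1 and c = 5 are arbitrary placeholder values; data is defined per data set section):
[ ]:
# Sketch only: wrap pre-computed, sorted neighbourhoods with the recipe
# above and fit once with placeholder cluster parameters.
neighbours = helper.compute_neighbours(data, 0.1, sort=True)
neighbours_clustering = cluster.prepare_clustering(
    neighbours,
    preparation_hook=cluster.prepare_neighbourhoods,
    **neighbours_sorted_recipe
)
neighbours_clustering.fit(0.1, 5, member_cutoff=20, v=False)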
[7]:
neighbours_sorted_alternative_recipe = {
"input_data": _types.InputDataPointsSklearnKDTree,
"neighbours_getter": (_types.NeighboursGetterRecomputeLookup, (), {"is_sorted": True}),
"neighbours": (_types.NeighboursExtVector, (5000,), {}),
"neighbour_neighbours": (_types.NeighboursExtVector, (5000,), {}),
"metric": _types.MetricExtDummy,
"similarity_checker": _types.SimilarityCheckerExtScreensorted,
"queue": _types.QueueExtFIFOQueue,
"fitter": _fit.FitterBFS,
}
[8]:
if "CLUSTERING_MAP" not in dir():
CLUSTERING_MAP = {}
[377]:
CLUSTERING_MAP
[377]:
{'backbone': Clustering(input_data=<cnnclustering._types.InputDataExtPointsMemoryview object at 0x7fbbfe08a150>, neighbours_getter=<cnnclustering._types.NeighboursGetterExtBruteForce object at 0x7fbbcb6e9750>, neighbours=<cnnclustering._types.NeighboursExtVector object at 0x7fbbcaeb72f0>, neighbour_neighbours=<cnnclustering._types.NeighboursExtVector object at 0x7fbbcaeb7430>, metric=<cnnclustering._types.MetricExtEuclidean object at 0x7fbbcb6e9e70>, similarity_checker=<cnnclustering._types.SimilarityCheckerExtContains object at 0x7fbbcb728ad0>, queue=<cnnclustering._types.QueueExtFIFOQueue object at 0x7fbbcafc96c0>, fitter=<cnnclustering._fit.FitterExtBFS object at 0x7fbbcb728a30>, predictor=None)}
[9]:
overwrite = False
record_file = pathlib.Path("records/records.json")
if record_file.is_file() and not overwrite:
raise RuntimeError(f"File exists: str(record_file)")
with open(record_file, "w") as fp:
json.dump({k: v.summary._list for k, v in CLUSTERING_MAP.items()}, fp, cls=helper.RecordEncoder, indent=4)
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-9-2d943096d177> in <module>
2 record_file = pathlib.Path("records/records.json")
3 if record_file.is_file() and not overwrite:
----> 4 raise RuntimeError(f"File exists: {record_file}")
5
6 with open(record_file, "w") as fp:
RuntimeError: File exists: records/records.json
[376]:
with open("records/records.json", "r") as fp:
RECORD_MAP = json.load(fp, object_hook=helper.as_Record)
[217]:
overwrite = False
record_file = pathlib.Path("records/records.json")
if record_file.is_file() and not overwrite:
raise RuntimeError(f"File exists: str(record_file)")
with open(record_file, "w") as fp:
json.dump(RECORD_MAP, fp, cls=helper.RecordEncoder, indent=4)
[396]:
for scan_name, cl in CLUSTERING_MAP.items():
print(scan_name, cl.summary[-1])
backbone -----------------------------------------------------------------------------------------------
#points r c min max #clusters %largest %noise time
10020 1.000 1999 20 None 2 0.642 0.149 00:00:0.837
-----------------------------------------------------------------------------------------------
[216]:
for scan_name, recs in RECORD_MAP.items():
print(scan_name, recs[-1])
backbone -----------------------------------------------------------------------------------------------
#points r c min max #clusters %largest %noise time
10020 1.000 250 20 None 2 0.962 0.005 00:00:0.233
-----------------------------------------------------------------------------------------------
no_structure -----------------------------------------------------------------------------------------------
#points r c min max #clusters %largest %noise time
5000 1.000 150 20 None 1 1.000 0.000 00:00:0.039
-----------------------------------------------------------------------------------------------
blobs -----------------------------------------------------------------------------------------------
#points r c min max #clusters %largest %noise time
5000 0.600 150 20 None 2 0.667 0.000 00:00:0.055
-----------------------------------------------------------------------------------------------
moons -----------------------------------------------------------------------------------------------
#points r c min max #clusters %largest %noise time
5000 0.600 150 20 None 2 0.500 0.000 00:00:0.062
-----------------------------------------------------------------------------------------------
varied -----------------------------------------------------------------------------------------------
#points r c min max #clusters %largest %noise time
5000 0.500 150 20 None 1 0.923 0.077 00:00:0.077
-----------------------------------------------------------------------------------------------
aniso -----------------------------------------------------------------------------------------------
#points r c min max #clusters %largest %noise time
5000 0.600 150 20 None 1 0.999 0.001 00:00:0.059
-----------------------------------------------------------------------------------------------
tica -----------------------------------------------------------------------------------------------
#points r c min max #clusters %largest %noise time
37500 1.000 250 20 None 4 0.828 0.005 00:00:1.761
-----------------------------------------------------------------------------------------------
Plotting¶
[10]:
def calc_histogram(x, hist_props=None, save=False, base_name=None):
    hist_props_defaults = {
        "bins": 100,
        "density": True,
    }
    if hist_props is not None:
        hist_props_defaults.update(hist_props)

    histogram, bins = np.histogram(
        x,
        **hist_props_defaults
    )
    binmids = 0.5 * (bins[:-1] + bins[1:])

    if save:
        if base_name is None:
            raise ValueError("If 'save=True', need to provide 'base_name'")
        np.save(f"{base_name}_hist.npy", histogram)
        np.save(f"{base_name}_binmids.npy", binmids)

    return histogram, binmids
[373]:
def calc_n_neighbours_per_radius(
        distances,
        points_factor=1,
        highlights=None,
        save=False,
        base_name=None,
        hist_props=None):
    if hist_props is None:
        hist_props = {}

    min_n = []
    max_n = []
    mean_n = []

    upper_dist_bound = np.ceil(distances.max())
    # Sample radii up to the upper bound with a spacing derived from its
    # leading digit (e.g. 51 samples between 0 and 5)
    r_array = np.linspace(
        0,
        upper_dist_bound,
        int(f"{upper_dist_bound:1.0e}".split("e")[0]) * 10 + 1
    )

    if highlights is not None:
        # Drop highlight radii that are already on the grid before inserting
        # (np.delete returns a new array and does not modify in place)
        highlights = np.setdiff1d(highlights, r_array)
        insert_highlights_at = np.searchsorted(r_array, highlights)
        r_array = np.insert(r_array, insert_highlights_at, highlights)

    highlights_distributions = {}
    for r in r_array:
        adjacency = np.where(distances < r, 1, 0)
        n_neighbours = np.sum(adjacency, axis=0) - 1
        n_neighbours *= points_factor
        min_n.append(n_neighbours.min())
        max_n.append(n_neighbours.max())
        mean_n.append(n_neighbours.mean())

        if highlights is not None and r in highlights:
            hist_props["range"] = (min_n[-1], max_n[-1])
            h, binmids = calc_histogram(n_neighbours, hist_props=hist_props, save=False)
            highlights_distributions[r] = (h, binmids)

    min_n = np.asarray(min_n)
    max_n = np.asarray(max_n)
    mean_n = np.asarray(mean_n)

    if save:
        if base_name is None:
            raise ValueError("If 'save=True', need to provide 'base_name'")
        np.save(f"{base_name}_r_array.npy", r_array)
        np.save(f"{base_name}_min_n.npy", min_n)
        np.save(f"{base_name}_max_n.npy", max_n)
        np.save(f"{base_name}_mean_n.npy", mean_n)
        np.save(f"{base_name}_highlights_dist.npy", highlights_distributions)

    return r_array, min_n, max_n, mean_n, highlights_distributions
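For reference, a sketch of a call with highlight radii (concrete highlight values for each data set appear in the sections below); at every highlighted radius the neighbour-count distribution is recorded:
[ ]:
# Sketch with placeholder highlight radii and 20 histogram bins
r_array, min_n, max_n, mean_n, highlights_distributions = calc_n_neighbours_per_radius(
    distances,
    highlights=np.array([0.2, 1.5]),
    hist_props={"bins": 20},
)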
[12]:
def plot_n_neighbours_per_radius(r_array, min_n, max_n, mean_n, dist_hist_cutoff, n_points):
fig, (full_ax, zoom_ax) = plt.subplots(
1, 2,
figsize=(
mpl.rcParams["figure.figsize"][0] * 1.5,
mpl.rcParams["figure.figsize"][1]
)
)
full_ax.plot(r_array, min_n)
full_ax.plot(r_array, max_n)
full_ax.plot(r_array, mean_n)
full_ax.axvline(dist_hist_cutoff, color="k", linestyle="--", linewidth=0.5)
intersection_index = np.argwhere(r_array < dist_hist_cutoff)[-1][0]
intersection_ticks = []
for intersection_list in [min_n, max_n, mean_n]:
intersection_y = (
(intersection_list[intersection_index] + intersection_list[intersection_index + 1])
/ 2
)
intersection_ticks.append(intersection_y)
full_ax.hlines(
y=intersection_y,
xmin=0, xmax=dist_hist_cutoff,
color="k", linestyle="--", linewidth=0.5,
transform=full_ax.transData,
zorder=3
)
full_ax.set_yticks((0, n_points))
full_ax.set_yticks(intersection_ticks, minor=True)
full_ax.set_yticklabels(
[int(t) for t in intersection_ticks],
minor=True,
fontsize="x-small"
)
full_ax.set_xlim(0, r_array[-1])
full_ax.set_xlabel("$r$")
full_ax.set_ylabel("# neighbours")
zoom_ax.plot(r_array, min_n)
zoom_ax.plot(r_array, max_n)
zoom_ax.plot(r_array, mean_n)
zoom_ax.set_xlim(0, dist_hist_cutoff)
zoom_ax.set_ylim(0, np.ceil(intersection_ticks[2]))
zoom_ax.set_xlabel("$r$")
zoom_ax.legend(["min", "max", "mean"])
plt.tight_layout(pad=0.1, w_pad=1)
Organisation¶
Ignore/modify these if needed.
[13]:
# Find data sets in ...
data_dir = pathlib.Path(os.path.expandvars("$WD/CommonNN"))
[14]:
# Save observations to ...
figsrc_dir = pathlib.Path(data_dir / "Manuscript/figsrc/Scans")
No structure¶
[75]:
scan_name = "no_structure"
[116]:
data = helper.gen_no_structure_points((5000, 2))
distances = pairwise_distances(data)
n_points = data.shape[0]
fig, ax = plt.subplots(figsize=(1, 1))
ax.scatter(
*data.T,
s=0.5,
)
[116]:
<matplotlib.collections.PathCollection at 0x7fc668d36220>

Checks¶
[78]:
fig, ax = plt.subplots()
_ = plot.plot_histogram(ax, distances.flatten(), maxima=True, maxima_props={"order": 5})

[110]:
h, binmids = calc_histogram(distances.flatten(), save=True, base_name=figsrc_dir / f"{scan_name}_dist")
[82]:
print("Min: ", distances[distances > 0].min())
print("Max: ", distances.max())
Min: 0.00027307491487952
Max: 4.842668307314774
[118]:
r_array, min_n, max_n, mean_n, _ = calc_n_neighbours_per_radius(
    distances, save=True, base_name=figsrc_dir / f"{scan_name}"
)
[139]:
plot_n_neighbours_per_radius(
r_array, min_n, max_n, mean_n,
1.67, n_points
)

Scan¶
[129]:
scan_name = "no_structure"
clustering = CLUSTERING_MAP[scan_name] = cluster.prepare_clustering(data)
[62]:
for record in RECORD_MAP[scan_name]:
clustering.summary.append(record)
[130]:
for r in tqdm(np.arange(0.01, 1.01, 0.01)):
    # Pre-compute sorted neighbourhoods once per radius cutoff ...
    neighbours = helper.compute_neighbours(data, r, sort=True)
    neighbours_clustering = cluster.prepare_clustering(
        neighbours,
        preparation_hook=cluster.prepare_neighbourhoods,
        **neighbours_sorted_recipe
    )
    # ... then scan the CommonNN cutoff on the fixed neighbourhoods
    for c in range(151):
        neighbours_clustering.fit(r, c, member_cutoff=20, v=False)
        clustering.summary.append(neighbours_clustering.summary[-1])
[131]:
clustering.summary.to_DataFrame()
[131]:
|  | n_points | radius_cutoff | cnn_cutoff | member_cutoff | max_clusters | n_clusters | ratio_largest | ratio_noise | execution_time |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | 5000 | 0.01 | 0 | 20 | <NA> | 0 | 0.0 | 1.0 | 0.000189 |
| 1 | 5000 | 0.01 | 1 | 20 | <NA> | 0 | 0.0 | 1.0 | 0.000126 |
| 2 | 5000 | 0.01 | 2 | 20 | <NA> | 0 | 0.0 | 1.0 | 0.000124 |
| 3 | 5000 | 0.01 | 3 | 20 | <NA> | 0 | 0.0 | 1.0 | 0.000122 |
| 4 | 5000 | 0.01 | 4 | 20 | <NA> | 0 | 0.0 | 1.0 | 0.000124 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 15095 | 5000 | 1.00 | 146 | 20 | <NA> | 1 | 1.0 | 0.0 | 0.038833 |
| 15096 | 5000 | 1.00 | 147 | 20 | <NA> | 1 | 1.0 | 0.0 | 0.039439 |
| 15097 | 5000 | 1.00 | 148 | 20 | <NA> | 1 | 1.0 | 0.0 | 0.038678 |
| 15098 | 5000 | 1.00 | 149 | 20 | <NA> | 1 | 1.0 | 0.0 | 0.039222 |
| 15099 | 5000 | 1.00 | 150 | 20 | <NA> | 1 | 1.0 | 0.0 | 0.039001 |
15100 rows × 9 columns
Plots¶
[62]:
scan_name = "no_structure"
clustering = CLUSTERING_MAP[scan_name]
Time¶
[133]:
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="execution_time",
contour_props={
"levels": 50,
# "locator": mpl.ticker.LogLocator()
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax)
colorbar.set_label("time / s")
ax.set_xlim(0.01, 1)
ax.set_ylim(0, 150)
[133]:
(0.0, 150.0)

Trends¶
Clustering for a fixed CommonNN cutoff \(c\) is fast if the …
… radius cutoff is small: the majority of points does not have enough neighbours and is immediately declared noise (the density criterion is never checked).
… radius cutoff is large: the majority of points has a large share of the total number of points as neighbours and ends up in the same cluster (low number of re-checks).
Generally, clustering takes more time if the …
… radius cutoff is large: more neighbours have to be retrieved and more neighbours have to be checked (assuming the CommonNN cutoff is comparably low).
… CommonNN cutoff is large: checking takes more time and more points have to be re-checked (assuming the radius cutoff is comparably large).
A timing sketch illustrating these trends follows below.
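A minimal, hypothetical timing sketch on the current data set, reusing the scan machinery defined above (absolute numbers depend on the machine and on the TRACE_CYTHON setting; the chosen radii are placeholder values):
[ ]:
import timeit

# One small, one intermediate, and one large radius cutoff at a fixed
# CommonNN cutoff c = 50
for r in (0.05, 0.3, 1.0):
    neighbours = helper.compute_neighbours(data, r, sort=True)
    neighbours_clustering = cluster.prepare_clustering(
        neighbours,
        preparation_hook=cluster.prepare_neighbourhoods,
        **neighbours_sorted_recipe
    )
    t = timeit.timeit(
        lambda: neighbours_clustering.fit(r, 50, member_cutoff=20, v=False),
        number=3
    )
    print(f"r = {r:.2f}: {t / 3:.4f} s per fit")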
Noise level¶
[135]:
# Noise level
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax, quantity="ratio_noise",
contour_props={
"levels": 100,
"vmin": 0,
"vmax": 1,
},
# convert=lambda x: x * 100
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=(0, 0.5, 1))
colorbar.set_label("noise / %")
ax.set_xlim(0.01, 1)
ax.set_ylim(0, 150)
[135]:
(0.0, 150.0)

Largest cluster¶
[136]:
# Largest cluster
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="ratio_largest",
contour_props={
"levels": 100,
"vmin": 0,
"vmax": 1,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=(0, 0.5, 1))
colorbar.set_label("largest / %")
ax.set_xlim(0.01, 1)
ax.set_ylim(0, 150)
[136]:
(0.0, 150.0)

Number of clusters¶
[137]:
# Number of clusters
show_n = 10
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="n_clusters",
contour_props={
"levels": np.arange(-0.5, show_n + 1.5, 1),
"vmin": 0,
"vmax": show_n,
"extend": "max"
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=range(0, show_n + 1, 2))
colorbar.set_label("# clusters")
ax.set_xlim(0.01, 1)
ax.set_ylim(0, 150)
[137]:
(0.0, 150.0)

Mono blob¶
[167]:
scan_name = "mono_blob"
[310]:
# The test data
data = helper.gen_blobs_points(
(6000, 2),
shuffle=False
)
data = data[:2000]
distances = pairwise_distances(data)
n_points = data.shape[0]
fig, ax = plt.subplots(figsize=(1, 1))
ax.scatter(
*data.T,
s=0.5,
)
[310]:
<matplotlib.collections.PathCollection at 0x7fbbc5567730>

Checks¶
[311]:
fig, ax = plt.subplots()
_ = plot.plot_histogram(ax, distances.flatten(), maxima=True, maxima_props={"order": 5})

Di blob¶
[167]:
scan_name = "di_blob"
[312]:
# The test data
data = helper.gen_blobs_points(
(6000, 2),
shuffle=False
)
data = data[2000:]
distances = pairwise_distances(data)
n_points = data.shape[0]
fig, ax = plt.subplots(figsize=(1, 1))
ax.scatter(
*data.T,
s=0.5,
)
[312]:
<matplotlib.collections.PathCollection at 0x7fbc00a7e370>

Checks¶
[313]:
fig, ax = plt.subplots()
_ = plot.plot_histogram(ax, distances.flatten(), maxima=True, maxima_props={"order": 5})

Quadro blob¶
[167]:
scan_name = "quad_blob"
[349]:
# The test data
data = helper.gen_blobs_points(
(6000, 2),
centers=np.array([[-5, -5], [-9, 8], [4, -5], [15, 12]]),
shuffle=False
)
distances = pairwise_distances(data)
n_points = data.shape[0]
fig, ax = plt.subplots(figsize=(1, 1))
ax.scatter(
*data.T,
s=0.5,
)
[349]:
<matplotlib.collections.PathCollection at 0x7fbbc7802d00>

Checks¶
[350]:
fig, ax = plt.subplots()
_ = plot.plot_histogram(ax, distances.flatten(), maxima=True, maxima_props={"order": 5})

Blobs¶
[337]:
scan_name = "blobs"
[338]:
# The test data
data = helper.gen_blobs_points((5000, 2))
distances = pairwise_distances(data)
n_points = data.shape[0]
fig, ax = plt.subplots(figsize=(1, 1))
ax.scatter(
*data.T,
s=0.5,
)
[338]:
<matplotlib.collections.PathCollection at 0x7fbbc5870970>

Checks¶
[169]:
fig, ax = plt.subplots()
_ = plot.plot_histogram(ax, distances.flatten(), maxima=True, maxima_props={"order": 5})

[170]:
h, binmids = calc_histogram(distances.flatten(), save=True, base_name=figsrc_dir / f"{scan_name}_dist")
[171]:
print("Min: ", distances[distances > 0].min())
print("Max: ", distances.max())
Min: 4.422092256111229e-05
Max: 4.097490617525474
[172]:
r_array, min_n, max_n, mean_n, _ = calc_n_neighbours_per_radius(
    distances, save=True, base_name=figsrc_dir / f"{scan_name}"
)
[173]:
plot_n_neighbours_per_radius(
r_array, min_n, max_n, mean_n,
0.18, n_points
)

Scan¶
[151]:
scan_name = "blobs"
clustering = CLUSTERING_MAP[scan_name] = cluster.prepare_clustering(data)
[152]:
for r in tqdm(np.arange(0.01, 0.61, 0.01)):
neighbours = helper.compute_neighbours(data, r, sort=True)
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
for c in range(151):
neighbours_clustering.fit(r, c, member_cutoff=20, v=False)
clustering.summary.append(neighbours_clustering.summary[-1])
Plots¶
[166]:
scan_name = "blobs"
clustering = CLUSTERING_MAP[scan_name]
Time¶
[153]:
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="execution_time",
contour_props={
"levels": 50,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax)
colorbar.set_label("time / s")
ax.set_xlim(0.01, 0.6)
ax.set_ylim(0, 150)
[153]:
(0.0, 150.0)

Noise level¶
[154]:
# Noise level
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="ratio_noise",
contour_props={
"levels": np.arange(0, 1.01, 0.01),
"vmin": 0,
"vmax": 1,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=(0, 0.5, 1))
colorbar.set_label("noise / %")
ax.set_xlim(0.01, 0.6)
ax.set_ylim(0, 150)
[154]:
(0.0, 150.0)

Largest cluster¶
[155]:
# Largest cluster
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="ratio_largest",
contour_props={
"levels": np.arange(0, 1.01, 0.01),
"vmin": 0,
"vmax": 1,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=(0, 0.5, 1))
colorbar.set_label("largest / %")
ax.set_xlim(0.01, 0.6)
ax.set_ylim(0, 150)
[155]:
(0.0, 150.0)

Number of clusters¶
[156]:
# Number of clusters
show_n = 10
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="n_clusters",
contour_props={
"levels": np.arange(-0.5, show_n + 1.5, 1),
"vmin": 0,
"vmax": show_n,
"extend": "max"
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=range(0, show_n + 1, 2))
colorbar.set_label("# clusters")
ax.set_xlim(0.01, 0.6)
ax.set_ylim(0, 150)
[156]:
(0.0, 150.0)

Moons¶
[174]:
scan_name = "moons"
[175]:
# The test data
data = helper.gen_moons_points(
(5000, 2),
noise=.05
)
distances = pairwise_distances(data)
n_points = data.shape[0]
fig, ax = plt.subplots(figsize=(1, 1))
ax.scatter(
*data.T,
s=0.5,
)
[175]:
<matplotlib.collections.PathCollection at 0x7fc65ed931f0>

Checks¶
[159]:
fig, ax = plt.subplots()
_ = plot.plot_histogram(ax, distances.flatten(), maxima=True, maxima_props={"order": 5})

[160]:
h, binmids = calc_histogram(distances.flatten(), save=True, base_name=figsrc_dir / f"{scan_name}_dist")
[161]:
print("Min: ", distances[distances > 0].min())
print("Max: ", distances.max())
Min: 9.927475001661245e-05
Max: 3.892931346774215
[162]:
r_array, min_n, max_n, mean_n, _ = calc_n_neighbours_per_radius(
    distances, save=True, base_name=figsrc_dir / f"{scan_name}"
)
[165]:
plot_n_neighbours_per_radius(
r_array, min_n, max_n, mean_n,
0.21, n_points
)

Scan¶
[176]:
scan_name = "moons"
clustering = CLUSTERING_MAP[scan_name] = cluster.prepare_clustering(data)
[177]:
for r in tqdm(np.arange(0.01, 0.61, 0.01)):
neighbours = helper.compute_neighbours(data, r, sort=True)
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
for c in range(151):
neighbours_clustering.fit(r, c, member_cutoff=20, v=False)
clustering.summary.append(neighbours_clustering.summary[-1])
Plots¶
[76]:
scan_name = "moons"
clustering = CLUSTERING_MAP[scan_name]
Time¶
[178]:
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="execution_time",
contour_props={
"levels": 50,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax)
colorbar.set_label("time / s")
ax.set_xlim(0.01, 0.6)
ax.set_ylim(0, 150)
[178]:
(0.0, 150.0)

Noise level¶
[179]:
# Noise level
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="ratio_noise",
contour_props={
"levels": np.arange(0, 1.01, 0.01),
"vmin": 0,
"vmax": 1,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=(0, 0.5, 1))
colorbar.set_label("noise / %")
ax.set_xlim(0.01, 0.6)
ax.set_ylim(0, 150)
[179]:
(0.0, 150.0)

Largest cluster¶
[180]:
# Largest cluster
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="ratio_largest",
contour_props={
"levels": np.arange(0, 1.01, 0.01),
"vmin": 0,
"vmax": 1,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=(0, 0.5, 1))
colorbar.set_label("largest / %")
ax.set_xlim(0.01, 0.6)
ax.set_ylim(0, 150)
[180]:
(0.0, 150.0)

Number of clusters¶
[181]:
# Number of clusters
show_n = 10
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="n_clusters",
contour_props={
"levels": np.arange(-0.5, show_n + 1.5, 1),
"vmin": 0,
"vmax": show_n,
"extend": "max"
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=range(0, show_n + 1, 2))
colorbar.set_label("# clusters")
ax.set_xlim(0.01, 0.6)
ax.set_ylim(0, 150)
[181]:
(0.0, 150.0)

Mono circle¶
[301]:
scan_name = "mono_circle"
[305]:
# The test data
data = helper.gen_circles_points(
(5000, 2),
factor=.5,
shuffle=False,
noise=.05
)
data = data[:2500]
n_points = data.shape[0]
distances = pairwise_distances(data)
fig, ax = plt.subplots(figsize=(1, 1))
ax.scatter(
*data.T,
s=0.5,
)
[305]:
<matplotlib.collections.PathCollection at 0x7fbbc63fbaf0>

Checks¶
[306]:
fig, ax = plt.subplots()
_ = plot.plot_histogram(ax, distances.flatten(), maxima=True, maxima_props={"order": 5})

Circles¶
[301]:
scan_name = "circles"
[302]:
# The test data
data = helper.gen_circles_points(
(5000, 2),
factor=.5,
noise=.05
)
n_points = data.shape[0]
distances = pairwise_distances(data)
fig, ax = plt.subplots(figsize=(1, 1))
ax.scatter(
*data.T,
s=0.5,
)
[302]:
<matplotlib.collections.PathCollection at 0x7fbbc5c45d90>

Checks¶
[190]:
fig, ax = plt.subplots()
_ = plot.plot_histogram(ax, distances.flatten(), maxima=True, maxima_props={"order": 5})

[191]:
h, binmids = calc_histogram(distances.flatten(), save=True, base_name=figsrc_dir / f"{scan_name}_dist")
[192]:
print("Min: ", distances[distances > 0].min())
print("Max: ", distances.max())
Min: 0.0001264168234122532
Max: 4.199475414902217
[193]:
r_array, min_n, max_n, mean_n, _ = calc_n_neighbours_per_radius(
    distances, save=True, base_name=figsrc_dir / f"{scan_name}"
)
[194]:
plot_n_neighbours_per_radius(
r_array, min_n, max_n, mean_n,
0.23, n_points
)

Scan¶
[47]:
scan_name = "circles"
clustering = CLUSTERING_MAP[scan_name] = cluster.prepare_clustering(data)
[195]:
for r in tqdm(np.arange(0.01, 0.61, 0.01)):
neighbours = helper.compute_neighbours(data, r, sort=True)
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
for c in range(151):
neighbours_clustering.fit(r, c, member_cutoff=20, v=False)
clustering.summary.append(neighbours_clustering.summary[-1])
Plots¶
[81]:
scan_name = "circles"
clustering = CLUSTERING_MAP[scan_name]
Time¶
[196]:
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="execution_time",
contour_props={
"levels": 50,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax)
colorbar.set_label("time / s")
ax.set_xlim(0.01, 0.6)
ax.set_ylim(0, 150)
[196]:
(0.0, 150.0)

Noise level¶
[197]:
# Noise level
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="ratio_noise",
contour_props={
"levels": np.arange(0, 1.01, 0.01),
"vmin": 0,
"vmax": 1,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=(0, 0.5, 1))
colorbar.set_label("noise / %")
ax.set_xlim(0.01, 0.6)
ax.set_ylim(0, 150)
[197]:
(0.0, 150.0)

Largest cluster¶
[198]:
# Largest cluster
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="ratio_largest",
contour_props={
"levels": np.arange(0, 1.01, 0.01),
"vmin": 0,
"vmax": 1,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=(0, 0.5, 1))
colorbar.set_label("largest / %")
ax.set_xlim(0.01, 0.6)
ax.set_ylim(0, 150)
[198]:
(0.0, 150.0)

Number of clusters¶
[85]:
# Number of clusters
show_n = 10
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="n_clusters",
contour_props={
"levels": np.arange(-0.5, show_n + 1.5, 1),
"vmin": 0,
"vmax": show_n,
"extend": "max"
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=range(0, show_n + 1, 2))
colorbar.set_label("# clusters")
ax.set_xlim(0.01, 0.6)
ax.set_ylim(0, 150)
[85]:
(0.0, 150.0)

Varied¶
[351]:
scan_name = "varied"
[352]:
# The test data
data = helper.gen_blobs_points(
(5000, 2),
random_state=170,
cluster_std=[1.0, 2.5, 0.5],
)
n_points = data.shape[0]
distances = pairwise_distances(data)
fig, ax = plt.subplots(figsize=(1, 1))
ax.scatter(
*data.T,
s=0.5,
)
[352]:
<matplotlib.collections.PathCollection at 0x7fbbc765fc70>

[353]:
np.save(figsrc_dir / f"{scan_name}/{scan_name}_data.npy", data)
Checks¶
[299]:
fig, ax = plt.subplots()
_ = plot.plot_histogram(ax, distances.flatten(), maxima=True, maxima_props={"order": 5})

[285]:
h, binmids = calc_histogram(distances.flatten(), save=True, base_name=figsrc_dir / f"{scan_name}/{scan_name}_dist")
[286]:
print("Min: ", distances[distances > 0].min())
print("Max: ", distances.max())
Min: 0.00021837824482724168
Max: 5.457257351668825
[300]:
r_array, min_n, max_n, mean_n, highlights_distributions = calc_n_neighbours_per_radius(
distances,
highlights=np.array([0.19, 1.56, 2.97]),
hist_props={"bins": 20},
save=True,
base_name=figsrc_dir / f"{scan_name}/{scan_name}"
)
[289]:
plot_n_neighbours_per_radius(
r_array, min_n, max_n, mean_n,
0.19, n_points
)

Scan¶
[206]:
scan_name = "varied"
clustering = CLUSTERING_MAP[scan_name] = cluster.prepare_clustering(data)
[207]:
for r in tqdm(np.arange(0.01, 0.51, 0.01)):
neighbours = helper.compute_neighbours(data, r, sort=True)
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
for c in range(151):
neighbours_clustering.fit(r, c, member_cutoff=20, v=False)
clustering.summary.append(neighbours_clustering.summary[-1])
Plots¶
[160]:
scan_name = "varied"
clustering = CLUSTERING_MAP[scan_name]
[161]:
clustering.summary.to_DataFrame()
[161]:
|  | n_points | radius_cutoff | cnn_cutoff | member_cutoff | max_clusters | n_clusters | ratio_largest | ratio_noise | execution_time |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | 5000 | 0.01 | 0 | 20 | <NA> | 5 | 0.0818 | 0.8856 | 0.001223 |
| 1 | 5000 | 0.01 | 1 | 20 | <NA> | 6 | 0.0198 | 0.9528 | 0.001058 |
| 2 | 5000 | 0.01 | 2 | 20 | <NA> | 2 | 0.0174 | 0.9784 | 0.000993 |
| 3 | 5000 | 0.01 | 3 | 20 | <NA> | 1 | 0.0108 | 0.9892 | 0.000940 |
| 4 | 5000 | 0.01 | 4 | 20 | <NA> | 1 | 0.0086 | 0.9914 | 0.000867 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7545 | 5000 | 0.50 | 146 | 20 | <NA> | 1 | 0.9252 | 0.0748 | 0.555526 |
| 7546 | 5000 | 0.50 | 147 | 20 | <NA> | 1 | 0.9252 | 0.0748 | 0.568364 |
| 7547 | 5000 | 0.50 | 148 | 20 | <NA> | 1 | 0.9242 | 0.0758 | 0.595478 |
| 7548 | 5000 | 0.50 | 149 | 20 | <NA> | 1 | 0.9238 | 0.0762 | 0.584090 |
| 7549 | 5000 | 0.50 | 150 | 20 | <NA> | 1 | 0.9228 | 0.0772 | 0.578298 |
7550 rows × 9 columns
Time¶
[208]:
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="execution_time",
contour_props={
"levels": 50,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax)
colorbar.set_label("time / s")
ax.set_xlim(0.01, 0.5)
ax.set_ylim(0, 150)
[208]:
(0.0, 150.0)

Noise level¶
[209]:
# Noise level
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="ratio_noise",
contour_props={
"levels": np.arange(0, 1.01, 0.01),
"vmin": 0,
"vmax": 1,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=(0, 0.5, 1))
colorbar.set_label("noise / %")
ax.set_xlim(0.01, 0.5)
ax.set_ylim(0, 150)
[209]:
(0.0, 150.0)

Largest cluster¶
[210]:
# Largest cluster
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="ratio_largest",
contour_props={
"levels": np.arange(0, 1.01, 0.01),
"vmin": 0,
"vmax": 1,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=(0, 0.5, 1))
colorbar.set_label("largest / %")
ax.set_xlim(0.01, 0.5)
ax.set_ylim(0, 150)
[210]:
(0.0, 150.0)

Number of clusters¶
[211]:
# Number of clusters
show_n = 10
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="n_clusters",
contour_props={
"levels": np.arange(-0.5, show_n + 1.5, 1),
"vmin": 0,
"vmax": show_n,
"extend": "max"
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=range(0, show_n + 1, 2))
colorbar.set_label("# clusters")
ax.set_xlim(0.01, 0.5)
ax.set_ylim(0, 150)
[211]:
(0.0, 150.0)

Evaluation¶
[166]:
r = 0.2
c = 50
neighbours = helper.compute_neighbours(data, r, sort=True)
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
neighbours_clustering.fit(0., c, member_cutoff=20, v=False)
clustering._labels = neighbours_clustering._labels
[168]:
fig, ax = plt.subplots()
clustering.evaluate(ax=ax)
[168]:
(<Figure size 600x370.828 with 1 Axes>,
<AxesSubplot:xlabel='$x$', ylabel='$y$'>)

Aniso¶
[213]:
scan_name = "aniso"
[212]:
# The test data
data = helper.gen_blobs_points(
(5000, 2),
random_state=170,
)
transformation = [[0.6, -0.6], [-0.4, 0.8]]
data = np.dot(data, transformation)
n_points = data.shape[0]
distances = pairwise_distances(data)
fig, ax = plt.subplots(figsize=(1, 1))
ax.scatter(
*data.T,
s=0.5,
)
[212]:
<matplotlib.collections.PathCollection at 0x7fc65ef777f0>

Checks¶
[214]:
fig, ax = plt.subplots()
_ = plot.plot_histogram(ax, distances.flatten(), maxima=True, maxima_props={"order": 5})

[215]:
h, binmids = calc_histogram(distances.flatten(), save=True, base_name=figsrc_dir / f"{scan_name}_dist")
[216]:
print("Min: ", distances[distances > 0].min())
print("Max: ", distances.max())
Min: 0.0001934117086504001
Max: 3.6222164035708793
[217]:
r_array, min_n, max_n, mean_n, _ = calc_n_neighbours_per_radius(
    distances, save=True, base_name=figsrc_dir / f"{scan_name}"
)
[218]:
plot_n_neighbours_per_radius(
r_array, min_n, max_n, mean_n,
0.13, n_points
)

Scan¶
[219]:
scan_name = "aniso"
clustering = CLUSTERING_MAP[scan_name] = cluster.prepare_clustering(data)
[220]:
for r in tqdm(np.arange(0.01, 0.61, 0.01)):
neighbours = helper.compute_neighbours(data, r, sort=True)
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
for c in range(151):
neighbours_clustering.fit(r, c, member_cutoff=20, v=False)
clustering.summary.append(neighbours_clustering.summary[-1])
Plots¶
[99]:
scan_name = "aniso"
clustering = CLUSTERING_MAP[scan_name]
Time¶
[221]:
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="execution_time",
contour_props={
"levels": 50,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax)
colorbar.set_label("time / s")
ax.set_xlim(0.01, 0.6)
ax.set_ylim(0, 150)
[221]:
(0.0, 150.0)

Noise level¶
[222]:
# Noise level
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="ratio_noise",
contour_props={
"levels": np.arange(0, 1.01, 0.01),
"vmin": 0,
"vmax": 1,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=(0, 0.5, 1))
colorbar.set_label("noise / %")
ax.set_xlim(0.01, 0.6)
ax.set_ylim(0, 150)
[222]:
(0.0, 150.0)

[223]:
# Noise level
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="ratio_noise",
contour_props={
"levels": np.arange(0, 1.01, 0.01),
"vmin": 0,
"vmax": 1,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=(0, 0.5, 1))
colorbar.set_label("noise / %")
ax.set_xlim(0.01, 0.13)
ax.set_ylim(0, 150)
[223]:
(0.0, 150.0)

Largest cluster¶
[224]:
# Largest cluster
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="ratio_largest",
contour_props={
"levels": np.arange(0, 1.01, 0.01),
"vmin": 0,
"vmax": 1,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=(0, 0.5, 1))
colorbar.set_label("largest / %")
ax.set_xlim(0.01, 0.6)
ax.set_ylim(0, 150)
[224]:
(0.0, 150.0)

Number of clusters¶
[225]:
# Number of clusters
show_n = 10
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="n_clusters",
contour_props={
"levels": np.arange(-0.5, show_n + 1.5, 1),
"vmin": 0,
"vmax": show_n,
"extend": "max"
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=range(0, show_n + 1, 2))
colorbar.set_label("# clusters")
ax.set_xlim(0.01, 0.6)
ax.set_ylim(0, 150)
[225]:
(0.0, 150.0)

Evaluation¶
[106]:
r = 0.13
c = 50
neighbours = helper.compute_neighbours(data, r, sort=True)
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
neighbours_clustering.fit(0., c, member_cutoff=20, v=False)
clustering._labels = neighbours_clustering._labels
[107]:
fig, ax = plt.subplots(figsize=(2, 2/1.618))
clustering.evaluate(ax=ax)
[107]:
(<Figure size 1800x1200 with 1 Axes>, <AxesSubplot:xlabel='$x$', ylabel='$y$'>)

Backbone dihedrals¶
[378]:
scan_name = "backbone"
[379]:
# The test data
data_path = data_dir / "Alanine/projections/phipsi.npy"
data = [p[::400, :] for p in np.load(data_path)]
[380]:
data_concatened = np.array(np.concatenate(data), dtype=np.float64, order="c")
n_points = data_concatened.shape[0]
[368]:
np.save(figsrc_dir / f"{scan_name}/{scan_name}_data.npy", data_concatened)
[381]:
fig, ax = plt.subplots(figsize=(1, 1))
ax.scatter(
*data_concatened.T,
s=0.5,
)
ax.set_xlim((-np.pi, np.pi))
ax.set_ylim((-np.pi, np.pi))
[381]:
(-3.141592653589793, 3.141592653589793)

[382]:
data_alt = _types.InputDataExtPointsMemoryview(data_concatened)
metric = _types.MetricExtEuclideanPeriodicReduced(np.array([2*np.pi, 2*np.pi], dtype=float))

# Fill the upper triangle with periodic distances, then mirror it into the
# lower triangle to obtain the full symmetric distance matrix
distances = np.zeros((n_points, n_points))
for i in range(n_points):
    for j in range(i + 1, n_points):
        distances[i, j] = np.sqrt(metric.calc_distance(i, j, data_alt))

tril = np.tril_indices_from(distances)
distances[tril] = distances.T[tril]
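For clarity, a pure-NumPy sketch of the same pairwise computation, assuming that MetricExtEuclideanPeriodicReduced implements the standard minimum-image convention on the \((2\pi, 2\pi)\) box and returns squared distances (hence the square root above):
[ ]:
# Hypothetical vectorised equivalent of the loop above
def periodic_pairwise_distances(points, box):
    diff = points[:, None, :] - points[None, :, :]  # pairwise differences
    diff -= box * np.round(diff / box)              # minimum-image wrap
    return np.sqrt((diff ** 2).sum(axis=-1))

# distances_alt = periodic_pairwise_distances(
#     data_concatened, np.array([2 * np.pi, 2 * np.pi])
# )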
Checks¶
[383]:
fig, ax = plt.subplots()
_ = plot.plot_histogram(ax, distances.flatten(), maxima=True, maxima_props={"order": 5})

[272]:
h, binmids = calc_histogram(distances.flatten(), save=True, base_name=figsrc_dir / f"{scan_name}/{scan_name}_dist")
[269]:
print("Min: ", distances[distances > 0].min())
print("Max: ", distances.max())
Min: 0.0001605242388684838
Max: 4.439522323492073
[270]:
scan_name
[270]:
'backbone'
[281]:
r_array, min_n, max_n, mean_n, highlights_distributions = calc_n_neighbours_per_radius(
distances,
highlights=np.array([0.42, 3.09]),
hist_props={"bins": 20},
save=True,
base_name=figsrc_dir / f"{scan_name}/{scan_name}"
)
[386]:
plot_n_neighbours_per_radius(
r_array, min_n, max_n, mean_n,
0.42, n_points
)

Scan¶
[384]:
scan_name = "backbone"
clustering = CLUSTERING_MAP[scan_name] = cluster.prepare_clustering(data)
[385]:
for record in RECORD_MAP[scan_name]:
clustering.summary.append(record)
[215]:
# RECORD_MAP[scan_name] = clustering.summary._list
[ ]:
for r in tqdm(np.arange(0.01, 1.01, 0.01)):
neighbours = [np.where(d < r)[0] for d in distances]
for n in neighbours:
n.sort()
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
for c in range(251, 2000):
neighbours_clustering.fit(r, c, member_cutoff=20, v=False)
clustering.summary.append(neighbours_clustering.summary[-1])
[397]:
scan_name = "backbone"
overwrite = True
record_file = pathlib.Path(f"records/{scan_name}.json")
if record_file.is_file() and not overwrite:
raise RuntimeError(f"File exists: str(record_file)")
with open(record_file, "w") as fp:
json.dump({scan_name: CLUSTERING_MAP[scan_name].summary._list}, fp, cls=helper.RecordEncoder, indent=4)
Plots¶
[137]:
scan_name = "backbone"
clustering = CLUSTERING_MAP[scan_name]
Time¶
[391]:
fig, ax = plt.subplots(figsize=(2, 2/1.618))
contour = clustering.summarize(
ax=ax,
quantity="execution_time",
contour_props={
"levels": 100,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax)
colorbar.set_label("time / s")
ax.set_xlim(0.01, 1)
ax.set_ylim(0, 2000)
[391]:
(0.0, 2000.0)

Noise level¶
[392]:
# Noise level
fig, ax = plt.subplots(figsize=(2, 2/1.618))
contour = clustering.summarize(
ax=ax,
quantity="ratio_noise",
contour_props={
"levels": np.arange(0, 1.01, 0.01),
"vmin": 0,
"vmax": 1,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=(0, 0.5, 1))
colorbar.set_label("noise / %")
ax.set_xlim(0.01, 1)
ax.set_ylim(0, 2000)
[392]:
(0.0, 2000.0)

Largest cluster¶
[394]:
# Largest cluster
fig, ax = plt.subplots(figsize=(2, 2/1.618))
contour = clustering.summarize(
ax=ax,
quantity="ratio_largest",
contour_props={
"levels": np.arange(0, 1.01, 0.01),
"vmin": 0,
"vmax": 1,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=(0, 0.5, 1))
colorbar.set_label("largest / %")
ax.set_xlim(0.01, 1)
ax.set_ylim(0, 2000)
[394]:
(0.0, 2000.0)

Number of clusters¶
[395]:
# Number of clusters
show_n = 10
fig, ax = plt.subplots(figsize=(2, 2/1.618))
contour = clustering.summarize(
ax=ax,
quantity="n_clusters",
contour_props={
"levels": np.arange(-0.5, show_n + 1.5, 1),
"vmin": 0,
"vmax": show_n,
"extend": "max"
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=range(0, show_n + 1, 2))
colorbar.set_label("# clusters")
ax.set_xlim(0.01, 1)
ax.set_ylim(0, 2000)
[395]:
(0.0, 2000.0)

Evaluation¶
[69]:
scan_name = "backbone"
clustering = cluster.prepare_clustering(data)
[236]:
plt.close("all")
fig, Ax = plt.subplots(4, 5, figsize=(7.2, 6.0))
c_list = [1, 10, 50, 100]
r_list = [0.5, 0.2, 0.13, 0.09, 0.068]
def get_member_cutoff(r, c):
if r < 0.2:
return 20
return 10
for i, c in enumerate(c_list):
for j, r in enumerate(r_list):
neighbours = [np.where(d < r)[0] for d in distances]
for n in neighbours:
n.sort()
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
neighbours_clustering.fit(r, c, member_cutoff=get_member_cutoff(r, c), v=False)
clustering._labels = neighbours_clustering._labels
# np.save(figsrc_dir / scan_name / f"{scan_name}_labels_20_30.npy", clustering.labels.labels)
clustering.evaluate(
ax=Ax[i, j],
ax_props={
"title": f"$r={r}$ $c={c}$",
"xlim": (-np.pi, np.pi),
"ylim": (-np.pi, np.pi),
"xticks": (),
"yticks": (),
"xlabel": None,
"ylabel": None,
"aspect": "equal"
},
annotate_pos="random"
)
# for j, r in enumerate(r_list):
# Ax[0, j].annotate(
# xy=(0.5, 1),
# text=f"$r={r}$",
# xycoords='axes fraction',
# )
Ax[-1, 0].set(**{
"xlim": (-np.pi, np.pi),
"ylim": (-np.pi, np.pi),
"xticks": ((-np.pi, 0, np.pi)),
"yticks": ((-np.pi, 0, np.pi)),
"xticklabels": (("$-\pi$", 0, "$\pi$")),
"yticklabels": (("$-\pi$", 0, "$\pi$")),
"xlabel": "$\phi$",
"ylabel": "$\psi$",
"aspect": "equal"
})
fig.tight_layout(pad=0.1, w_pad=0.01, h_pad=0.01)

[246]:
plt.close("all")
fig, Ax = plt.subplots(3, 5, figsize=(7.2, 3.33))
Ax = Ax.flatten()
c_list = [1, 10, 20, 30, 50, 100, 150, 250, 500, 750, 1000, 1500, 2000, 2500, 3000]
r_list = [0.42] * 15
for i, (r, c) in enumerate(zip(r_list, c_list)):
neighbours = [np.where(d < r)[0] for d in distances]
for n in neighbours:
n.sort()
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
neighbours_clustering.fit(r, c, member_cutoff=10, v=False)
clustering._labels = neighbours_clustering._labels
# np.save(figsrc_dir / scan_name / f"{scan_name}_labels_20_30.npy", clustering.labels.labels)
clustering.evaluate(
ax=Ax[i],
ax_props={
"title": f"$r={r}$ $c={c}$",
"xlim": (-np.pi, np.pi),
"ylim": (-np.pi, np.pi),
"xticks": (),
"yticks": (),
"xlabel": None,
"ylabel": None,
"aspect": "equal"
},
annotate_pos="random"
)
# for j, r in enumerate(r_list):
# Ax[0, j].annotate(
# xy=(0.5, 1),
# text=f"$r={r}$",
# xycoords='axes fraction',
# )
Ax[5].set(**{
"xlim": (-np.pi, np.pi),
"ylim": (-np.pi, np.pi),
"xticks": ((-np.pi, 0, np.pi)),
"yticks": ((-np.pi, 0, np.pi)),
"xticklabels": (("$-\pi$", 0, "$\pi$")),
"yticklabels": (("$-\pi$", 0, "$\pi$")),
"xlabel": "$\phi$",
"ylabel": "$\psi$",
"aspect": "equal"
})
fig.tight_layout(pad=0.1, w_pad=0.01, h_pad=0.01)

[249]:
plt.close("all")
fig, Ax = plt.subplots(3, 5, figsize=(7.2, 3.33))
Ax = Ax.flatten()
c_list = [1, 5, 10, 20, 30, 50, 75, 100, 150, 200, 300, 400, 500, 600, 750]
r_list = [0.2] * 15
for i, (r, c) in enumerate(zip(r_list, c_list)):
neighbours = [np.where(d < r)[0] for d in distances]
for n in neighbours:
n.sort()
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
neighbours_clustering.fit(r, c, member_cutoff=10, v=False)
clustering._labels = neighbours_clustering._labels
# np.save(figsrc_dir / scan_name / f"{scan_name}_labels_20_30.npy", clustering.labels.labels)
clustering.evaluate(
ax=Ax[i],
ax_props={
"title": f"$r={r}$ $c={c}$",
"xlim": (-np.pi, np.pi),
"ylim": (-np.pi, np.pi),
"xticks": (),
"yticks": (),
"xlabel": None,
"ylabel": None,
"aspect": "equal"
},
annotate_pos="random"
)
# for j, r in enumerate(r_list):
# Ax[0, j].annotate(
# xy=(0.5, 1),
# text=f"$r={r}$",
# xycoords='axes fraction',
# )
Ax[5].set(**{
"xlim": (-np.pi, np.pi),
"ylim": (-np.pi, np.pi),
"xticks": ((-np.pi, 0, np.pi)),
"yticks": ((-np.pi, 0, np.pi)),
"xticklabels": (("$-\pi$", 0, "$\pi$")),
"yticklabels": (("$-\pi$", 0, "$\pi$")),
"xlabel": "$\phi$",
"ylabel": "$\psi$",
"aspect": "equal"
})
fig.tight_layout(pad=0.1, w_pad=0.01, h_pad=0.01)

[250]:
plt.close("all")
fig, Ax = plt.subplots(3, 5, figsize=(7.2, 3.33))
Ax = Ax.flatten()
c_list = [1, 10, 20, 30, 50, 100, 150, 250, 500, 750, 1000, 1500, 2000, 2500, 3000]
r_list = [0.75] * 15
for i, (r, c) in enumerate(zip(r_list, c_list)):
neighbours = [np.where(d < r)[0] for d in distances]
for n in neighbours:
n.sort()
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
neighbours_clustering.fit(r, c, member_cutoff=10, v=False)
clustering._labels = neighbours_clustering._labels
# np.save(figsrc_dir / scan_name / f"{scan_name}_labels_20_30.npy", clustering.labels.labels)
clustering.evaluate(
ax=Ax[i],
ax_props={
"title": f"$r={r}$ $c={c}$",
"xlim": (-np.pi, np.pi),
"ylim": (-np.pi, np.pi),
"xticks": (),
"yticks": (),
"xlabel": None,
"ylabel": None,
"aspect": "equal"
},
annotate_pos="random"
)
# for j, r in enumerate(r_list):
# Ax[0, j].annotate(
# xy=(0.5, 1),
# text=f"$r={r}$",
# xycoords='axes fraction',
# )
Ax[5].set(**{
"xlim": (-np.pi, np.pi),
"ylim": (-np.pi, np.pi),
"xticks": ((-np.pi, 0, np.pi)),
"yticks": ((-np.pi, 0, np.pi)),
"xticklabels": (("$-\pi$", 0, "$\pi$")),
"yticklabels": (("$-\pi$", 0, "$\pi$")),
"xlabel": "$\phi$",
"ylabel": "$\psi$",
"aspect": "equal"
})
fig.tight_layout(pad=0.1, w_pad=0.01, h_pad=0.01)

Hierarchical manual¶
[207]:
r = 0.5
c = 10
h_clustering = cluster.prepare_clustering(data)
h_clustering._metric = _types.MetricExtEuclideanPeriodicReduced(
np.array([2*np.pi, 2*np.pi], dtype=float)
)
h_clustering.fit(r, c, member_cutoff=10, v=False)
h_clustering.isolate()
[208]:
for sub_clustering, (r, c) in [(h_clustering.children[1], (0.16, 10))]:
sub_clustering.fit(r, c, member_cutoff=20, v=False)
sub_clustering.isolate()
[209]:
fig, (pie_ax, tree_ax) = plt.subplots(1, 2, figsize=(3.33, 3.33 / 1.618 / 2))
h_clustering.pie(
ax=pie_ax,
pie_props={
"radius": 0.6,
"wedgeprops": dict(width=0.6, edgecolor="k")
}
)
graph = h_clustering.to_nx_DiGraph(ignore={0})
shortened_labels = {}
for key in graph.nodes.keys():
shortened_labels[key] = key.rsplit(".", 1)[-1]
cycler = mpl.rcParams["axes.prop_cycle"]
colors = cycler.by_key()["color"]
depth_counter = defaultdict(int)
node_colors = []
for node in graph.nodes:
depth = len(node)
node_colors.append(colors[depth_counter[depth]])
depth_counter[depth] += 1
plot.plot_graph_sugiyama_straight(
graph,
ax=tree_ax,
pos_props = {
"source": "1",
},
draw_props={
"labels": shortened_labels,
"with_labels": True,
"node_shape": "s",
"edgecolors": "k",
"node_size": 100,
"node_color": node_colors, # [cycler.by_key()["color"][int(shortened_labels[node]) - 1] for node in graph.nodes ],
"font_size": 8,
}
)
fig.tight_layout()
# fig.savefig(figsrc_dir / scan_name / f"{scan_name}_pie_tree.png")

[210]:
h_clustering.reel()
h_clustering.labels.sort_by_size()
[212]:
fig, ax = plt.subplots()
h_clustering.evaluate(
ax=ax,
ax_props={
"xlim": (-np.pi, np.pi),
"ylim": (-np.pi, np.pi),
"xticks": ((-np.pi, 0, np.pi)),
"yticks": ((-np.pi, 0, np.pi)),
"xticklabels": (("$-\pi$", 0, "$\pi$")),
"yticklabels": (("$-\pi$", 0, "$\pi$")),
"aspect": "equal"
},
annotate_pos="random"
)
[212]:
(<Figure size 600x370.828 with 1 Axes>,
<AxesSubplot:xlabel='$x$', ylabel='$y$'>)

[494]:
np.save(figsrc_dir / scan_name / f"{scan_name}_labels_reeled.npy", h_clustering.labels.labels)
Peptide (TICA)¶
[369]:
scan_name = "tica"
[370]:
# The test data
data_path = data_dir / "6a5j/projections/tica_500_4.npy"
data = [p[::10, :] for p in np.load(data_path)]
[371]:
data_concatened = np.concatenate(data)
n_points = data_concatened.shape[0]
distances = pairwise_distances(data_concatened[::2])
[364]:
np.save(figsrc_dir / f"{scan_name}/{scan_name}_data.npy", data_concatened)
[293]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(2, 1))
ax1.scatter(
*data_concatened[:, :2].T,
s=0.5,
)
ax2.scatter(
*data_concatened[:, 2:].T,
s=0.5,
)
plt.tight_layout(w_pad=1)

Checks¶
[294]:
fig, ax = plt.subplots()
_ = plot.plot_histogram(ax, distances.flatten(), maxima=True, maxima_props={"order": 5})

[295]:
h, binmids = calc_histogram(
distances.flatten(),
save=True,
base_name=figsrc_dir / f"{scan_name}/{scan_name}_dist"
)
[267]:
print("Min: ", distances[distances > 0].min())
print("Max: ", distances.max())
Min: 0.005043478
Max: 6.638115
[375]:
r_array, min_n, max_n, mean_n, highlights_distributions = calc_n_neighbours_per_radius(
distances,
points_factor=2,
highlights=np.array([0.33, 1.83, 2.62, 3.95]),
hist_props={"bins": 20},
save=True,
base_name=figsrc_dir / f"{scan_name}/{scan_name}"
)
[271]:
plot_n_neighbours_per_radius(
r_array, min_n * 2, max_n * 2, mean_n * 2,
0.3, n_points
)

Scan¶
[272]:
scan_name = "tica"
clustering = CLUSTERING_MAP[scan_name] = cluster.prepare_clustering(data)
[ ]:
for r in tqdm(np.arange(0.01, 1.01, 0.01)):
neighbours = helper.compute_neighbours(data_concatened, r, sort=True)
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
for c in range(251):
neighbours_clustering.fit(r, c, member_cutoff=20, v=False)
clustering.summary.append(neighbours_clustering.summary[-1])
[401]:
for record in clustering.summary:
record.radius_cutoff = round(record.radius_cutoff, 5)
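The rounding above works around floating point noise accumulated by the np.arange radius grid; a quick sketch:
[ ]:
# np.arange values can come out slightly off the nominal grid
# (e.g. 0.29000000000000004 instead of 0.29), which breaks grouping
# records by radius in the summary; rounding to 5 digits restores clean keys.
r_grid = np.arange(0.01, 1.01, 0.01)
print(r_grid[28], round(float(r_grid[28]), 5))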
Plots¶
[99]:
scan_name = "tica"
clustering = CLUSTERING_MAP[scan_name]
Time¶
[404]:
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="execution_time",
contour_props={
"levels": 50,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax)
colorbar.set_label("time / s")
ax.set_xlim(0.01, 1.0)
ax.set_ylim(0, 250)
[404]:
(0.0, 250.0)

Noise level¶
[405]:
# Noise level
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="ratio_noise",
contour_props={
"levels": np.arange(0, 1.01, 0.01),
"vmin": 0,
"vmax": 1,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=(0, 0.5, 1))
colorbar.set_label("noise / %")
ax.set_xlim(0.01, 1.0)
ax.set_ylim(0, 250)
[405]:
(0.0, 250.0)

Largest cluster¶
[406]:
# Largest cluster
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="ratio_largest",
contour_props={
"levels": np.arange(0, 1.01, 0.01),
"vmin": 0,
"vmax": 1,
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=(0, 0.5, 1))
colorbar.set_label("largest / %")
ax.set_xlim(0.01, 1.0)
ax.set_ylim(0, 250)
[406]:
(0.0, 250.0)

Number of clusters¶
[407]:
# Number of clusters
show_n = 10
fig, ax = plt.subplots()
contour = clustering.summarize(
ax=ax,
quantity="n_clusters",
contour_props={
"levels": np.arange(-0.5, show_n + 1.5, 1),
"vmin": 0,
"vmax": show_n,
"extend": "max"
}
)[2][0]
colorbar = fig.colorbar(mappable=contour, ax=ax, ticks=range(0, show_n + 1, 2))
colorbar.set_label("# clusters")
ax.set_xlim(0.01, 1.0)
ax.set_ylim(0, 250)
[407]:
(0.0, 250.0)

Evaluation¶
[13]:
scan_name = "tica"
clustering = cluster.prepare_clustering(data)
[410]:
r = 1
c = 40
neighbours = helper.compute_neighbours(data_concatened, r, sort=True)
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
neighbours_clustering.fit(r, c, member_cutoff=20, v=False)
clustering._labels = neighbours_clustering._labels
[413]:
fig, Ax = plt.subplots(
1, 2,
figsize=(
mpl.rcParams["figure.figsize"][0] * 1.5,
mpl.rcParams["figure.figsize"][1],
)
)
for axi, dim in enumerate(range(0, 4, 2)):
clustering.evaluate(
ax=Ax[axi],
dim=(dim, dim + 1),
ax_props={"xlabel": None, "ylabel": None, "xticks": (), "yticks": ()}
)

[414]:
r = 0.75
c = 40
neighbours = helper.compute_neighbours(data_concatened, r, sort=True)
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
neighbours_clustering.fit(r, c, member_cutoff=20, v=False)
clustering._labels = neighbours_clustering._labels
[415]:
fig, Ax = plt.subplots(
1, 2,
figsize=(
mpl.rcParams["figure.figsize"][0] * 1.5,
mpl.rcParams["figure.figsize"][1],
)
)
for axi, dim in enumerate(range(0, 4, 2)):
clustering.evaluate(
ax=Ax[axi],
dim=(dim, dim + 1),
ax_props={"xlabel": None, "ylabel": None, "xticks": (), "yticks": ()}
)

[416]:
r = 0.5
c = 40
neighbours = helper.compute_neighbours(data_concatened, r, sort=True)
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
neighbours_clustering.fit(r, c, member_cutoff=20, v=False)
clustering._labels = neighbours_clustering._labels
[417]:
fig, Ax = plt.subplots(
1, 2,
figsize=(
mpl.rcParams["figure.figsize"][0] * 1.5,
mpl.rcParams["figure.figsize"][1],
)
)
for axi, dim in enumerate(range(0, 4, 2)):
clustering.evaluate(
ax=Ax[axi],
dim=(dim, dim + 1),
ax_props={"xlabel": None, "ylabel": None, "xticks": (), "yticks": ()}
)

[418]:
r = 0.25
c = 40
neighbours = helper.compute_neighbours(data_concatened, r, sort=True)
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
neighbours_clustering.fit(r, c, member_cutoff=20, v=False)
clustering._labels = neighbours_clustering._labels
[419]:
fig, Ax = plt.subplots(
1, 2,
figsize=(
mpl.rcParams["figure.figsize"][0] * 1.5,
mpl.rcParams["figure.figsize"][1],
)
)
for axi, dim in enumerate(range(0, 4, 2)):
clustering.evaluate(
ax=Ax[axi],
dim=(dim, dim + 1),
ax_props={"xlabel": None, "ylabel": None, "xticks": (), "yticks": ()}
)

[420]:
r = 0.15
c = 40
neighbours = helper.compute_neighbours(data_concatened, r, sort=True)
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
neighbours_clustering.fit(r, c, member_cutoff=20, v=False)
clustering._labels = neighbours_clustering._labels
[421]:
fig, Ax = plt.subplots(
1, 2,
figsize=(
mpl.rcParams["figure.figsize"][0] * 1.5,
mpl.rcParams["figure.figsize"][1],
)
)
for axi, dim in enumerate(range(0, 4, 2)):
clustering.evaluate(
ax=Ax[axi],
dim=(dim, dim + 1),
ax_props={"xlabel": None, "ylabel": None, "xticks": (), "yticks": ()}
)

[422]:
r = 0.1
c = 40
neighbours = helper.compute_neighbours(data_concatened, r, sort=True)
neighbours_clustering = cluster.prepare_clustering(
neighbours,
preparation_hook=cluster.prepare_neighbourhoods,
**neighbours_sorted_recipe
)
neighbours_clustering.fit(r, c, member_cutoff=20, v=False)
clustering._labels = neighbours_clustering._labels
[423]:
fig, Ax = plt.subplots(
1, 2,
figsize=(
mpl.rcParams["figure.figsize"][0] * 1.5,
mpl.rcParams["figure.figsize"][1],
)
)
for axi, dim in enumerate(range(0, 4, 2)):
clustering.evaluate(
ax=Ax[axi],
dim=(dim, dim + 1),
ax_props={"xlabel": None, "ylabel": None, "xticks": (), "yticks": ()}
)

Hierarchical semi-automatic¶
[12]:
h_clustering = cluster.prepare_clustering(
data,
**neighbours_sorted_alternative_recipe
)
[13]:
r = [0.5, 0.25]
c = 40
h_clustering.fit_hierarchical(r, c, member_cutoff=20)
[14]:
fig, Ax = plt.subplots(
1, 2,
figsize=(
mpl.rcParams["figure.figsize"][0] * 1.5,
mpl.rcParams["figure.figsize"][1],
)
)
for axi, dim in enumerate(range(0, 4, 2)):
h_clustering.evaluate(
ax=Ax[axi],
dim=(dim, dim + 1),
ax_props={"xlabel": None, "ylabel": None, "xticks": (), "yticks": ()}
)

[15]:
fig, ax = plt.subplots(figsize=(3.33, 3.33))
h_clustering.pie(ax=ax)
[15]:
(<Figure size 999x999 with 1 Axes>, <AxesSubplot:>)

[16]:
fig, ax = plt.subplots(figsize=(3.33, 1.5))
h_clustering.tree(
ax=ax,
pos_props={"x_spacing": 10, "y_spacing": 0.01},
draw_props={"node_size": 100, "font_size": 6},
)
[16]:
(<Figure size 999x450 with 1 Axes>, <AxesSubplot:>)

[17]:
def trim_trivial(clustering):
if clustering._labels is None:
return
if len(clustering._labels.mapping) == 1:
clustering._labels = None
return
for child in clustering._children.values():
trim_trivial(child)
return
[18]:
trim_trivial(h_clustering)
[19]:
h_clustering.reel()
[20]:
fig, Ax = plt.subplots(
1, 2,
figsize=(
mpl.rcParams["figure.figsize"][0] * 1.5,
mpl.rcParams["figure.figsize"][1],
)
)
for axi, dim in enumerate(range(0, 4, 2)):
h_clustering.evaluate(
ax=Ax[axi],
dim=(dim, dim + 1),
ax_props={"xlabel": None, "ylabel": None, "xticks": (), "yticks": ()}
)

Langerin (PCA)¶
[290]:
scan_name = "pca"
[291]:
# The test data
data_path = data_dir / "6a5j/projections/tica_500_4.npy"
data = [p[::10, :] for p in np.load(data_path)]
[292]:
data_concatened = np.concatenate(data)
n_points = data_concatened.shape[0]
distances = pairwise_distances(data_concatened[::2])
[293]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(2, 1))
ax1.scatter(
*data_concatened[:, :2].T,
s=0.5,
)
ax2.scatter(
*data_concatened[:, 2:].T,
s=0.5,
)
plt.tight_layout(w_pad=1)

Checks¶
[294]:
fig, ax = plt.subplots()
_ = plot.plot_histogram(ax, distances.flatten(), maxima=True, maxima_props={"order": 5})

[295]:
h, binmids = calc_histogram(
distances.flatten(),
save=True,
base_name=figsrc_dir / f"{scan_name}/{scan_name}_dist"
)
[267]:
print("Min: ", distances[distances > 0].min())
print("Max: ", distances.max())
Min: 0.005043478
Max: 6.638115
[296]:
r_array, min_n, max_n, mean_n, highlights_distributions = calc_n_neighbours_per_radius(
distances,
highlights=np.array([0.33, 1.83, 2.62, 3.95]),
hist_props={"bins": 20},
save=True,
base_name=figsrc_dir / f"{scan_name}/{scan_name}"
)
[271]:
plot_n_neighbours_per_radius(
r_array, min_n * 2, max_n * 2, mean_n * 2,
0.3, n_points
)
