import warnings
from copy import deepcopy
from typing import List, Dict, Union, Optional
import numpy as np
from qiskit.result import Result
from qiskit.providers import BaseBackend, Backend
from qiskit.tools import parallel_map
from qiskit.utils import QuantumInstance
from sklearn.exceptions import NotFittedError
from sklearn.base import ClusterMixin
from ..quantum_estimator import QuantumEstimator
from .centroid_initialization import (
random,
kmeans_plus_plus,
naive_sharding
)
from .qkmeans_circuit import *
# Module-level logger; QKMeans uses it to report progress while building circuits.
# NOTE(review): `logging` is not imported explicitly in the import block above;
# presumably it is re-exported by the star import from `.qkmeans_circuit` -- confirm.
logger = logging.getLogger(__name__)
class QKMeans(ClusterMixin, QuantumEstimator):
    """
    The Quantum K-Means algorithm for clustering

    Note:
        The naming conventions follow the KMeans from
        sklearn.cluster

    Example:

        Cluster data using the Iris dataset.

        .. jupyter-execute::

            import numpy as np
            import matplotlib.pyplot as plt
            from qlearnkit.algorithms import QKMeans
            from qiskit import BasicAer
            from qiskit.utils import QuantumInstance, algorithm_globals
            from sklearn.datasets import load_iris
            from sklearn.model_selection import train_test_split

            seed = 42
            algorithm_globals.random_seed = seed

            quantum_instance = QuantumInstance(BasicAer.get_backend('qasm_simulator'),
                                               shots=1024,
                                               optimization_level=1,
                                               seed_simulator=seed,
                                               seed_transpiler=seed)

            # Use iris data set for training and test data
            X, y = load_iris(return_X_y=True)

            num_features = 2
            mask = y != 2
            X = X[mask][:, :num_features]
            y = y[mask]

            qkmeans = QKMeans(n_clusters=3,
                              quantum_instance=quantum_instance
            )

            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=seed)
            qkmeans.fit(X_train)

            print(qkmeans.labels_)
            print(qkmeans.cluster_centers_)

            # Plot the results
            colors = ['blue', 'orange', 'green']
            for i in range(X_train.shape[0]):
                plt.scatter(X_train[i, 0], X_train[i, 1], color=colors[qkmeans.labels_[i]])
            plt.scatter(qkmeans.cluster_centers_[:, 0], qkmeans.cluster_centers_[:, 1], marker='*', c='g', s=150)
            plt.show()

            # Predict new points
            prediction = qkmeans.predict(X_test)
            print(prediction)
    """

    def __init__(self,
                 n_clusters: int = 6,
                 quantum_instance: Optional[Union[QuantumInstance, BaseBackend, Backend]] = None,
                 *,
                 init: Union[str, np.ndarray] = "kmeans++",
                 n_init: int = 1,
                 max_iter: int = 30,
                 tol: float = 1e-4,
                 random_state: int = 42,
                 ):
        """
        Args:
            n_clusters:
                The number of clusters to form as well as the number of
                centroids to generate.
            quantum_instance:
                the quantum instance to set. Can be a
                :class:`~qiskit.utils.QuantumInstance`, a :class:`~qiskit.providers.Backend`
                or a :class:`~qiskit.providers.BaseBackend`
            init:
                Method of initialization of centroids: one of ``"kmeans++"``,
                ``"random"``, ``"naive"`` (alias ``"naive_sharding"``), or an
                array of initial centers.
            n_init:
                Number of time the qkmeans algorithm will be run with
                different centroid seeds. The value is stored, but
                :meth:`fit` in this class performs a single run and
                does not read it.
            max_iter:
                Maximum number of iterations of the qkmeans algorithm for a
                single run.
            tol:
                Tolerance with regard to the difference of the cluster centroids
                of two consecutive iterations to declare convergence.
            random_state:
                Determines random number generation for centroid initialization.
        """
        super().__init__(quantum_instance=quantum_instance)

        # hyper-parameters
        self.n_clusters = n_clusters
        self.init = init
        self.n_init = n_init
        self.max_iter = max_iter
        self.tol = tol
        self.random_state = random_state

        # state populated by `fit`
        self.n_iter_ = 0
        self.cluster_centers_ = None
        # do not rename : this name is needed for
        # `fit_predict` inherited method from
        # `ClusterMixin` base class
        self.labels_ = None

    def _init_centroid(self,
                       X: np.ndarray,
                       init: Union[str, np.ndarray],
                       random_state: int):
        """
        Initializes the centroids according to the following criteria:

        'kmeans++': Create cluster centroids using the k-means++ algorithm.
        'random': Create random cluster centroids.
        'naive' (or 'naive_sharding'): Create cluster centroids using
        deterministic naive sharding algorithm.

        If an array is passed, it should be of shape (n_clusters, n_features)
        and gives the initial centers.

        The result is stored in ``self.cluster_centers_`` as a fresh float
        array, so later in-place centroid updates neither modify the
        caller's ``init`` array nor truncate means to integers.

        Args:
            X:
                Training dataset.
            init:
                Method of initialization of centroids.
            random_state:
                Determines random number generation for centroid initialization.

        Raises:
            ValueError: if ``init`` is a string naming an unknown method.
        """
        if isinstance(init, str):
            if init == "random":
                centers = random(X, self.n_clusters, random_state)
            elif init == "kmeans++":
                centers = kmeans_plus_plus(X, self.n_clusters, random_state)
            elif init in ("naive", "naive_sharding"):
                centers = naive_sharding(X, self.n_clusters)
            else:
                raise ValueError(f"Unknown centroids initialization method {init}. "
                                 f"Expected random, kmeans++, naive or vector of "
                                 f"centers, but {init} was provided")
        else:
            centers = init

        # np.array (not asarray) always copies: `_recompute_centroids`
        # assigns rows in place and must not write into user data.
        self.cluster_centers_ = np.array(centers, dtype=float)

    def _recompute_centroids(self):
        """
        Reassign centroid value to be the calculated mean value for each cluster.
        If a cluster is empty the corresponding centroid remains the same.
        """
        for cluster in range(self.n_clusters):
            members = self.labels_ == cluster
            if np.any(members):
                self.cluster_centers_[cluster] = np.mean(self.X_train[members], axis=0)

    def _compute_distances_centroids(self,
                                     counts: Dict[str, int]) -> List[int]:
        """
        Compute the distance of a point with respect to all the centroids,
        without explicitly measuring it, using a dictionary of counts
        which refers to the following circuit:

        .. parsed-literal::

                    ┌───┐                   ┌───┐
            |0anc>: ┤ H ├────────────■──────┤ H ├────────M
                    └───┘            |      └───┘
                    ┌───┐   ┌────┐   |
            |0>: ───┤ H ├───┤ U3 ├───X──────────
                    └───┘   └────┘   |
                    ┌───┐   ┌────┐   |
            |0>: ───┤ H ├───┤ U3 ├───X──────────
                    └───┘   └────┘

        For centroid ``i`` the value read is the count of the bitstring of
        width ``n_clusters`` whose only set bit is bit ``i`` (``1 << i``);
        a missing bitstring counts as 0.

        Args:
            counts:
                Counts resulting after the simulation.

        Returns:
            One count per centroid, used as the distance measure.
        """
        width = self.n_clusters
        return [
            counts.get(format(1 << i, "b").zfill(width), 0)
            for i in range(width)
        ]

    def _get_distances_centroids(self,
                                 results: Result) -> np.ndarray:
        """
        Retrieves distances from counts via :func:`_compute_distances_centroids`

        Args:
            results: :class:`~qiskit.Result` object of execution results

        Returns:
            np.ndarray of distances, one row per executed circuit and
            one column per centroid
        """
        counts = results.get_counts()
        # `get_counts` returns a bare dict (not a list of dicts) when a
        # single circuit was executed; normalise so one sample still works.
        if isinstance(counts, dict):
            counts = [counts]

        # compute distance from centroids using counts
        return np.asarray(
            [self._compute_distances_centroids(count) for count in counts]
        )

    def _construct_circuits(self,
                            X_test: np.ndarray) -> List[QuantumCircuit]:
        """
        Creates the circuits to be executed on
        the gated quantum computer for the clustering
        process

        Args:
            X_test: The unclassified input data.

        Returns:
            List of quantum circuits created for the computation
        """
        logger.info("Starting circuits construction ...")

        # one circuit per sample, built against the current centroids
        circuits = parallel_map(
            construct_circuit,
            X_test,
            task_args=[self.cluster_centers_, self.n_clusters]
        )

        logger.info("Done.")
        return circuits

    def fit(self,
            X: np.ndarray,
            y: Optional[np.ndarray] = None):
        """
        Fits the model using X as training dataset
        and y as training labels. For the qkmeans algorithm y is ignored.
        The fit model creates clusters from the training dataset given as input

        Args:
            X: training dataset
            y: Ignored.
               Kept here for API consistency

        Returns:
            trained QKMeans object

        Warns:
            UserWarning: if the centroid shift is still above ``tol``
                when the iterations stop.
        """
        self.X_train = np.asarray(X)
        self._init_centroid(self.X_train, self.init, self.random_state)

        # integer placeholder: labels are cluster indices
        self.labels_ = np.zeros(self.X_train.shape[0], dtype=int)
        error = np.inf
        self.n_iter_ = 0

        # while error not below tolerance, reiterate the
        # centroid computation for a maximum of `max_iter` times
        while error > self.tol and self.n_iter_ < self.max_iter:
            # construct circuits using training data
            # notice: the construction uses the centroids
            # which are recomputed after every iteration
            circuits = self._construct_circuits(self.X_train)

            # executing and computing distances from centroids
            results = self.execute(circuits)
            distances = self._get_distances_centroids(results)

            # assigning clusters and recomputing centroids
            self.labels_ = np.argmin(distances, axis=1)
            cluster_centers_old = deepcopy(self.cluster_centers_)
            self._recompute_centroids()

            # evaluating error and updating iteration count
            error = np.linalg.norm(self.cluster_centers_ - cluster_centers_old)
            self.n_iter_ += 1

        # Warn only on a genuine failure: reaching tolerance on the very
        # last allowed iteration is still a successful convergence.
        if error > self.tol:
            warnings.warn(f"QKMeans failed to converge after "
                          f"{self.max_iter} iterations.")

        return self

    def predict(self,
                X_test: np.ndarray) -> np.ndarray:
        """Predict the labels of the provided data.

        Args:
            X_test:
                New data to predict.

        Returns:
            Index of the cluster each sample belongs to.

        Raises:
            NotFittedError: if :meth:`fit` has not been called yet.
        """
        if self.labels_ is None or self.cluster_centers_ is None:
            raise NotFittedError(
                "This QKMeans instance is not fitted yet. "
                "Call 'fit' with appropriate arguments before using "
                "this estimator.")

        circuits = self._construct_circuits(np.asarray(X_test))
        results = self.execute(circuits)
        distances = self._get_distances_centroids(results)

        # the closest centroid is the one with the smallest distance value
        return np.argmin(distances, axis=1)

    def score(self,
              X: np.ndarray,
              y: Optional[np.ndarray] = None,
              sample_weight: Optional[np.ndarray] = None) -> float:
        """
        Returns Mean Silhouette Coefficient for all samples.

        Args:
            X: array of features
            y: Ignored.
               Not used, present here for API consistency by convention.
            sample_weight: Ignored.
               Not used, present here for API consistency by convention.

        Returns:
            Mean Silhouette Coefficient for all samples.
        """
        # imported locally, as in the original implementation
        from sklearn.metrics import silhouette_score

        predicted_labels = self.predict(X)
        return silhouette_score(X, predicted_labels)