Source code for qsarify.qsar_scoring

# -*- coding: utf-8 -*-
# Author: Stephen Szwiec
# Date: 2023-02-19
# Description: QSAR Scoring Module
"""
Copyright (C) 2023 Stephen Szwiec

This file is part of qsarify.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
import numpy as np
"""
Commonly used scoring functions for QSAR models
"""

def rmse_score(y_true, y_pred):
    """
    Calculates the root-mean-square error (RMSE) between observed and
    predicted values.

    Parameters
    ----------
    y_true : array-like, shape (n_samples,)
        Observed values.
    y_pred : array-like, shape (n_samples,)
        Predicted values.

    Returns
    -------
    float
        Square root of the mean squared residual.
    """
    # np.asarray lets plain sequences (lists, tuples) be used as well as
    # numpy arrays; arrays are passed through without copying.
    residuals = np.asarray(y_true) - np.asarray(y_pred)
    return np.sqrt(np.mean(np.square(residuals)))
def q2_score(y_true, y_pred):
    """
    Calculates the Q2 score.

    Q2 = 1 - PRESS / TSS, where PRESS is the sum of squared prediction
    residuals and TSS is the sum of squared deviations of ``y_true`` from
    its own mean.

    Parameters
    ----------
    y_true : array-like, shape (n_samples,)
        Observed values.
    y_pred : array-like, shape (n_samples,)
        Predicted values.

    Returns
    -------
    float
        1.0 for a perfect prediction, 0.0 when the prediction is no better
        than the mean of ``y_true``, negative when it is worse.
    """
    # np.asarray lets plain sequences be used as well as numpy arrays.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    press = np.sum(np.square(y_true - y_pred))
    tss = np.sum(np.square(y_true - np.mean(y_true)))
    return 1 - press / tss
def q2f_score(y_true, y_pred, y_mean):
    """
    Calculates the Q2_f1 or Q2_f2 score, depending on whether the supplied
    mean comes from the training set or from the external set.

    The score is 1 - PRESS / TSS, where PRESS is the sum of squared
    prediction residuals and TSS is the sum of squared deviations of
    ``y_true`` from ``y_mean``.

    Parameters
    ----------
    y_true : array-like, shape (n_samples,)
        Observed values.
    y_pred : array-like, shape (n_samples,)
        Predicted values.
    y_mean : float
        Mean of the training (for Q2_f1) or test (for Q2_f2) set.

    Returns
    -------
    float
    """
    # np.asarray lets plain sequences be used as well as numpy arrays.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    press = np.sum(np.square(y_true - y_pred))
    tss = np.sum(np.square(y_true - y_mean))
    return 1 - press / tss
def q2f3_score(y_true, y_pred, n_train, n_external, y_train=None):
    """
    Calculates the Q2_f3 score.

    Q2_f3 = 1 - (PRESS / n_external) / (TSS / n_train)

    PRESS is the sum of squared prediction residuals and TSS is a total sum
    of squares about the mean. Both terms are normalized per sample so that
    the score does not depend on the relative sizes of the two sets.

    Parameters
    ----------
    y_true : array-like, shape (n_samples,)
        Observed values for the predicted samples.
    y_pred : array-like, shape (n_samples,)
        Predicted values.
    n_train : int
        Number of training samples.
    n_external : int
        Number of external samples.
    y_train : array-like, shape (n_train,), optional
        Observed training-set responses. When given, TSS is computed from
        these values about their own mean. When omitted, TSS is computed
        from ``y_true`` about its own mean.

    Returns
    -------
    float
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    press = np.sum(np.square(y_true - y_pred))
    # NOTE(review): the Q2_f3 definition takes TSS from the training
    # responses; pass y_train to get that. The fallback to y_true keeps
    # the TSS source used by existing callers.
    reference = y_true if y_train is None else np.asarray(y_train)
    tss = np.sum(np.square(reference - np.mean(reference)))
    # TSS is divided (not multiplied) by n_train: the denominator is the
    # mean squared deviation per training sample.
    return 1 - (press / n_external) / (tss / n_train)
def ccc_score(y_true, y_pred):
    """
    Calculates the concordance correlation coefficient (CCC).

    CCC = 2 * cov(y_true, y_pred)
          / (var(y_true) + var(y_pred) + (mean(y_true) - mean(y_pred))**2)

    The variances and the covariance all use the population (1/N)
    normalization, so identical inputs give exactly 1.

    Parameters
    ----------
    y_true : array-like, shape (n_samples,)
        Observed values.
    y_pred : array-like, shape (n_samples,)
        Predicted values.

    Returns
    -------
    float
        1.0 for perfect agreement, -1.0 for perfect reversed agreement.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    mean_true = y_true.mean()
    mean_pred = y_pred.mean()
    var_true = y_true.var()
    var_pred = y_pred.var()
    # The covariance is computed by hand with 1/N weighting to match
    # ndarray.var() above; np.cov defaults to 1/(N-1), which would inflate
    # the score by N/(N-1).
    covar_true_pred = np.mean((y_true - mean_true) * (y_pred - mean_pred))
    return 2 * covar_true_pred / (var_true + var_pred + (mean_true - mean_pred) ** 2)