# -*- coding: utf-8 -*-
# Author: Stephen Szwiec
# Date: 2023-02-19
# Description: Classification Scoring Module
#
# Copyright (C) 2023 Stephen Szwiec
#
# This file is part of qsarify.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Classification Scoring Module
This module provides summary information about Classification
"""
import numpy as np
from sklearn.metrics import accuracy_score
class ClassifierScore:
    """
    Provides summary information about a classification result

    Parameters
    ----------
    y_data : pandas DataFrame, shape = (n_samples,)
        observed Y values (class labels)
    pred_y : array-like, shape = (n_samples,)
        predicted Y values produced by the classifier

    Methods
    -------
    score(self)
    tf_table(self)
    """
    def __init__(self, y_data, pred_y):
        """
        Initializes the classifier scorer
        """
        # Initialize the variables
        self.y_data = y_data
        self.pred_y = pred_y
        # Flatten y_data into a plain list of observed class labels
        self.real_y = [row[0] for row in np.array(self.y_data)]
    def score(self):
        """
        Calculate and print the accuracy score

        Returns
        -------
        None
        """
        pred = np.asarray(self.pred_y)
        # Count the number of wrong predictions
        wrong = sum(1 for real, guess in zip(self.real_y, pred) if real != guess)
        print('Number of all :', len(self.real_y))
        print('Number of wrong :', wrong)
        print('Accuracy Score :', accuracy_score(self.real_y, pred))
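
    # Worked example (hypothetical numbers, for illustration only): with 10
    # samples of which 2 are misclassified, score() would print
    # 'Number of wrong : 2' and an accuracy of (10 - 2) / 10 = 0.8.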
    def tf_table(self):
        """
        Calculate precision and recall
        Prints a confusion matrix summary

        Returns
        -------
        None
        """
        pred = np.asarray(self.pred_y)
        # Count the actual class sizes
        one = sum(1 for real in self.real_y if real == 1)
        zero = sum(1 for real in self.real_y if real == 0)
        # Count the misclassified samples per class:
        # realzero = actual 0 predicted 1 (false positives)
        # realone  = actual 1 predicted 0 (false negatives)
        realzero = 0
        realone = 0
        for real, guess in zip(self.real_y, pred):
            if real != guess:
                if real == 0:
                    realzero += 1
                if real == 1:
                    realone += 1
        # Print the results
        print('Number of 1 :', one)
        print('Number of 0 :', zero)
        print('True Positive (real 1, pred 1) :', one - realone)    # TP
        print('True Negative (real 0, pred 0) :', zero - realzero)  # TN
        print('False Positive (real 0, pred 1) :', realzero)        # FP
        print('False Negative (real 1, pred 0) :', realone)         # FN
        print('Precision :', (one - realone) / ((one - realone) + realzero))  # TP / (TP + FP)
        print('Recall :', (one - realone) / ((one - realone) + realone))      # TP / (TP + FN)
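

# Minimal usage sketch (illustrative only): the data below is hypothetical and
# assumes y_data is a single-column pandas DataFrame of 0/1 class labels and
# pred_y is an array-like of predicted 0/1 labels of the same length.
if __name__ == "__main__":
    import pandas as pd
    y_data = pd.DataFrame({'class': [1, 0, 1, 1, 0, 0]})
    pred_y = np.array([1, 0, 0, 1, 0, 1])
    scorer = ClassifierScore(y_data, pred_y)
    scorer.score()     # 4 of 6 predictions correct -> accuracy 0.667
    scorer.tf_table()  # TP=2, TN=2, FP=1, FN=1 -> precision 2/3, recall 2/3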