Source code for mlbugdetection.monotonic

import pickle
import numpy as np
from matplotlib import pyplot as plt
from .analysis_report import AnalysisReport

[docs]def monotonicity_mse(predictions): """Monotonicity Mean Square Error Calculates the MSE between a list of prediction brobabilities and the closest monotonic version of this list. Parameters ---------- predictions : List List of prediction probabilities calculated on the check_monotonicity function. Returns ------- desc | asc : List List of closest monotonic version of "predictions". mse_desc | mse_as : int MSE between "predictions" and desc/asc. """ desc = np.minimum.accumulate(predictions) asc = np.maximum.accumulate(predictions) mse_desc = (np.square(predictions - desc)).mean(axis=0) mse_asc = (np.square(predictions - asc)).mean(axis=0) if min(mse_asc,mse_desc) == mse_desc: return desc, min(mse_asc,mse_desc) else: return asc, min(mse_asc,mse_desc)
[docs]def check_monotonicity_single_sample(model, sample, feature, start, stop, step=1): '''Monotonicity Analysis for a single example Parameters ---------- model : sklearn model or str Model that will be used to make predictions. Could be a model object or a path to a model file. sample : pandas.DataFrame Pandas DataFrame containing one row that will be used as base point. feature : str Name of the feature being analysed. start : int The starting value of the feature's interval. stop : int The end value of the feature's interval. step : float, default=1 Size of the step between ranges "start" and "stop". Ex: step = 0.1 between ranges 0 and 1 will result in [0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9] Returns ------- AnalysisReport object with following attributes: For more information: >>> from mlbugdetection.analysis_report import AnalysisReport >>> help(AnalysisReport) model_name : str Name of the model being analysed. analysed_feature : str Name of the feature being analysed. feature_range : tuple Range of values of the feature being analysed: (start, stop). metrics : dictionary Dictionary with all the calculated metrics, such as: 'monotonic' : bool If the list of values is monotonic. 'monotonic_mse': float MSE between the list of values and it`s closest monotonic aproximation. graphs : List List of all the figures created. ''' if len(sample) > 1: raise Exception("Sample must have only one example, please use 'check_monotonicity_multiple_samples' for multiple samples") if type(model) == str: with open(model, 'rb') as f: model = pickle.load(f) report = AnalysisReport() colValues = [] predictions = [] for i in np.arange(start,stop,step): colValues.append(i) sample[feature] = i prediction = model.predict_proba(sample) predictions.append(prediction[0][1]) monotonic = (all(predictions[i] <= predictions[i + 1] for i in range(len(predictions) - 1)) or all(predictions[i] >= predictions[i + 1] for i in range(len(predictions) - 1))) report.model_name = type(model).__name__ report.analysed_feature = feature report.feature_range = (start, stop) fig = plt.figure(figsize=(6, 3), dpi=150) report.graphs.append(fig) if monotonic: report.metrics["monotonic"] = True report.metrics["monotonic_mse"] = 0 else: report.metrics["monotonic"] = False monotonic_curve, m_mse_score = monotonicity_mse(predictions) report.metrics["monotonic_mse"] = m_mse_score plt.plot(colValues, monotonic_curve, linestyle='dashed', color='red', alpha=0.7, label="Monotonic Approximation") plt.plot(colValues, predictions, color='blue', alpha=0.7, label="Predictions Curve") plt.title(f"Model: {type(model).__name__}") plt.xlabel(f'Feature {feature} value') plt.ylabel('Predict proba') plt.legend(loc="lower right") return report
[docs]def check_monotonicity_multiple_samples(model, samples, feature, start, stop, step=1): '''Monotonicity Analysis for multiple examples Parameters ---------- model : sklearn model or str Model that will be used to make predictions. Could be a model object or a path to a model file. samples : pandas.DataFrame Pandas DataFrame containing two or more rows that will be used as base point. feature : str Name of the feature being analysed. start : int The starting value of the feature's interval. stop : int The end value of the feature's interval. step : float, default=1 Size of the step between ranges "start" and "stop". Ex: step = 0.1 between ranges 0 and 1 will result in [0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9] Returns ------- AnalysisReport object with following attributes: For more information: >>> from mlbugdetection.analysis_report import AnalysisReport >>> help(AnalysisReport) model_name : str Name of the model being analysed. analysed_feature : str Name of the feature being analysed. feature_range : tuple Range of values of the feature being analysed: (start, stop). metrics : dictionary Dictionary with all the calculated metrics, such as: 'monotonic' : bool If the list of values is monotonic. 'monotonic_mse': float MSE between the list of values and it`s closest monotonic aproximation. 'monotonic_means_std': float Standard deviation of the means of the predictions probabilities. graphs : List List of all the figures created. ''' if len(samples) < 2: raise Exception("Samples must have multiple examples, please use 'check_monotonicity_single_sample' for single example") if type(model) == str: with open(model, 'rb') as f: model = pickle.load(f) report = AnalysisReport() colValues = [] predictions = [] for i in np.arange(start,stop,step): colValues.append(i) samples[feature] = i prediction = model.predict_proba(samples) predictions.append(np.mean(prediction[0][1])) monotonic = (all(predictions[i] <= predictions[i + 1] for i in range(len(predictions) - 1)) or all(predictions[i] >= predictions[i + 1] for i in range(len(predictions) - 1))) report.model_name = type(model).__name__ report.analysed_feature = feature report.feature_range = (start, stop) fig = plt.figure(figsize=(6, 3), dpi=150) report.graphs.append(fig) if monotonic: report.metrics["monotonic"] = True report.metrics["monotonic_mse"] = 0 else: report.metrics["monotonic"] = False monotonic_curve, m_mse_score = monotonicity_mse(predictions) report.metrics["monotonic_mse"] = m_mse_score plt.plot(colValues, monotonic_curve, linestyle='dashed', color='red', alpha=0.7, label="Monotonic Approximation") report.metrics["monotonic_means_std"] = np.nanstd(predictions) plt.plot(colValues, predictions, color='blue', alpha=0.7, label="Predictions Curve") plt.title(f"Model: {type(model).__name__}") plt.xlabel(f'Feature {feature} value') plt.ylabel('Predict proba') plt.legend(loc="lower right") return report