Source code for mlbugdetection.sanity

import pickle
import pandas as pd
from .analysis_report import AnalysisReport

[docs]def check_type_input_model(model):
    ''' Check the type of the input model and returns the model object '''
    if type(model) == str:
        with open(model, 'rb') as f:
            model = pickle.load(f)
    return model

[docs]def sanity_check(model, samples, target):
    '''Sanity Test
        Analyzes the sanity of a model with samples and 
        return a bool that represents if the tests passed or not.

    Parameters
    ----------
    model : sklearn model or str
        The model to be used for prediction. Could be a model object or a path to a model file.

    samples : pandas DataFrame
        The samples (or sample) to be used for prediction, which the model
        need to predict correctly. 

    target : str
        The name of the column containing the target variable.

    Returns
    -------
    bool True if the model is sane, False otherwise.
    '''
    model = check_type_input_model(model)

    result = model.predict(samples.drop(target, axis=1))
    original = samples[target]

    result = pd.Series(result).reset_index(drop=True)
    origin = original.reset_index(drop=True)
    values = result == origin

    if len(values.value_counts().index) == 2:
        return False

    return values[0]

[docs]def sanity_check_with_indexes(model, samples, target):
    '''Sanity Test With Indexes
        Analyzes the sanity of a model with samples and 
        shows a Analysis Report that shows if the tests passed or not.
        If the tests failed, it will show the indexes of the samples that were misclassified.

    Parameters
    ----------
    model : sklearn model or str
        The model to be used for prediction. Could be a model object or a path to a model file.

    samples : pandas DataFrame
        The samples (or sample) to be used for prediction, which the model
        need to predict correctly. 

    target : str
        The name of the column containing the target variable.

    Returns
    -------
     AnalysisReport object with following attributes:
        For more information:
        >>> from mlbugdetection.analysis_report import AnalysisReport
        >>> help(AnalysisReport)

    model_name : str
        Name of the model being analysed.
    
    analysed_feature : str
        Name of the feature being analysed.

    metrics : dictionary
        Dictionary with all the calculated metrics, such as:
        
        'sanity' : bool
            If the model is sane or not.

        'sanity_indexes': List
            List of indexes of the samples that were misclassified.

    '''
    model = check_type_input_model(model)
    report = AnalysisReport()

    result = model.predict(samples.drop(target, axis=1))
    original = samples[target]

    result = pd.Series(result).reset_index(drop=True)
    origin = original.reset_index(drop=True)
    values = result == origin
    
    report.model_name = type(model).__name__
    report.analysed_feature = target

    if len(values.value_counts().index) == 2:
        report.metrics["sanity"] = False
        report.metrics["sanity_indexes"] = values[values==False].index.to_list()
        return report

    report.metrics["sanity"] = True
    report.metrics["sanity_indexes"] = []
    return report