deepinsight.doctor.prediction package

Submodules

deepinsight.doctor.prediction.classification_fit module

class deepinsight.doctor.prediction.classification_fit.DecisionTreeClassification

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'DECISION_TREE_CLASSIFICATION'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
class deepinsight.doctor.prediction.classification_fit.ExtraTreesClassification

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'EXTRA_TREES'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
class deepinsight.doctor.prediction.classification_fit.GBTClassification

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'GBT_CLASSIFICATION'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
class deepinsight.doctor.prediction.classification_fit.KNNClassification

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'KNN'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
supports_weight = False
class deepinsight.doctor.prediction.classification_fit.LARSClassification

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'LARS'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
supports_weight = False
class deepinsight.doctor.prediction.classification_fit.LogisticRegClassification

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'LOGISTIC_REGRESSION'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
class deepinsight.doctor.prediction.classification_fit.NeuralNetworkClassification

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'NEURAL_NETWORK'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
supports_weight = False
class deepinsight.doctor.prediction.classification_fit.RFClassification

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'RANDOM_FOREST_CLASSIFICATION'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
class deepinsight.doctor.prediction.classification_fit.SGDClassification

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'SGD_CLASSIFICATION'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
class deepinsight.doctor.prediction.classification_fit.SVCClassification

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'SVC_CLASSIFICATION'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
class deepinsight.doctor.prediction.classification_fit.ScikitClassification

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'SCIKIT_MODEL'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
class deepinsight.doctor.prediction.classification_fit.XGBClassification

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'XGBOOST_CLASSIFICATION'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
deepinsight.doctor.prediction.classification_fit.classification_fit(modeling_params, split_desc, transformed_train, prediction_type, m_folder=None, gridsearch_done_fn=None, target_map=None, with_sample_weight=False, with_class_weight=True, calibration=None)

Returns (clf, actual_params, prepared_train_X, initial_intrinsic_perf_data). For grid-searched models, extracts the best estimator.

deepinsight.doctor.prediction.classification_fit.classification_fit_ensemble(modeling_params, core_params, split_desc, data, target, sample_weight=None)

Returns (clf, actual_params, prepared_train_X, initial_intrinsic_perf_data). For grid-searched models, extracts the best estimator.

deepinsight.doctor.prediction.classification_fit.get_class_weight_dict(train_y)
deepinsight.doctor.prediction.classification_fit.register_classification_algorithm(algorithm)

deepinsight.doctor.prediction.classification_scoring module

class deepinsight.doctor.prediction.classification_scoring.BinaryClassificationModelScorer(modeling_params, clf, out_folder, preds, probas, valid_y, target_map, valid=None, test_df_index=None, sample_weight=None, ignore_num_classes=False)

Bases: deepinsight.doctor.prediction.scoring_base.PredictionModelScorer

score()
class deepinsight.doctor.prediction.classification_scoring.CVBinaryClassificationModelScorer(scorers)

Bases: object

score()
class deepinsight.doctor.prediction.classification_scoring.CVMulticlassModelScorer(scorers)

Bases: object

score()
class deepinsight.doctor.prediction.classification_scoring.ClassificationModelIntrinsicScorer(modeling_params, clf, train_X, train_y, pipeline, out_folder, prepared_X, iipd, calibrate_proba)

Bases: deepinsight.doctor.prediction.scoring_base.PredictionModelIntrinsicScorer

score()
class deepinsight.doctor.prediction.classification_scoring.MulticlassModelScorer(modeling_params, clf, out_folder, preds, probas, valid_y, target_map=None, valid=None, test_df_index=None, sample_weight=None, ignore_num_classes=False)

Bases: deepinsight.doctor.prediction.scoring_base.PredictionModelScorer

get_multiclass_confusion_matrix()
score(optimize_threshold=False)
deepinsight.doctor.prediction.classification_scoring.binary_classif_scoring_add_percentile_and_cond_outputs(pred_df, recipe_desc, model_folder, cond_outputs, target_map)
deepinsight.doctor.prediction.classification_scoring.binary_classification_predict(clf, pipeline, modeling_params, preprocessing_params, target_map, threshold, data, output_probas=True, ensemble_has_target=False)

Returns the predicted dataframe. Currently used only by the scoring recipe.

deepinsight.doctor.prediction.classification_scoring.binary_classification_predict_ensemble(clf, target_map, threshold, data, output_probas=True, has_target=False)

Returns (prediction df with a single column, probas df).

deepinsight.doctor.prediction.classification_scoring.binary_classification_predict_single(clf, pipeline, modeling_params, preprocessing_params, target_map, threshold, data, output_probas=True)

Returns (prediction df with a single column, probas df).

deepinsight.doctor.prediction.classification_scoring.binary_classification_scorer_with_valid(modeling_params, clf, valid, out_folder, test_df_index, target_map, with_sample_weight=False)
deepinsight.doctor.prediction.classification_scoring.compute_otimized_threshold(valid_y, probas, modeling_params, sample_weight=None)
deepinsight.doctor.prediction.classification_scoring.format_all_proba_density(classes, target_map, probas, valid_y, sample_weight=None)
deepinsight.doctor.prediction.classification_scoring.format_proba_density(data, sample_weight=None)
deepinsight.doctor.prediction.classification_scoring.is_proba_aware(algorithm, clf)
deepinsight.doctor.prediction.classification_scoring.multiclass_predict(clf, pipeline, modeling_params, preprocessing_params, target_map, data, output_probas=True, ensemble_has_target=False)

Returns the predicted dataframe. Used by the scoring recipe and lambda.

deepinsight.doctor.prediction.classification_scoring.multiclass_predict_ensemble(clf, target_map, data, output_probas, has_target=False)
deepinsight.doctor.prediction.classification_scoring.multiclass_predict_single(clf, pipeline, modeling_params, preprocessing_params, target_map, data, output_probas)
deepinsight.doctor.prediction.classification_scoring.multiclass_scorer_with_valid(modeling_params, clf, valid, out_folder, test_df_index, target_map=None, with_sample_weight=False)

deepinsight.doctor.prediction.common module

class deepinsight.doctor.prediction.common.PredictionAlgorithm

Bases: object

algorithm = None
get_gridsearcher(modeling_params=None, column_labels=None, m_folder=None, prediction_type='REGRESSION', target_map=None, unprocessed=None)
get_output_params(modeling_params, clf, fit_params)
model_from_params(ingrid, modeling_params, prediction_type)
Given the modeling and input params, outputs a tuple containing:
  • a grid (sklearn names)
  • a classifier (sklearn object)
  • optional fit_params to be passed to classifier.fit() afterwards
Parameters:
  • ingrid (dict) – Input parameter grid (DKU names)
  • modeling_params (dict) – Modeling params for current model
  • prediction_type (dict) – Prediction type
Returns:

grid, base_clf, fit_params

Return type:

tuple(dict, object, dict)

output_params(ret, clf, fit_params)

Given a fitted classifier, outputs a dict of algorithm params to be stored back to DKU.

Parameters:
  • ret (dict) – Input parameter grid (DKU names)
  • clf (dict) – Sklearn Classifier (fitted)
  • fit_params (dict) – Fit params
Returns:

Parameter dict (resolved & others)

Return type:

dict

supports_weight = True
deepinsight.doctor.prediction.common.build_cv(modeling_params, column_labels, is_classification)
deepinsight.doctor.prediction.common.dump_pretrain_info(clf, train_X, train_y, weight=None, calibration=False)
deepinsight.doctor.prediction.common.get_grid_scorer(modeling_params, prediction_type, target_map=None, unprocessed=None, custom_make_scorer=None)
deepinsight.doctor.prediction.common.get_grid_scorers(modeling_params, prediction_type, target_map=None, unprocessed=None, custom_make_scorer=None)

Returns a scorer, i.e. a function with signature (clf, X, y).

deepinsight.doctor.prediction.common.get_ingrid(modeling_params, algorithm)

Returns the grid object from the pre-train modeling params for a given algorithm

deepinsight.doctor.prediction.common.get_initial_intrinsic_perf_data(train_X, is_sparse)
deepinsight.doctor.prediction.common.get_max_features(ingrid)
deepinsight.doctor.prediction.common.get_selection_mode(max_features)
deepinsight.doctor.prediction.common.get_threshold_optim_function(modeling_params)

Returns a function that takes (y_true, y_pred), together with a ‘greater_is_better’ flag.

deepinsight.doctor.prediction.common.greater_is_better(metric, custom_evaluation_metric_gib)
deepinsight.doctor.prediction.common.make_cost_matrix_score(metrics_params)
deepinsight.doctor.prediction.common.make_lift_score(metrics_params)
deepinsight.doctor.prediction.common.pivot_property_to_list(o, proplist)
deepinsight.doctor.prediction.common.prepare_multiframe(train_X, modeling_params)
deepinsight.doctor.prediction.common.python2_friendly_exec(code, ctx_global, ctx_local)
deepinsight.doctor.prediction.common.replace_value_by_empty(element, value=0)
deepinsight.doctor.prediction.common.safe_del(dic, key)
deepinsight.doctor.prediction.common.safe_positive_int(x)
deepinsight.doctor.prediction.common.save_prediction_model(clf, out_params, listener, update_fn, folder)
deepinsight.doctor.prediction.common.scikit_model(modeling_params)
deepinsight.doctor.prediction.common.train_test_split(X, y, test_size, random_state)
deepinsight.doctor.prediction.common.weighted_quantile(values, weights, target_rate, cumsum_weights=None)

deepinsight.doctor.prediction.dt_xgboost module

class deepinsight.doctor.prediction.dt_xgboost.DTXGBClassifier(max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective='binary:logistic', booster='gbtree', gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1, colsample_bylevel=1, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, base_score=0.5, random_state=0, missing=None, n_jobs=-1, tree_method='auto')

Bases: xgboost.sklearn.XGBClassifier

class_weight = None
fit(X, y, eval_set=None, eval_metric=None, early_stopping_rounds=None, verbose=True, sample_weight=None, xgb_model=None)
set_params(**params)
class deepinsight.doctor.prediction.dt_xgboost.DTXGBRegressor(max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective='reg:linear', booster='gbtree', gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1, colsample_bylevel=1, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, base_score=0.5, random_state=0, missing=None, n_jobs=1, tree_method='auto')

Bases: xgboost.sklearn.XGBRegressor

fit(X, y, eval_set=None, eval_metric=None, early_stopping_rounds=None, verbose=True, sample_weight=None, xgb_model=None)
set_params(**params)
deepinsight.doctor.prediction.dt_xgboost.get_xgboost_scorer(metric_name, prediction_type)

deepinsight.doctor.prediction.ensembles module

class deepinsight.doctor.prediction.ensembles.AverageEnsembler

Bases: deepinsight.doctor.prediction.ensembles.Ensembler

ensemble_predictions(preds)
fit(preds, y, sample_weight=None)
class deepinsight.doctor.prediction.ensembles.ClassificationEnsembler(n_classes)

Bases: deepinsight.doctor.prediction.ensembles.Ensembler

ensemble_predictions(preds)
ensemble_probas(preds)
fit(preds, y, sample_weight=None)
inputs_probas()
outputs_probas()
class deepinsight.doctor.prediction.ensembles.EnsembleModel(core_params, ensemble_params, scorable_pipelines, pipelines_with_target, clfs, ensembler, thresholds=None)

Bases: object

predict(X)
predict_as_dataframe(X)
predict_proba(X)
predict_proba_as_dataframe(X)
set_with_target_pipelines_mode(use_with_target)
class deepinsight.doctor.prediction.ensembles.EnsembleRegressor(ensemble_params, core_params, split_desc)

Bases: object

create_scorable_pipelines(collectors)
fit(X, y, sample_weight=None)

Returns a pair (clf, train_X), where clf is the trained EnsembleModel and train_X is the training data ndarray obtained from the given multiframe

fit_pipelines(X)
class deepinsight.doctor.prediction.ensembles.Ensembler

Bases: object

ensemble_predictions(preds)
fit(preds, y, sample_weight=None)
inputs_probas()
outputs_probas()
class deepinsight.doctor.prediction.ensembles.LinearEnsembler

Bases: deepinsight.doctor.prediction.ensembles.Ensembler

ensemble_predictions(preds)
fit(preds, y, sample_weight=None)
class deepinsight.doctor.prediction.ensembles.LogisticClassifEnsembler(n_classes)

Bases: deepinsight.doctor.prediction.ensembles.ClassificationEnsembler

ensemble_predictions(preds)
ensemble_probas(preds)
fit(preds, y, sample_weight=None)
class deepinsight.doctor.prediction.ensembles.LogisticProbaEnsembler(n_classes)

Bases: deepinsight.doctor.prediction.ensembles.ProbabilisticEnsembler

coerce_probas(probas)
ensemble_predictions(preds)
ensemble_probas(preds)
fit(preds, y, sample_weight=None)
class deepinsight.doctor.prediction.ensembles.MedianEnsembler

Bases: deepinsight.doctor.prediction.ensembles.Ensembler

ensemble_predictions(preds)
fit(preds, y, sample_weight=None)
class deepinsight.doctor.prediction.ensembles.ProbabilisticAverageEnsembler(n_classes)

Bases: deepinsight.doctor.prediction.ensembles.ProbabilisticEnsembler

ensemble_predictions(preds)
ensemble_probas(probas)
fit(preds, y, sample_weight=None)
class deepinsight.doctor.prediction.ensembles.ProbabilisticEnsembler(n_classes)

Bases: deepinsight.doctor.prediction.ensembles.Ensembler

ensemble_probas(probas)
inputs_probas()
outputs_probas()
class deepinsight.doctor.prediction.ensembles.VotingEnsembler(n_classes)

Bases: deepinsight.doctor.prediction.ensembles.ClassificationEnsembler

ensemble_predictions(preds)
ensemble_probas(preds)
fit(preds, y, sample_weight=None)
deepinsight.doctor.prediction.ensembles.ensemble_from_fitted(core_params, ensemble_params, prep_folders, model_folders, train, with_sample_weight=False, with_class_weight=False)
deepinsight.doctor.prediction.ensembles.extract_probas(p_df, target_map)
deepinsight.doctor.prediction.ensembles.get_classifier_ensembler(n_classes, ensemble_params, preds, y, sample_weight=None, with_class_weight=False)
deepinsight.doctor.prediction.ensembles.get_probabilistic_ensembler(n_classes, ensemble_params, probas, y, sample_weight=None, with_class_weight=False)
deepinsight.doctor.prediction.ensembles.get_regression_ensembler(ensemble_params, preds, y, sample_weight=None)
deepinsight.doctor.prediction.ensembles.get_target_map(ensemble_params)
deepinsight.doctor.prediction.ensembles.is_probabilistic(ensemble_params)

deepinsight.doctor.prediction.feature_selection module

class deepinsight.doctor.prediction.feature_selection.ClassificationCorrelationSelector(params)

Bases: deepinsight.doctor.prediction.feature_selection.DropSelector

get_pruned_names(mf, target)
class deepinsight.doctor.prediction.feature_selection.DropFeatureSelection(kept_columns)

Bases: deepinsight.doctor.prediction.feature_selection.FeatureSelection

get_method()
get_selection_params()
transform(mf)
class deepinsight.doctor.prediction.feature_selection.DropSelector

Bases: deepinsight.doctor.prediction.feature_selection.FeatureSelector

fit(mf, target)
get_pruned_names(mf, target)
class deepinsight.doctor.prediction.feature_selection.FeatureSelection

Bases: object

get_method()
get_selection_params()
to_json()
transform(mf)
class deepinsight.doctor.prediction.feature_selection.FeatureSelectionStep(params, prediction_type)

Bases: deepinsight.doctor.preprocessing.dataframe_preprocessing.Step

static build_selection(method, selection_params)
fit_and_process(input_df, current_mf, output_ppr, generated_features_mapping)
init_resources(resources_handler)
process(input_df, current_mf, output_ppr, generated_features_mapping)
class deepinsight.doctor.prediction.feature_selection.FeatureSelector

Bases: object

fit(mf, target)
class deepinsight.doctor.prediction.feature_selection.LassoSelector(prediction_type, params)

Bases: deepinsight.doctor.prediction.feature_selection.DropSelector

get_pruned_names(mf, target)
class deepinsight.doctor.prediction.feature_selection.NoopFeatureSelection

Bases: deepinsight.doctor.prediction.feature_selection.FeatureSelection

get_method()
get_selection_params()
transform(mf)
class deepinsight.doctor.prediction.feature_selection.PCAFeatureSelection(sparse, input_names, rot, explained_variance=None, means=None)

Bases: deepinsight.doctor.prediction.feature_selection.FeatureSelection

get_method()
get_selection_params()
transform(mf)
class deepinsight.doctor.prediction.feature_selection.PCASelector(params)

Bases: deepinsight.doctor.prediction.feature_selection.FeatureSelector

fit(mf, target)
n_features_from_variance(var)
static use_sparse_pca(mf)
class deepinsight.doctor.prediction.feature_selection.RandomForestSelector(prediction_type, params)

Bases: deepinsight.doctor.prediction.feature_selection.DropSelector

get_pruned_names(mf, target)
class deepinsight.doctor.prediction.feature_selection.RegressionCorrelationSelector(params)

Bases: deepinsight.doctor.prediction.feature_selection.DropSelector

static dense_abs_cor(dense, target, t_mean, t_std)
get_pruned_names(mf, target)
static sparse_abs_cor(sparse, target_sparse, t_mean, t_std)
deepinsight.doctor.prediction.feature_selection.extract_features(mf, sparse=False)
deepinsight.doctor.prediction.feature_selection.get_feature_selector(params, prediction_type)

deepinsight.doctor.prediction.keras_evaluation_recipe module

Execute an evaluation recipe in Keras mode. Must be called in a Flow environment.

deepinsight.doctor.prediction.keras_evaluation_recipe.main(model_folder, input_dataset_smartname, output_dataset_smartname, metrics_dataset_smartname, recipe_desc, script, preparation_output_schema, cond_outputs=None)

deepinsight.doctor.prediction.keras_scoring_recipe module

Execute a prediction scoring recipe in Keras mode. Must be called in a Flow environment.

deepinsight.doctor.prediction.keras_scoring_recipe.main(model_folder, input_dataset_smartname, output_dataset_smartname, recipe_desc, script, preparation_output_schema, cond_outputs=None)

deepinsight.doctor.prediction.lars module

class deepinsight.doctor.prediction.lars.DkuLassoLarsClassifier(max_var=0, K=100)

Bases: sklearn.base.BaseEstimator

fit(X, y)
post_process(user_meta)
predict(X)
predict_proba(X)
class deepinsight.doctor.prediction.lars.DkuLassoLarsRegressor(max_var=0)

Bases: sklearn.base.BaseEstimator

fit(X, y)
post_process(user_meta)
predict(X)

deepinsight.doctor.prediction.prediction_model_serialization module

class deepinsight.doctor.prediction.prediction_model_serialization.BinaryModelSerializer(columns, clf, modeling_params, run_folder, target_mapping, calibrate_proba=False)

Bases: deepinsight.doctor.prediction.prediction_model_serialization.ModelSerializer

get_calibrator()
get_model()
class deepinsight.doctor.prediction.prediction_model_serialization.ModelSerializer(columns, clf, modeling_params, run_folder, target_mapping)

Bases: object

get_calibrator()
get_model()

Returns the serializable model for this model, which includes both the algorithm name to serialize and the model data

serialize()
Dump all relevant model-related information to the run_folder. This includes:
  • the serialized model
  • the final preprocessed column names, in the order in which they are used by the model
  • in the case of binary or multiclass classification, the class mapping
class deepinsight.doctor.prediction.prediction_model_serialization.MulticlassModelSerializer(columns, clf, modeling_params, run_folder, target_mapping, calibrate_proba=False)

Bases: deepinsight.doctor.prediction.prediction_model_serialization.ModelSerializer

get_calibrator()
get_model()
class deepinsight.doctor.prediction.prediction_model_serialization.RegressionModelSerializer(columns, clf, modeling_params, run_folder)

Bases: deepinsight.doctor.prediction.prediction_model_serialization.ModelSerializer

get_model()
class deepinsight.doctor.prediction.prediction_model_serialization.SerializableModel(name, model)

Bases: object

deepinsight.doctor.prediction.reg_evaluation_recipe module

Execute an evaluation recipe in PyRegular mode. Must be called in a Flow environment.

deepinsight.doctor.prediction.reg_evaluation_recipe.add_evaluation_columns(prediction_type, pred_df, y, target_mapping)
deepinsight.doctor.prediction.reg_evaluation_recipe.compute_binary_classification_metrics(modeling_params, valid_y, preds, probas=None, sample_weight=None, unprocessed=None)
deepinsight.doctor.prediction.reg_evaluation_recipe.compute_metrics_df(prediction_type, inv_map, modeling_params, output_df, recipe_desc, y, unprocessed, sample_weight=None)
deepinsight.doctor.prediction.reg_evaluation_recipe.compute_multiclass_metrics(modeling_params, valid_y, preds, probas=None, sample_weight=None, unprocessed=None)
deepinsight.doctor.prediction.reg_evaluation_recipe.compute_regression_metrics(modeling_params, valid_y, preds, sample_weight=None, unprocessed=None)
deepinsight.doctor.prediction.reg_evaluation_recipe.main(model_folder, input_dataset_smartname, output_dataset_smartname, metrics_dataset_smartname, recipe_desc, script, preparation_output_schema, cond_outputs=None)

deepinsight.doctor.prediction.reg_scoring_recipe module

Execute a prediction scoring recipe in PyRegular mode. Must be called in a Flow environment.

deepinsight.doctor.prediction.reg_scoring_recipe.main(model_folder, input_dataset_smartname, output_dataset_smartname, recipe_desc, script, preparation_output_schema, cond_outputs=None)

deepinsight.doctor.prediction.reg_train_recipe module

Execute a prediction training recipe in PyRegular mode. Must be called in a Flow environment.

deepinsight.doctor.prediction.reg_train_recipe.main(exec_folder, selection_state_folder, operation_mode)

The whole execution of the saved model training takes place in a single folder?

deepinsight.doctor.prediction.regression_fit module

class deepinsight.doctor.prediction.regression_fit.DecisionTreeRegression

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'DECISION_TREE_REGRESSION'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
class deepinsight.doctor.prediction.regression_fit.ExtraTreesRegression

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'EXTRA_TREES'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
class deepinsight.doctor.prediction.regression_fit.GBTRegression

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'GBT_REGRESSION'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
class deepinsight.doctor.prediction.regression_fit.KNNRegression

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'KNN'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
supports_weight = False
class deepinsight.doctor.prediction.regression_fit.LARSRegression

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'LARS'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
supports_weight = False
class deepinsight.doctor.prediction.regression_fit.LassoRegression

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'LASSO_REGRESSION'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
supports_weight = False
class deepinsight.doctor.prediction.regression_fit.LeastSquareRegression

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'LEASTSQUARE_REGRESSION'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
class deepinsight.doctor.prediction.regression_fit.NeuralNetworkRegression

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'NEURAL_NETWORK'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
supports_weight = False
class deepinsight.doctor.prediction.regression_fit.RFRegression

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'RANDOM_FOREST_REGRESSION'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
class deepinsight.doctor.prediction.regression_fit.RidgeRegression

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'RIDGE_REGRESSION'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
class deepinsight.doctor.prediction.regression_fit.SGDRegression

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'SGD_REGRESSION'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
class deepinsight.doctor.prediction.regression_fit.SVMRegression

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'SVM_REGRESSION'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
class deepinsight.doctor.prediction.regression_fit.ScikitRegression

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'SCIKIT_MODEL'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
class deepinsight.doctor.prediction.regression_fit.XGBoostRegression

Bases: deepinsight.doctor.prediction.common.PredictionAlgorithm

algorithm = 'XGBOOST_REGRESSION'
model_from_params(ingrid, modeling_params, prediction_type)
output_params(ret, clf, fit_params)
supports_weight = True
deepinsight.doctor.prediction.regression_fit.register_regression_algorithm(algorithm)
deepinsight.doctor.prediction.regression_fit.regression_fit_ensemble(modeling_params, core_params, split_desc, train_X, train_y, sample_weight=None)
deepinsight.doctor.prediction.regression_fit.regression_fit_single(modeling_params, split_desc, transformed_train, m_folder=None, gridsearch_done_fn=None, with_sample_weight=False)

Returns (clf, actual_params, prepared_train_X, initial_intrinsic_perf_data). For grid-searched models, extracts the best estimator.

deepinsight.doctor.prediction.regression_scoring module

class deepinsight.doctor.prediction.regression_scoring.CVRegressionModelScorer(scorers)

Bases: object

score()
class deepinsight.doctor.prediction.regression_scoring.Denormalizer(rescalers)

Bases: object

Post-processing on the coefficients of a linear model. Scales back coefficients, intercepts and std thereof to maintain homogeneity with the original variable.

denormalize_coef(feature_name, coef_value)
denormalize_feature_value(feature_name, feature_value)
denormalize_intercept(intercept_value, feature_names, coef_values)
denormalize_intercept_stderr(intercept_stderr, feature_names, coef_stderr_values)
class deepinsight.doctor.prediction.regression_scoring.GradientBoostingSummaryBuilder(model, featureNames, rescalers, is_regression, max_nodes)

Bases: object

build()
class deepinsight.doctor.prediction.regression_scoring.PartialDependencyPlotBuilder(model, train_X, train_y, rescalers, offset=False)

Bases: object

build()
class deepinsight.doctor.prediction.regression_scoring.RandomForestSummaryBuilder(model, featureNames, rescalers, is_regression, max_nodes)

Bases: object

build()
class deepinsight.doctor.prediction.regression_scoring.RegressionModelIntrinsicScorer(modeling_params, clf, train_X, train_y, pipeline, out_folder, prepared_X, iipd)

Bases: deepinsight.doctor.prediction.scoring_base.PredictionModelIntrinsicScorer

score()
class deepinsight.doctor.prediction.regression_scoring.RegressionModelScorer(modeling_params, clf, preds, target, out_folder, valid, input_df_index, sample_weight)

Bases: deepinsight.doctor.prediction.scoring_base.PredictionModelScorer

compute_predicted_data(preds, valid_X_index)
get_regression_performance(valid_y, preds, sample_weight=None)
score()
class deepinsight.doctor.prediction.regression_scoring.TreeSummaryBuilder(model, feature_names, rescalers, is_regression)

Bases: object

build()
class deepinsight.doctor.prediction.regression_scoring.ZeroEstimator

Bases: sklearn.base.BaseEstimator

fit(X, y)
predict(X)
deepinsight.doctor.prediction.regression_scoring.compute_metrics(valid_y, preds, sample_weight=None)
deepinsight.doctor.prediction.regression_scoring.make_tree_data(extract, feature_names, rescalers, is_regression)
deepinsight.doctor.prediction.regression_scoring.pearson_correlation(valid_y, preds, sample_weight=None)
deepinsight.doctor.prediction.regression_scoring.regression_predict(clf, pipeline, modeling_params, data, ensemble_has_target=False)

Returns the predicted dataframe. Currently used only by the scoring recipe.

deepinsight.doctor.prediction.regression_scoring.regression_predict_ensemble(clf, data, has_target=False)
deepinsight.doctor.prediction.regression_scoring.regression_predict_single(clf, pipeline, modeling_params, data)
deepinsight.doctor.prediction.regression_scoring.regression_scorer_with_valid(modeling_params, clf, valid, fold_mfolder, input_df_index, with_sample_weight=False)
deepinsight.doctor.prediction.regression_scoring.set_n_features_v0_18_v0_19(m, n)

deepinsight.doctor.prediction.scoring_base module

class deepinsight.doctor.prediction.scoring_base.PredictionModelIntrinsicScorer(modeling_params, clf, train_X, train_y, out_folder, prepared_X)

Bases: object

get_rf_raw_importance(clf, ret)
class deepinsight.doctor.prediction.scoring_base.PredictionModelScorer(modeling_params, clf, valid)

Bases: object

add_metric(measure, value, description='')
get_variables_importance()
deepinsight.doctor.prediction.scoring_base.compute_lm_significance(clf, coefs, intercept, prepared_X, train_y, regression=True)

Returns (t_test, p_val)

deepinsight.doctor.prediction.scoring_base.trim_curve(curve, distance_threshold=0.05)

Given a list of curve points P_k = (x, y), remove points until no segment [P_k, P_{k+1}] is shorter than distance_threshold.

Module contents