In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from xgboost.sklearn import XGBRegressor
from sklearn.metrics import r2_score
from sklearn.utils import check_array
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline, make_union
from sklearn.model_selection import GridSearchCV
import statsmodels.api as sm
from sklearn.preprocessing import scale
from sklearn.preprocessing import LabelEncoder
from sklearn.random_projection import GaussianRandomProjection
from sklearn.random_projection import SparseRandomProjection
from sklearn.decomposition import TruncatedSVD
from sklearn.decomposition import PCA, FastICA
from sklearn.linear_model import ElasticNetCV, LassoLarsCV
from sklearn.ensemble import GradientBoostingRegressor
import xgboost as xgb
import eli5


from sklearn.base import BaseEstimator,TransformerMixin, ClassifierMixin

color = sns.color_palette()

%matplotlib inline

pd.options.mode.chained_assignment = None  # default='warn'
pd.options.display.max_columns = 999
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools
In [2]:
# https://www.kaggle.com/hakeem/stacked-then-averaged-models-0-5697/code
class StackingEstimator(BaseEstimator, TransformerMixin):
    """Transformer that prepends a wrapped estimator's predictions to X.

    Parameters
    ----------
    estimator : object
        Estimator providing ``fit`` and ``predict``. ``predict_proba`` is
        used as well when the estimator is a classifier that exposes it.
    """

    def __init__(self, estimator):
        self.estimator = estimator

    def fit(self, X, y=None, **fit_params):
        """Fit the wrapped estimator on (X, y) and return ``self``."""
        self.estimator.fit(X, y, **fit_params)
        return self

    def transform(self, X):
        """Return X with synthetic prediction columns stacked on its left.

        The output columns are, from left to right: the estimator's
        prediction, its class probabilities (classifiers exposing
        ``predict_proba`` only), then the validated input features.
        """
        features = check_array(X)
        wrapped = self.estimator

        is_classifier = issubclass(wrapped.__class__, ClassifierMixin)
        if is_classifier and hasattr(wrapped, 'predict_proba'):
            # class probabilities are placed in front of the original features
            augmented = np.hstack((wrapped.predict_proba(features), features))
        else:
            augmented = np.copy(features)

        # the point prediction becomes the very first column
        predicted = np.reshape(wrapped.predict(features), (-1, 1))
        return np.hstack((predicted, augmented))
In [3]:
# Load the raw train and test CSV files (paths are relative to the
# notebook's working directory).
data_train = pd.read_csv('../data/raw/train.csv')
data_test = pd.read_csv('../data/raw/test.csv')
In [4]:
# Label-encode every object-dtype column in place. Each column gets its own
# encoder, fitted on the train and test values together so both frames share
# the same integer codes.
for c in data_train.columns:
    if data_train[c].dtype != 'object':
        continue
    train_values = list(data_train[c].values)
    test_values = list(data_test[c].values)
    lbl = LabelEncoder()
    lbl.fit(train_values + test_values)
    data_train[c] = lbl.transform(train_values)
    data_test[c] = lbl.transform(test_values)
In [5]:
n_comp = 12

# Every decomposition below is fitted on the same training features (all
# columns except the target 'y'), so build that frame once.
train_features = data_train.drop(["y"], axis=1)

# tSVD
tsvd = TruncatedSVD(n_components=n_comp, random_state=420)
tsvd_results_train = tsvd.fit_transform(train_features)
tsvd_results_test = tsvd.transform(data_test)

# PCA
pca = PCA(n_components=n_comp, random_state=420)
pca2_results_train = pca.fit_transform(train_features)
pca2_results_test = pca.transform(data_test)

# ICA
ica = FastICA(n_components=n_comp, random_state=420)
ica2_results_train = ica.fit_transform(train_features)
ica2_results_test = ica.transform(data_test)

# GRP
grp = GaussianRandomProjection(n_components=n_comp, eps=0.1, random_state=420)
grp_results_train = grp.fit_transform(train_features)
grp_results_test = grp.transform(data_test)

# SRP
srp = SparseRandomProjection(n_components=n_comp, dense_output=True, random_state=420)
srp_results_train = srp.fit_transform(train_features)
srp_results_test = srp.transform(data_test)

# Columns available before the decomposition components are appended, minus
# the target. Built as a list so the original column order is kept; a set
# difference would give an arbitrary order that can differ between runs.
usable_columns = [col for col in data_train.columns if col != 'y']

# Append decomposition components to datasets.
# The list order fixes the column insertion order for each component index:
# pca, ica, tsvd, grp, srp.
component_sets = [
    ('pca', pca2_results_train, pca2_results_test),
    ('ica', ica2_results_train, ica2_results_test),
    ('tsvd', tsvd_results_train, tsvd_results_test),
    ('grp', grp_results_train, grp_results_test),
    ('srp', srp_results_train, srp_results_test),
]
for i in range(1, n_comp + 1):
    for prefix, train_components, test_components in component_sets:
        column_name = prefix + '_' + str(i)
        data_train[column_name] = train_components[:, i - 1]
        data_test[column_name] = test_components[:, i - 1]
In [6]:
# Target vector and its mean (the mean is used as xgboost's base_score).
y_train = data_train['y'].values
y_mean = np.mean(y_train)
# Test-set IDs, kept aside as an array.
id_test = data_test['ID'].values
# finaltrainset and finaltestset are the inputs used only by the stacked
# model; usable_columns excludes the target and the appended decomposition
# components (PCA, SVD, ...).
finaltrainset = data_train[usable_columns].values
finaltestset = data_test[usable_columns].values

Initial Training of xgboost

In [7]:
'''Train the xgb model then predict the test data'''

# Booster parameters shared by both xgboost training runs in this notebook.
# NOTE(review): 'n_trees' does not appear to be a documented XGBoost booster
# parameter; the number of boosting rounds is set through num_boost_round
# below. Confirm whether this key has any effect.
xgb_params = {
    'n_trees': 520, 
    'eta': 0.0045,
    'max_depth': 4,
    'subsample': 0.93,
    'objective': 'reg:linear',
    'eval_metric': 'rmse',
    'base_score': y_mean, # base prediction = mean(target)
    'silent': 1
}
# NOTE(review): a previous note here asked for a label column named 'class';
# the target column used in this notebook is 'y'.

# Training matrix: every column except the target, labelled with y_train.
dtrain = xgb.DMatrix(data_train.drop('y', axis=1), y_train)
dtest = xgb.DMatrix(data_test)

num_boost_rounds = 1250
# train model
# dict(xgb_params, silent=0) is a copy of the parameters with 'silent'
# overridden to 0 for this call only; xgb_params itself is unchanged.
model = xgb.train(dict(xgb_params, silent=0), dtrain, num_boost_round=num_boost_rounds)
y_pred = model.predict(dtest)
In [8]:
# Display the test-set predictions (same model and matrix that produced y_pred).
model.predict(dtest)
Out[8]:
array([  80.73707581,   98.72522736,   79.47880554, ...,   96.39012909,
        109.93545532,   95.60625458], dtype=float32)
In [9]:
# Render the trained booster's feature weights as a ranked table.
eli5.show_weights(model)
Out[9]:
Weight Feature
0.2315 X314
0.0710 X315
0.0607 X29
0.0432 X118
0.0362 X96
0.0315 X189
0.0156 X19
0.0140 pca_9
0.0101 X218
0.0095 X70
0.0094 X261
0.0088 X127
0.0086 X0
0.0081 pca_4
0.0072 X100
0.0072 X294
0.0069 tsvd_9
0.0062 X201
0.0060 tsvd_2
0.0058 X85
… 169 more …

Pseudo-labelling for xgboost training

In [10]:
# Pseudo-labelling: attach the first model's predictions to the test rows as
# their target, then add a random quarter of those rows to the training data.
# The sample is seeded (420, the seed used by the decompositions) so the
# augmented training set is the same on every run.
pseudo_labelled = data_test.join(pd.Series(y_pred, name='y'))
train = pd.concat([
    data_train,
    pseudo_labelled.sample(frac=0.25, replace=False, random_state=420),
])
# Feature column order of the combined frame; the test matrix is reordered
# with it before prediction.
col_order = train.drop('y', axis=1).columns
In [11]:
# Rebind the training target to the combined frame `train` (original training
# rows plus the sampled pseudo-labelled test rows).
y_train = train['y'].values
# NOTE(review): xgb_params was built earlier with the previous y_mean as
# 'base_score'; this recomputed value is not propagated into that dict.
y_mean = np.mean(y_train)
id_test = data_test['ID'].values
# finaltrainset and finaltestset are the inputs used only by the stacked
# model (they do not contain the PCA, SVD, ... arrays).
# NOTE(review): finaltrainset is now taken from `train`, so the stacked
# pipeline fitted afterwards also sees the pseudo-labelled rows. Confirm this
# is intended.
finaltrainset = train[usable_columns].values
finaltestset = data_test[usable_columns].values

dtrain = xgb.DMatrix(train.drop('y', axis=1), y_train)
# Select the test columns in col_order so they line up with the training matrix.
dtest = xgb.DMatrix(data_test[col_order])

num_boost_rounds = 1250
# train model
model_pseudo = xgb.train(dict(xgb_params, silent=0), dtrain, num_boost_round=num_boost_rounds)
y_pred_pseudo = model_pseudo.predict(dtest)

Stacked pipeline

In [12]:
'''Train the stacked models then predict the test data'''

# Two stacking layers each prepend their prediction as an extra column; a
# final LassoLarsCV is fitted on the augmented matrix.
stacked_pipeline = make_pipeline(
    StackingEstimator(estimator=LassoLarsCV(normalize=True)),
    StackingEstimator(estimator=GradientBoostingRegressor(
        learning_rate=0.001,
        loss="huber",
        max_depth=3,
        max_features=0.55,
        min_samples_leaf=18,
        min_samples_split=14,
        subsample=0.7,
        # subsample < 1 and max_features < 1 make the fit stochastic; seed it
        # (420, the seed used elsewhere in this notebook) for repeatable runs.
        random_state=420,
    )),
    LassoLarsCV()
)

stacked_pipeline.fit(finaltrainset, y_train)
results = stacked_pipeline.predict(finaltestset)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 3 iterations, i.e. alpha=4.804e-02, with an active set of 3 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:377: RuntimeWarning: overflow encountered in divide
  g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 6 iterations, i.e. alpha=2.048e-02, with an active set of 6 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:381: RuntimeWarning: overflow encountered in divide
  g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny))
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 8 iterations, i.e. alpha=1.545e-02, with an active set of 8 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 8 iterations, i.e. alpha=1.545e-02, with an active set of 8 regressors, and the smallest cholesky pivot element being 5.268e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 14 iterations, i.e. alpha=1.291e-02, with an active set of 12 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 15 iterations, i.e. alpha=1.186e-02, with an active set of 13 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 16 iterations, i.e. alpha=1.099e-02, with an active set of 14 regressors, and the smallest cholesky pivot element being 1.825e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 20 iterations, i.e. alpha=1.075e-02, with an active set of 16 regressors, and the smallest cholesky pivot element being 7.885e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 20 iterations, i.e. alpha=1.062e-02, with an active set of 16 regressors, and the smallest cholesky pivot element being 7.885e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 22 iterations, i.e. alpha=1.018e-02, with an active set of 16 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 23 iterations, i.e. alpha=1.010e-02, with an active set of 17 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 30 iterations, i.e. alpha=9.049e-03, with an active set of 20 regressors, and the smallest cholesky pivot element being 2.581e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 41 iterations, i.e. alpha=8.287e-03, with an active set of 31 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 45 iterations, i.e. alpha=7.951e-03, with an active set of 35 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 48 iterations, i.e. alpha=7.863e-03, with an active set of 38 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:334: ConvergenceWarning: Early stopping the lars path, as the residues are small and the current value of alpha is no longer well controlled. 54 iterations, alpha=7.841e-03, previous alpha=7.645e-03, with an active set of 43 regressors.
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 5 iterations, i.e. alpha=1.905e-02, with an active set of 5 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 8 iterations, i.e. alpha=1.727e-02, with an active set of 8 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 13 iterations, i.e. alpha=1.005e-02, with an active set of 13 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 13 iterations, i.e. alpha=1.005e-02, with an active set of 13 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 14 iterations, i.e. alpha=8.615e-03, with an active set of 14 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 16 iterations, i.e. alpha=8.327e-03, with an active set of 16 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 16 iterations, i.e. alpha=8.327e-03, with an active set of 16 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 19 iterations, i.e. alpha=7.129e-03, with an active set of 19 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 25 iterations, i.e. alpha=5.850e-03, with an active set of 25 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 28 iterations, i.e. alpha=4.989e-03, with an active set of 28 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 28 iterations, i.e. alpha=4.989e-03, with an active set of 28 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 32 iterations, i.e. alpha=4.275e-03, with an active set of 32 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 33 iterations, i.e. alpha=4.138e-03, with an active set of 33 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 33 iterations, i.e. alpha=4.138e-03, with an active set of 33 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 39 iterations, i.e. alpha=3.600e-03, with an active set of 39 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 39 iterations, i.e. alpha=3.600e-03, with an active set of 39 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 40 iterations, i.e. alpha=3.589e-03, with an active set of 40 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 41 iterations, i.e. alpha=3.520e-03, with an active set of 41 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:334: ConvergenceWarning: Early stopping the lars path, as the residues are small and the current value of alpha is no longer well controlled. 46 iterations, alpha=3.214e-03, previous alpha=3.080e-03, with an active set of 45 regressors.
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 5 iterations, i.e. alpha=1.881e-02, with an active set of 5 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 14 iterations, i.e. alpha=9.681e-03, with an active set of 14 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 18 iterations, i.e. alpha=8.337e-03, with an active set of 18 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 18 iterations, i.e. alpha=8.337e-03, with an active set of 18 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 20 iterations, i.e. alpha=6.969e-03, with an active set of 20 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 24 iterations, i.e. alpha=4.863e-03, with an active set of 24 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 25 iterations, i.e. alpha=4.826e-03, with an active set of 25 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 27 iterations, i.e. alpha=4.569e-03, with an active set of 27 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 31 iterations, i.e. alpha=4.146e-03, with an active set of 31 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 31 iterations, i.e. alpha=4.146e-03, with an active set of 31 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 34 iterations, i.e. alpha=4.033e-03, with an active set of 34 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 38 iterations, i.e. alpha=3.718e-03, with an active set of 38 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 38 iterations, i.e. alpha=3.718e-03, with an active set of 38 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 38 iterations, i.e. alpha=3.718e-03, with an active set of 38 regressors, and the smallest cholesky pivot element being 2.107e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 44 iterations, i.e. alpha=3.477e-03, with an active set of 42 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:334: ConvergenceWarning: Early stopping the lars path, as the residues are small and the current value of alpha is no longer well controlled. 47 iterations, alpha=3.378e-03, previous alpha=3.351e-03, with an active set of 44 regressors.
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 5 iterations, i.e. alpha=1.663e-02, with an active set of 5 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 57 iterations, i.e. alpha=1.768e-03, with an active set of 55 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 65 iterations, i.e. alpha=1.652e-03, with an active set of 63 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 66 iterations, i.e. alpha=1.642e-03, with an active set of 64 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 66 iterations, i.e. alpha=1.642e-03, with an active set of 64 regressors, and the smallest cholesky pivot element being 2.356e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 70 iterations, i.e. alpha=1.571e-03, with an active set of 68 regressors, and the smallest cholesky pivot element being 3.495e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 72 iterations, i.e. alpha=1.561e-03, with an active set of 70 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 72 iterations, i.e. alpha=1.561e-03, with an active set of 70 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 84 iterations, i.e. alpha=1.312e-03, with an active set of 82 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 90 iterations, i.e. alpha=1.194e-03, with an active set of 88 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 90 iterations, i.e. alpha=1.194e-03, with an active set of 88 regressors, and the smallest cholesky pivot element being 2.356e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 90 iterations, i.e. alpha=1.194e-03, with an active set of 88 regressors, and the smallest cholesky pivot element being 3.650e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:334: ConvergenceWarning: Early stopping the lars path, as the residues are small and the current value of alpha is no longer well controlled. 101 iterations, alpha=1.030e-03, previous alpha=1.029e-03, with an active set of 98 regressors.
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 9 iterations, i.e. alpha=4.735e-03, with an active set of 9 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 36 iterations, i.e. alpha=2.718e-03, with an active set of 36 regressors, and the smallest cholesky pivot element being 2.581e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 41 iterations, i.e. alpha=2.479e-03, with an active set of 41 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 41 iterations, i.e. alpha=2.479e-03, with an active set of 41 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 42 iterations, i.e. alpha=2.411e-03, with an active set of 42 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 43 iterations, i.e. alpha=2.360e-03, with an active set of 43 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 44 iterations, i.e. alpha=2.284e-03, with an active set of 44 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 45 iterations, i.e. alpha=2.225e-03, with an active set of 45 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 64 iterations, i.e. alpha=1.543e-03, with an active set of 64 regressors, and the smallest cholesky pivot element being 2.107e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 80 iterations, i.e. alpha=1.342e-03, with an active set of 78 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 80 iterations, i.e. alpha=1.342e-03, with an active set of 78 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 81 iterations, i.e. alpha=1.333e-03, with an active set of 79 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 84 iterations, i.e. alpha=1.313e-03, with an active set of 82 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 94 iterations, i.e. alpha=1.210e-03, with an active set of 92 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 94 iterations, i.e. alpha=1.210e-03, with an active set of 92 regressors, and the smallest cholesky pivot element being 1.054e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 94 iterations, i.e. alpha=1.210e-03, with an active set of 92 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 98 iterations, i.e. alpha=1.145e-03, with an active set of 96 regressors, and the smallest cholesky pivot element being 1.825e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 99 iterations, i.e. alpha=1.138e-03, with an active set of 97 regressors, and the smallest cholesky pivot element being 1.825e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:334: ConvergenceWarning: Early stopping the lars path, as the residues are small and the current value of alpha is no longer well controlled. 105 iterations, alpha=1.068e-03, previous alpha=1.062e-03, with an active set of 102 regressors.
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 9 iterations, i.e. alpha=4.958e-03, with an active set of 9 regressors, and the smallest cholesky pivot element being 2.107e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 9 iterations, i.e. alpha=4.958e-03, with an active set of 9 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 44 iterations, i.e. alpha=2.479e-03, with an active set of 44 regressors, and the smallest cholesky pivot element being 2.107e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 44 iterations, i.e. alpha=2.479e-03, with an active set of 44 regressors, and the smallest cholesky pivot element being 2.220e-16
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:309: ConvergenceWarning: Regressors in active set degenerate. Dropping a regressor, after 46 iterations, i.e. alpha=2.362e-03, with an active set of 46 regressors, and the smallest cholesky pivot element being 1.490e-08
  ConvergenceWarning)
/Users/mkoutero/anaconda2/lib/python2.7/site-packages/sklearn/linear_model/least_angle.py:334: ConvergenceWarning: Early stopping the lars path, as the residues are small and the current value of alpha is no longer well controlled. 67 iterations, alpha=1.880e-03, previous alpha=1.859e-03, with an active set of 66 regressors.
  ConvergenceWarning)

Explain model¶

In [13]:
# Render eli5's weight/feature table for the fitted model_pseudo.
# Kept as the bare last expression of the cell so the notebook displays
# the returned object as the cell output.
eli5.show_weights(model_pseudo)
Out[13]:
Weight Feature
0.2820 X314
0.0784 X315
0.0668 X29
0.0489 X118
0.0460 X136
0.0270 X189
0.0185 ica_12
0.0131 pca_9
0.0114 pca_4
0.0094 X261
0.0087 X127
0.0070 X363
0.0069 X85
0.0056 X70
0.0054 grp_9
0.0052 pca_7
0.0051 X47
0.0051 grp_3
0.0049 tsvd_9
0.0049 X218
… 163 more …

Final prediction¶

In [14]:
# Blend weights, defined once so the train-set check and the submission
# below always use the same mix and cannot drift apart.
PSEUDO_MODEL_WEIGHT = 0.75
STACKED_WEIGHT = 0.25

# R2 score on the entire train data when averaging both models.
print('R2 score on train data:')
train_blend = (stacked_pipeline.predict(finaltrainset)*STACKED_WEIGHT
               + model_pseudo.predict(dtrain)*PSEUDO_MODEL_WEIGHT)
print(r2_score(y_train, train_blend))

# Average the predictions of both models on the test data, then
# (optionally) save them to a csv file.
# NOTE(review): assumes y_pred holds model_pseudo's test predictions and
# results holds the stacked pipeline's -- confirm against the cells that
# define them, otherwise the weights here are swapped relative to the
# train-set check above.
sub = pd.DataFrame()
sub['ID'] = id_test
sub['y'] = y_pred*PSEUDO_MODEL_WEIGHT + results*STACKED_WEIGHT
# Uncomment to write the submission file.
#sub.to_csv('pseudo-stacked-models.csv', index=False)
R2 score on train data:
0.693779536193

If you give the XGBoost model more weight in the blend, you end up overfitting.