In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import GradientBoostingClassifier

In [2]:
train_X = pd.read_excel('ML_training_features.xlsx')
train_y = pd.read_excel('ML_training_target.xlsx')

In [3]:
validation_X = pd.read_excel('ML_validation_features.xlsx')
validation_y = pd.read_excel('ML_validation_target.xlsx')

In [4]:
validation_y = validation_y['ReleaseArea']
train_y = train_y['ReleaseArea']

In [5]:
# feature combinations
p1 = ['Slope']

p2 = ['Slope','Elevation']

p3 = ['Slope','North','East','North East','North West','South','South East','South West','West']

p4 = ['Slope','Elevation','Plan_curv','Profile_curv','TRI','Distance_to_roads']

p5 = ['Slope','Elevation','Plan_curv','Profile_curv','TRI','Flow_dir','Flow_acc','Distance_to_roads']

p6 = ['Slope','Elevation','Plan_curv','Profile_curv','TRI']

p7 = ['Elevation','North','East','North East','North West','South','South East','South West','West','Plan_curv','Profile_curv','TRI','Flow_dir','Flow_acc','Distance_to_roads']

p8 = ['Slope','Elevation','North','East','North East','North West','South','South East','South West','West','Plan_curv','Profile_curv','TRI','Flow_dir','Flow_acc','Distance_to_roads',
      'Granite','Granodiorite','Tonalite','Trondhjemite','Syenite','Monzonite','Monzodiorite','Quartz diorite','Diorite','Gabbro','Norite','Peridotite','Pyroksenite','Charnockite','Mangerite','Anorthosite','Mafic dyke (Diabase, Dolerite)','Pegmatite/aplite','Felsic volcanic rock','Rhyolite','Dacite','Intermediate volcanic rock','Andesite','Mafic volcanic rock','Basalt',
                  'Pyroclastic rock','Volcanic breccia','Siltstone','Sandstone','Greywacke','Arkose','Konglomerate','Sedimentary breccia','Limestone','Tuffite','Shale','Phyllite','Mica schist','Garnet mica schist','Calcareous phyllite','Calcareous mica schist','Amphibole schist','Graphitic schist','Calcite marble',
                 'Metasandstone','Metagreywacke','Meta-arkose','Quartzite','Quartz schist','Mica gneiss','Calc-silicate rock','Amphibole gneiss','Granitic gneiss','Granodioritic gneiss','Tonalitic gneiss','Quartz dioritic gneiss','Monzonitic gneiss','Dioritic gneis','Orthopyroxene gneiss','Migmatite','Augengneiss',
                    'Banded gneiss','Greenschist','Greenstone','Amphibolite','Metagabbro','Eclogite','Serpentinite','Mylonite/Phyllonite','Cataclasite']

feature_combinations = [p1,p2,p3,p4,p5,p6,p7,p8]

In [6]:
best_params = []
cv_results = []
best_score = []
parameter_setting = []

for i in feature_combinations:
    
    X_train = train_X[i]
    
    
    AUTO_SCALING = True
    if AUTO_SCALING:
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_training = scaler.transform(X_train)
        
    clf = GradientBoostingClassifier(random_state=0)

    
    param_grid = {"n_estimators": np.arange(70, 120, 1),
              "learning_rate": np.arange(0.10, 0.30, 0.02),
              'max_depth':np.arange(1,15,1)}
    
    clf = RandomizedSearchCV(estimator=clf, param_distributions=param_grid,verbose=3, cv = 5,scoring = 'f1')
    clf.fit(X_training, train_y)
    
    best_params.append(clf.best_params_)
    cv_results.append(clf.cv_results_)
    best_score.append(clf.best_score_)
    parameter_setting.append(clf.cv_results_['params'][clf.best_index_])

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5] END learning_rate=0.22000000000000003, max_depth=1, n_estimators=97; total time=   0.5s
[CV 2/5] END learning_rate=0.22000000000000003, max_depth=1, n_estimators=97; total time=   0.5s
[CV 3/5] END learning_rate=0.22000000000000003, max_depth=1, n_estimators=97; total time=   0.6s
[CV 4/5] END learning_rate=0.22000000000000003, max_depth=1, n_estimators=97; total time=   0.5s
[CV 5/5] END learning_rate=0.22000000000000003, max_depth=1, n_estimators=97; total time=   0.5s
[CV 1/5] END learning_rate=0.18000000000000002, max_depth=7, n_estimators=116; total time=   3.9s
[CV 2/5] END learning_rate=0.18000000000000002, max_depth=7, n_estimators=116; total time=   3.9s
[CV 3/5] END learning_rate=0.18000000000000002, max_depth=7, n_estimators=116; total time=   4.0s
[CV 4/5] END learning_rate=0.18000000000000002, max_depth=7, n_estimators=116; total time=   4.1s
[CV 5/5] END learning_rate=0.18000000000000002, max_depth=7, n

[CV 5/5] END learning_rate=0.14, max_depth=14, n_estimators=106; total time=  12.9s
[CV 1/5] END learning_rate=0.1, max_depth=1, n_estimators=71; total time=   0.6s
[CV 2/5] END learning_rate=0.1, max_depth=1, n_estimators=71; total time=   0.7s
[CV 3/5] END learning_rate=0.1, max_depth=1, n_estimators=71; total time=   0.7s
[CV 4/5] END learning_rate=0.1, max_depth=1, n_estimators=71; total time=   0.7s
[CV 5/5] END learning_rate=0.1, max_depth=1, n_estimators=71; total time=   0.7s
[CV 1/5] END learning_rate=0.22000000000000003, max_depth=9, n_estimators=113; total time=   7.9s
[CV 2/5] END learning_rate=0.22000000000000003, max_depth=9, n_estimators=113; total time=   8.1s
[CV 3/5] END learning_rate=0.22000000000000003, max_depth=9, n_estimators=113; total time=   9.3s
[CV 4/5] END learning_rate=0.22000000000000003, max_depth=9, n_estimators=113; total time=   9.0s
[CV 5/5] END learning_rate=0.22000000000000003, max_depth=9, n_estimators=113; total time=  10.5s
Fitting 5 folds for e

[CV 1/5] END learning_rate=0.26, max_depth=5, n_estimators=111; total time=   9.6s
[CV 2/5] END learning_rate=0.26, max_depth=5, n_estimators=111; total time=   9.6s
[CV 3/5] END learning_rate=0.26, max_depth=5, n_estimators=111; total time=   9.6s
[CV 4/5] END learning_rate=0.26, max_depth=5, n_estimators=111; total time=   9.7s
[CV 5/5] END learning_rate=0.26, max_depth=5, n_estimators=111; total time=   9.9s
[CV 1/5] END learning_rate=0.18000000000000002, max_depth=11, n_estimators=93; total time=  19.0s
[CV 2/5] END learning_rate=0.18000000000000002, max_depth=11, n_estimators=93; total time=  18.8s
[CV 3/5] END learning_rate=0.18000000000000002, max_depth=11, n_estimators=93; total time=  18.9s
[CV 4/5] END learning_rate=0.18000000000000002, max_depth=11, n_estimators=93; total time=  20.5s
[CV 5/5] END learning_rate=0.18000000000000002, max_depth=11, n_estimators=93; total time=  18.9s
[CV 1/5] END learning_rate=0.28, max_depth=9, n_estimators=81; total time=  13.4s
[CV 2/5] END 

[CV 5/5] END learning_rate=0.22000000000000003, max_depth=1, n_estimators=70; total time=   2.0s
[CV 1/5] END learning_rate=0.1, max_depth=9, n_estimators=76; total time=  14.3s
[CV 2/5] END learning_rate=0.1, max_depth=9, n_estimators=76; total time=  11.3s
[CV 3/5] END learning_rate=0.1, max_depth=9, n_estimators=76; total time=  11.9s
[CV 4/5] END learning_rate=0.1, max_depth=9, n_estimators=76; total time=  12.1s
[CV 5/5] END learning_rate=0.1, max_depth=9, n_estimators=76; total time=  15.3s
[CV 1/5] END learning_rate=0.16000000000000003, max_depth=10, n_estimators=95; total time=  17.2s
[CV 2/5] END learning_rate=0.16000000000000003, max_depth=10, n_estimators=95; total time=  18.1s
[CV 3/5] END learning_rate=0.16000000000000003, max_depth=10, n_estimators=95; total time=  17.9s
[CV 4/5] END learning_rate=0.16000000000000003, max_depth=10, n_estimators=95; total time=  16.1s
[CV 5/5] END learning_rate=0.16000000000000003, max_depth=10, n_estimators=95; total time=  16.1s
[CV 1/5]

[CV 5/5] END learning_rate=0.16000000000000003, max_depth=6, n_estimators=113; total time=  25.0s
[CV 1/5] END learning_rate=0.1, max_depth=11, n_estimators=88; total time=  31.6s
[CV 2/5] END learning_rate=0.1, max_depth=11, n_estimators=88; total time=  43.3s
[CV 3/5] END learning_rate=0.1, max_depth=11, n_estimators=88; total time=  34.2s
[CV 4/5] END learning_rate=0.1, max_depth=11, n_estimators=88; total time=  34.0s
[CV 5/5] END learning_rate=0.1, max_depth=11, n_estimators=88; total time=  33.5s
[CV 1/5] END learning_rate=0.24000000000000002, max_depth=12, n_estimators=97; total time=  44.8s
[CV 2/5] END learning_rate=0.24000000000000002, max_depth=12, n_estimators=97; total time=  44.7s
[CV 3/5] END learning_rate=0.24000000000000002, max_depth=12, n_estimators=97; total time=  39.9s
[CV 4/5] END learning_rate=0.24000000000000002, max_depth=12, n_estimators=97; total time=  40.5s
[CV 5/5] END learning_rate=0.24000000000000002, max_depth=12, n_estimators=97; total time=  40.9s
[C

In [7]:
best_params_df = pd.DataFrame(best_params)
best_params_df.to_excel('GBRT_best_params_RandomSearch4.xlsx')

In [8]:
cv_results_df = pd.DataFrame(cv_results)
cv_results_df.to_excel('GBRT_cv_results_RandomSearch4.xlsx')

In [9]:
best_score_df = pd.DataFrame(best_score)
best_score_df.to_excel('GBRT_best_cv_scores_RandomSearch4.xlsx')