### Predictions with the trained RF-models

Linn Alexandra Emhjellen, 2021.


In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import joblib


In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn import datasets, linear_model
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
from sklearn.metrics import recall_score,roc_curve,auc
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn import metrics

In [3]:
df_database = pd.read_csv('ML_Lærdal_Aurland_close_10_m_database.csv')
df_database = df_database.dropna()


In [5]:
# load models

RF_p1_model = joblib.load("../RF/RF_p1_Emhjellen2.joblib")
RF_p2_model = joblib.load("../RF/RF_p2_Emhjellen2.joblib")
RF_p3_model = joblib.load("../RF/RF_p3_Emhjellen2.joblib")
RF_p4_model = joblib.load("../RF/RF_p4_Emhjellen2.joblib")
RF_p5_model = joblib.load("../RF/RF_p5_Emhjellen2.joblib")
RF_p6_model = joblib.load("../RF/RF_p6_Emhjellen2.joblib")
RF_p7_model = joblib.load("../RF/RF_p7_Emhjellen2.joblib")
RF_p8_model = joblib.load("../RF/RF_p8_Emhjellen2.joblib")



In [6]:
missing_bedrocks = ['Graphitic schist', 'Cataclasite', 'Dioritic gneis', 'Trondhjemite', 'Norite', 'Sedimentary breccia', 'Andesite', 'Mafic dyke (Diabase, Dolerite)', 'Gabbro', 'Quartz diorite', 'Greenstone', 'Dacite', 'Calc-silicate rock', 'Mangerite', 'Metasandstone', 'Quartz schist', 'Phyllite', 'Tonalite', 'Calcareous mica schist', 'Granite', 'Eclogite', 'Pegmatite/aplite', 'Pyroclastic rock', 'Mica gneiss', 'Mafic volcanic rock', 'Konglomerate', 'Intermediate volcanic rock', 'Migmatite', 'Calcareous phyllite', 'Quartzite', 'Diorite', 'Arkose', 'Volcanic breccia', 'Sandstone', 'Garnet mica schist', 'Siltstone', 'Calcite marble', 'Shale', 'Mylonite/Phyllonite', 'Granodioritic gneiss', 'Amphibole gneiss', 'Orthopyroxene gneiss', 'Syenite', 'Rhyolite', 'Banded gneiss', 'Quartz dioritic gneiss', 'Limestone', 'Metagreywacke', 'Basalt', 'Mica schist', 'Metagabbro', 'Granodiorite', 'Felsic volcanic rock', 'Tuffite', 'Meta-arkose', 'Greywacke', 'Peridotite', 'Pyroksenite', 'Augengneiss', 'Serpentinite', 'Greenschist', 'Amphibole schist', 'Amphibolite', 'Monzonitic gneiss']

In [8]:
for i in missing_bedrocks:
 print(i)
 df_database[i] = 0

Graphitic schist
Cataclasite
Dioritic gneis
Trondhjemite
Norite
Sedimentary breccia
Andesite
Mafic dyke (Diabase, Dolerite)
Gabbro
Quartz diorite
Greenstone
Dacite
Calc-silicate rock
Mangerite
Metasandstone
Quartz schist
Phyllite
Tonalite
Calcareous mica schist
Granite
Eclogite
Pegmatite/aplite
Pyroclastic rock
Mica gneiss
Mafic volcanic rock
Konglomerate
Intermediate volcanic rock
Migmatite
Calcareous phyllite
Quartzite
Diorite
Arkose
Volcanic breccia
Sandstone
Garnet mica schist
Siltstone
Calcite marble
Shale
Mylonite/Phyllonite
Granodioritic gneiss
Amphibole gneiss
Orthopyroxene gneiss
Syenite
Rhyolite
Banded gneiss
Quartz dioritic gneiss
Limestone
Metagreywacke
Basalt
Mica schist
Metagabbro
Granodiorite
Felsic volcanic rock
Tuffite
Meta-arkose
Greywacke
Peridotite
Pyroksenite
Augengneiss
Serpentinite
Greenschist
Amphibole schist
Amphibolite
Monzonitic gneiss


df_database['Arkose'] = 0
df_database['Greenstone'] = 0 
df_database['Metagreywacke'] = 0
df_database['Tonalite'] = 0
df_database['Peridotite'] = 0
df_database['Meta-arkose'] = 0
df_database['Dioritic gneis'] = 0
df_database['Pyroclastic rock'] = 0
df_database['Quartz diorite'] = 0
df_database['Sedimentary breccia'] = 0
df_database['Andesite'] = 0
df_database['Pyroksenite'] = 0
df_database['Charnockite'] = 0
df_database['Shale'] = 0
df_database['Mica schist'] = 0
df_database['Intermediate volcanic rock'] = 0
df_database['Trondhjemite'] = 0
df_database['Amphibole schist'] = 0
df_database['Serpentinite'] = 0
df_database['Mangerite'] = 0
df_database['Felsic volcanic rock'] = 0
df_database['Greywacke'] = 0
df_database['Pegmatite/aplite'] = 0
df_database['Tuffite'] = 0
df_database['Mafic dyke (Diabase, Dolerite)'] = 0
df_database['Mafic volcanic rock'] = 0
df_database['Calcareous mica schist'] = 0
df_database['Diorite'] = 0
df_database['Garnet mica schist'] = 0
df_database['Limestone'] = 0
df_database['Basalt'] = 0
df_database['Graphitic schist'] = 0
df_database['Norite'] = 0


In [10]:
# feature combinations
p1 = df_database[['Slope']]

p2 = df_database[['Slope','Elevation']]

p3 = df_database[['Slope','North','East','North East','North West','South','South East','South West','West']]

p4 = df_database[['Slope','Elevation','Plan_curv','Profile_curv','TRI','Distance_to_roads']]

p5 = df_database[['Slope','Elevation','Plan_curv','Profile_curv','TRI','Flow_dir','Flow_acc','Distance_to_roads']]

p6 = df_database[['Slope','Elevation','Plan_curv','Profile_curv','TRI']]

p7 = df_database[['Elevation','North','East','North East','North West','South','South East','South West','West','Plan_curv','Profile_curv','TRI','Flow_dir','Flow_acc','Distance_to_roads']]

p8 = df_database[['Slope','Elevation','North','East','North East','North West','South','South East','South West','West','Plan_curv','Profile_curv','TRI','Flow_dir','Flow_acc','Distance_to_roads',
 'Granite','Granodiorite','Tonalite','Trondhjemite','Syenite','Monzonite','Monzodiorite','Quartz diorite','Diorite','Gabbro','Norite','Peridotite','Pyroksenite','Charnockite','Mangerite','Anorthosite','Mafic dyke (Diabase, Dolerite)','Pegmatite/aplite','Felsic volcanic rock','Rhyolite','Dacite','Intermediate volcanic rock','Andesite','Mafic volcanic rock','Basalt',
 'Pyroclastic rock','Volcanic breccia','Siltstone','Sandstone','Greywacke','Arkose','Konglomerate','Sedimentary breccia','Limestone','Tuffite','Shale','Phyllite','Mica schist','Garnet mica schist','Calcareous phyllite','Calcareous mica schist','Amphibole schist','Graphitic schist','Calcite marble',
 'Metasandstone','Metagreywacke','Meta-arkose','Quartzite','Quartz schist','Mica gneiss','Calc-silicate rock','Amphibole gneiss','Granitic gneiss','Granodioritic gneiss','Tonalitic gneiss','Quartz dioritic gneiss','Monzonitic gneiss','Dioritic gneis','Orthopyroxene gneiss','Migmatite','Augengneiss',
 'Banded gneiss','Greenschist','Greenstone','Amphibolite','Metagabbro','Eclogite','Serpentinite','Mylonite/Phyllonite','Cataclasite']]

feature_combinations = [p1,p2,p3,p4,p5,p6,p7,p8]

In [11]:
x_p1 = p1[~np.isnan(p1).any(axis=1)]
x_p2 = p2[~np.isnan(p2).any(axis=1)]
x_p3 = p3[~np.isnan(p3).any(axis=1)]
x_p4 = p4[~np.isnan(p4).any(axis=1)]
x_p5 = p5[~np.isnan(p5).any(axis=1)]
x_p6 = p6[~np.isnan(p6).any(axis=1)]
x_p7 = p7[~np.isnan(p7).any(axis=1)]
x_p8 = p8[~np.isnan(p8).any(axis=1)]

x_p = [x_p1,x_p2,x_p3,x_p4,x_p5,x_p6,x_p7,x_p8]

In [12]:
rf_models = [RF_p1_model,RF_p2_model,RF_p3_model,RF_p4_model,RF_p5_model,RF_p6_model,RF_p7_model,RF_p8_model]

predictions = []
pred_probabilities = []

for i in range(0,len(x_p)):
 
 AUTO_SCALING = True
 if AUTO_SCALING:
 scaler = StandardScaler()
 scaler.fit(x_p[i])
 x_p_i = scaler.transform(x_p[i])
 
 p_i_predictions = rf_models[i].predict(x_p_i)
 predictions.append(p_i_predictions)
 
 prob_p_i = rf_models[i].predict_proba(x_p_i)
 pred_probabilities.append(prob_p_i)
 

In [13]:
df_coor = df_database[['POINT_X', 'POINT_Y']]

In [14]:
pred_probabilities[0]

array([[9.99963262e-01, 3.67384980e-05],
 [9.96216110e-01, 3.78389014e-03],
 [9.94593860e-01, 5.40614012e-03],
 ...,
 [9.93907681e-01, 6.09231878e-03],
 [1.00000000e+00, 0.00000000e+00],
 [1.00000000e+00, 0.00000000e+00]])

In [15]:
df_coor['RF_Vestland_pred_p1'] = predictions[0]
df_coor['RF_Vestland_prob_p1'] = pred_probabilities[0][:,1]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 df_coor['RF_Vestland_pred_p1'] = predictions[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 df_coor['RF_Vestland_prob_p1'] = pred_probabilities[0][:,1]


In [16]:
df_coor['RF_Vestland_pred_p2'] = predictions[1]
df_coor['RF_Vestland_prob_p2'] = pred_probabilities[1][:,1]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 df_coor['RF_Vestland_pred_p2'] = predictions[1]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 df_coor['RF_Vestland_prob_p2'] = pred_probabilities[1][:,1]


In [17]:
df_coor['RF_Vestland_pred_p3'] = predictions[2]
df_coor['RF_Vestland_prob_p3'] = pred_probabilities[2][:,1]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 df_coor['RF_Vestland_pred_p3'] = predictions[2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 df_coor['RF_Vestland_prob_p3'] = pred_probabilities[2][:,1]


In [18]:
df_coor['RF_Vestland_pred_p4'] = predictions[3]
df_coor['RF_Vestland_prob_p4'] = pred_probabilities[3][:,1]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 df_coor['RF_Vestland_pred_p4'] = predictions[3]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 df_coor['RF_Vestland_prob_p4'] = pred_probabilities[3][:,1]


In [19]:
df_coor['RF_Vestland_pred_p5'] = predictions[4]
df_coor['RF_Vestland_prob_p5'] = pred_probabilities[4][:,1]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 df_coor['RF_Vestland_pred_p5'] = predictions[4]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 df_coor['RF_Vestland_prob_p5'] = pred_probabilities[4][:,1]


In [20]:
df_coor['RF_Vestland_pred_p6'] = predictions[5]
df_coor['RF_Vestland_prob_p6'] = pred_probabilities[5][:,1]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 df_coor['RF_Vestland_pred_p6'] = predictions[5]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 df_coor['RF_Vestland_prob_p6'] = pred_probabilities[5][:,1]


In [21]:
df_coor['RF_Vestland_pred_p7'] = predictions[6]
df_coor['RF_Vestland_prob_p7'] = pred_probabilities[6][:,1]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 df_coor['RF_Vestland_pred_p7'] = predictions[6]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 df_coor['RF_Vestland_prob_p7'] = pred_probabilities[6][:,1]


In [22]:
df_coor['RF_Vestland_pred_p8'] = predictions[7]
df_coor['RF_Vestland_prob_p8'] = pred_probabilities[7][:,1]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 df_coor['RF_Vestland_pred_p8'] = predictions[7]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
 df_coor['RF_Vestland_prob_p8'] = pred_probabilities[7][:,1]


In [23]:
df_coor.to_csv('RF_results_lærdal_aurland_close_10.csv')