{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Predictions with the trained RF-models\n",
    "\n",
    "Linn Alexandra Emhjellen, 2021.\n",
    "\n",
    "Loads eight previously trained random-forest models (feature combinations p1-p8), applies them to the table in `ML_Lærdal_Aurland_close_10_m_database.csv`, and writes the predicted class and the class-1 probability for every point to `RF_results_lærdal_aurland_close_10.csv`.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import joblib\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "from sklearn import datasets, linear_model\n",
    "from sklearn import metrics\n",
    "from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor\n",
    "from sklearn.metrics import recall_score,roc_curve,auc\n",
    "from sklearn.model_selection import cross_val_predict\n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.preprocessing import StandardScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: paths and switches a reader may want to change.\n",
    "DATA_CSV = 'ML_Lærdal_Aurland_close_10_m_database.csv'\n",
    "MODEL_DIR = '../RF'\n",
    "RESULTS_CSV = 'RF_results_lærdal_aurland_close_10.csv'\n",
    "\n",
    "# Names of the eight feature combinations / models.\n",
    "MODEL_NAMES = [f'p{k}' for k in range(1, 9)]\n",
    "\n",
    "# Standardise each feature set before prediction (see the caveat in the prediction cell).\n",
    "AUTO_SCALING = True"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows with any missing value are discarded once, up front, so that every feature set\n",
    "# and the coordinate table written at the end cover exactly the same rows.\n",
    "df_database = pd.read_csv(DATA_CSV).dropna()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Load models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load models\n",
    "#\n",
    "# NOTE: joblib.load unpickles arbitrary Python objects - only load model files from a trusted source.\n",
    "# NOTE: a previous run under scikit-learn 0.24.1 raised a UserWarning that these estimators were\n",
    "# pickled with version 0.23.1 ('might lead to breaking code or invalid results'); prefer running\n",
    "# with the scikit-learn version the models were trained with.\n",
    "rf_models = {\n",
    "    name: joblib.load(f'{MODEL_DIR}/RF_{name}_Emhjellen2.joblib')\n",
    "    for name in MODEL_NAMES\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Add bedrock columns that are absent from this data set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "missing_bedrocks = ['Graphitic schist', 'Cataclasite', 'Dioritic gneis', 'Trondhjemite', 'Norite', 'Sedimentary breccia', 'Andesite', 'Mafic dyke (Diabase, Dolerite)', 'Gabbro', 'Quartz diorite', 'Greenstone', 'Dacite', 'Calc-silicate rock', 'Mangerite', 'Metasandstone', 'Quartz schist', 'Phyllite', 'Tonalite', 'Calcareous mica schist', 'Granite', 'Eclogite', 'Pegmatite/aplite', 'Pyroclastic rock', 'Mica gneiss', 'Mafic volcanic rock', 'Konglomerate', 'Intermediate volcanic rock', 'Migmatite', 'Calcareous phyllite', 'Quartzite', 'Diorite', 'Arkose', 'Volcanic breccia', 'Sandstone', 'Garnet mica schist', 'Siltstone', 'Calcite marble', 'Shale', 'Mylonite/Phyllonite', 'Granodioritic gneiss', 'Amphibole gneiss', 'Orthopyroxene gneiss', 'Syenite', 'Rhyolite', 'Banded gneiss', 'Quartz dioritic gneiss', 'Limestone', 'Metagreywacke', 'Basalt', 'Mica schist', 'Metagabbro', 'Granodiorite', 'Felsic volcanic rock', 'Tuffite', 'Meta-arkose', 'Greywacke', 'Peridotite', 'Pyroksenite', 'Augengneiss', 'Serpentinite', 'Greenschist', 'Amphibole schist', 'Amphibolite', 'Monzonitic gneiss']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The bedrock classes listed in missing_bedrocks are added as all-zero columns so that the\n",
    "# p8 feature set can be selected below. A column that already exists in the data is left\n",
    "# untouched (the earlier loop overwrote it with 0), and all new columns are added in a\n",
    "# single call instead of one insertion plus one printed line per name.\n",
    "absent = [name for name in missing_bedrocks if name not in df_database.columns]\n",
    "df_database = df_database.assign(**{name: 0 for name in absent})\n",
    "print(f'Zero-filled {len(absent)} of {len(missing_bedrocks)} listed bedrock columns')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Manual zero-fill assignments, kept for reference only (this is a markdown cell, so nothing below is executed). The list differs from `missing_bedrocks` above, e.g. it contains 'Charnockite'.\n",
    "\n",
    "```python\n",
    "df_database['Arkose'] = 0\n",
    "df_database['Greenstone'] = 0\n",
    "df_database['Metagreywacke'] = 0\n",
    "df_database['Tonalite'] = 0\n",
    "df_database['Peridotite'] = 0\n",
    "df_database['Meta-arkose'] = 0\n",
    "df_database['Dioritic gneis'] = 0\n",
    "df_database['Pyroclastic rock'] = 0\n",
    "df_database['Quartz diorite'] = 0\n",
    "df_database['Sedimentary breccia'] = 0\n",
    "df_database['Andesite'] = 0\n",
    "df_database['Pyroksenite'] = 0\n",
    "df_database['Charnockite'] = 0\n",
    "df_database['Shale'] = 0\n",
    "df_database['Mica schist'] = 0\n",
    "df_database['Intermediate volcanic rock'] = 0\n",
    "df_database['Trondhjemite'] = 0\n",
    "df_database['Amphibole schist'] = 0\n",
    "df_database['Serpentinite'] = 0\n",
    "df_database['Mangerite'] = 0\n",
    "df_database['Felsic volcanic rock'] = 0\n",
    "df_database['Greywacke'] = 0\n",
    "df_database['Pegmatite/aplite'] = 0\n",
    "df_database['Tuffite'] = 0\n",
    "df_database['Mafic dyke (Diabase, Dolerite)'] = 0\n",
    "df_database['Mafic volcanic rock'] = 0\n",
    "df_database['Calcareous mica schist'] = 0\n",
    "df_database['Diorite'] = 0\n",
    "df_database['Garnet mica schist'] = 0\n",
    "df_database['Limestone'] = 0\n",
    "df_database['Basalt'] = 0\n",
    "df_database['Graphitic schist'] = 0\n",
    "df_database['Norite'] = 0\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Feature combinations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# feature combinations\n",
    "# The column ORDER inside each list is significant: the features are handed to the\n",
    "# models as plain arrays, so it must stay exactly as listed here.\n",
    "ASPECT = ['North','East','North East','North West','South','South East','South West','West']\n",
    "CURVATURE = ['Plan_curv','Profile_curv','TRI']\n",
    "FLOW_AND_ROADS = ['Flow_dir','Flow_acc','Distance_to_roads']\n",
    "\n",
    "BEDROCK = ['Granite','Granodiorite','Tonalite','Trondhjemite','Syenite','Monzonite','Monzodiorite','Quartz diorite','Diorite','Gabbro','Norite','Peridotite','Pyroksenite','Charnockite','Mangerite','Anorthosite','Mafic dyke (Diabase, Dolerite)','Pegmatite/aplite','Felsic volcanic rock','Rhyolite','Dacite','Intermediate volcanic rock','Andesite','Mafic volcanic rock','Basalt',\n",
    "           'Pyroclastic rock','Volcanic breccia','Siltstone','Sandstone','Greywacke','Arkose','Konglomerate','Sedimentary breccia','Limestone','Tuffite','Shale','Phyllite','Mica schist','Garnet mica schist','Calcareous phyllite','Calcareous mica schist','Amphibole schist','Graphitic schist','Calcite marble',\n",
    "           'Metasandstone','Metagreywacke','Meta-arkose','Quartzite','Quartz schist','Mica gneiss','Calc-silicate rock','Amphibole gneiss','Granitic gneiss','Granodioritic gneiss','Tonalitic gneiss','Quartz dioritic gneiss','Monzonitic gneiss','Dioritic gneis','Orthopyroxene gneiss','Migmatite','Augengneiss',\n",
    "           'Banded gneiss','Greenschist','Greenstone','Amphibolite','Metagabbro','Eclogite','Serpentinite','Mylonite/Phyllonite','Cataclasite']\n",
    "\n",
    "FEATURE_SETS = {\n",
    "    'p1': ['Slope'],\n",
    "    'p2': ['Slope','Elevation'],\n",
    "    'p3': ['Slope'] + ASPECT,\n",
    "    'p4': ['Slope','Elevation'] + CURVATURE + ['Distance_to_roads'],\n",
    "    'p5': ['Slope','Elevation'] + CURVATURE + FLOW_AND_ROADS,\n",
    "    'p6': ['Slope','Elevation'] + CURVATURE,\n",
    "    'p7': ['Elevation'] + ASPECT + CURVATURE + FLOW_AND_ROADS,\n",
    "    'p8': ['Slope','Elevation'] + ASPECT + CURVATURE + FLOW_AND_ROADS + BEDROCK,\n",
    "}\n",
    "\n",
    "assert list(FEATURE_SETS) == MODEL_NAMES, 'every model needs exactly one feature set'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Predict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "predictions = {}\n",
    "pred_probabilities = {}\n",
    "\n",
    "for name, columns in FEATURE_SETS.items():\n",
    "    features = df_database[columns]\n",
    "\n",
    "    # dropna() at load time already removed incomplete rows; filtering again per feature\n",
    "    # set could only desynchronise the predictions from the coordinate table, so verify instead.\n",
    "    assert not features.isna().any().any(), f'NaN values in feature set {name}'\n",
    "\n",
    "    if AUTO_SCALING:\n",
    "        # NOTE(review): the scaler is fitted on the prediction data itself, as before. If the\n",
    "        # models were trained on features standardised with the training set's statistics,\n",
    "        # that fitted scaler should be saved next to the models and re-used here instead -\n",
    "        # confirm against the training notebook.\n",
    "        x = StandardScaler().fit_transform(features)\n",
    "    else:\n",
    "        # Previously this path left the model input undefined (NameError); use raw values.\n",
    "        x = features.to_numpy()\n",
    "\n",
    "    model = rf_models[name]\n",
    "    predictions[name] = model.predict(x)\n",
    "    pred_probabilities[name] = model.predict_proba(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Peek at the class probabilities of the first model (one row per point, one column per class).\n",
    "pred_probabilities['p1'][:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Collect and save results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# .copy() makes df_coor an independent frame; assigning new columns to the bare slice\n",
    "# of df_database is what triggered pandas' SettingWithCopyWarning.\n",
    "df_coor = df_database[['POINT_X', 'POINT_Y']].copy()\n",
    "\n",
    "for name in MODEL_NAMES:\n",
    "    assert len(predictions[name]) == len(df_coor), f'row count mismatch for {name}'\n",
    "    df_coor[f'RF_Vestland_pred_{name}'] = predictions[name]\n",
    "    # Column 1 of predict_proba is the probability of the model's second class.\n",
    "    df_coor[f'RF_Vestland_prob_{name}'] = pred_probabilities[name][:, 1]\n",
    "\n",
    "df_coor.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_coor.to_csv(RESULTS_CSV)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}