{ "cells": [
 { "cell_type": "markdown", "metadata": {}, "source": [
  "### Vestland database for ML\n",
  "\n",
  "Linn Alexandra Emhjellen. 2021" ] },
 { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [
  "import numpy as np\n",
  "import pandas as pd\n",
  "import os\n",
  "import joblib\n",
  "import matplotlib.pyplot as plt\n",
  "from sklearn.preprocessing import OneHotEncoder\n",
  "\n",
  "from sklearn.preprocessing import StandardScaler\n",
  "from sklearn.model_selection import train_test_split" ] },
 { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
  "# Earlier runs loaded other extracts; the calls are kept here for reference.\n",
  "# Random pixels from Vestland with GIS-features, for validation at the end:\n",
  "#df_random_Vestland = pd.read_excel('Vestland_pixels_2.xlsx')\n",
  "#df_random_Vestland = pd.read_excel('Lærdal_Aurland_500k_pixels2.xlsx')\n",
  "# Other candidate databases:\n",
  "#df_Vestland_database = pd.read_excel('test_area_pixel_features.xlsx')\n",
  "#df_Vestland_database = pd.read_excel('Lærdal_gård_bø_fishnet20_label_features.xlsx')\n",
  "#df_Vestland_database = pd.read_excel('copy of Lærdal_Aurland_close_area_10_m_pixels_features.xlsx')\n",
  "#df_Vestland_database = pd.read_csv('lærdal_aurland_10m_pixels_close.csv')\n",
  "#df_Vestland_database = pd.read_csv('Vestland_West_features_10m.csv')\n",
  "\n",
  "# Active input: read the feature table and discard every row that has a missing value.\n",
  "df_Vestland_database = pd.read_csv('Bohme_features_20m.csv').dropna()" ] },
 { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[array([108, 113, 143, 402, 423, 426, 432, 437, 440], dtype=int64)]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# One-hot encode the bedrock codes.\n",
  "# `bedrock` is the dense 0/1 array, one column per entry of bedrock_encoder.categories_.\n",
  "bedrock_encoder = OneHotEncoder()\n",
  "bedrock_encoded = bedrock_encoder.fit_transform(df_Vestland_database[[\"Bedrock\"]])\n",
  "bedrock = bedrock_encoded.toarray()\n",
  "\n",
  "# Shown as the cell output: the bedrock codes in encoded-column order.\n",
  "bedrock_encoder.categories_" ] },
 { "cell_type": "code", "execution_count": 6, "metadata":
{}, "outputs": [], "source": [
  "# Readable column names for the one-hot encoded bedrock classes.\n",
  "# They must be listed in the same order as bedrock_encoder.categories_[0].\n",
  "\n",
  "# Name lists used with other input files (kept for reference):\n",
  "#bedrock_groups = ['Granit','Granodiorite','Monzonite','Monzodiorite', 'Gabbro','Pyroksenite','Charnockite','Anorthosite','Rhyolite','Phyllite','Calcareous phyllite','Metasandstone', 'Quartzite','Quartz schist','Mica gneiss','Amphibole gneiss','Granitic gneiss','Tonalitic gneiss','Monzonitic gneiss','Orthopyroxene gneiss','Banded gneiss','Amphibolite','Metagabbro']\n",
  "\n",
  "#bedrock_Vestland = ['Granite','Granodiorite','Tonalite','Trondhjemite','Syenite','Monzonite','Monzodiorite','Quartz diorite','Diorite','Gabbro','Norite','Peridotite','Pyroksenite','Charnockite','Mangerite','Anorthosite','Mafic dyke (Diabase, Dolerite)','Pegmatite/aplite','Felsic volcanic rock','Rhyolite','Dacite','Intermediate volcanic rock','Andesite','Mafic volcanic rock','Basalt',\n",
  "# 'Pyroclastic rock','Volcanic breccia','Siltstone','Sandstone','Greywacke','Arkose','Konglomerate','Sedimentary breccia','Limestone','Tuffite','Shale','Phyllite','Mica schist','Garnet mica schist','Calcareous phyllite','Calcareous mica schist','Amphibole schist','Graphitic schist','Calcite marble',\n",
  "#'Metasandstone','Metagreywacke','Meta-arkose','Quartzite','Quartz schist','Mica gneiss','Calc-silicate rock','Amphibole gneiss','Granitic gneiss','Granodioritic gneiss','Tonalitic gneiss','Quartz dioritic gneiss','Monzonitic gneiss','Dioritic gneis','Orthopyroxene gneiss','Migmatite','Augengneiss',\n",
  "#'Banded gneiss','Greenschist','Greenstone','Amphibolite','Metagabbro','Eclogite','Serpentinite','Mylonite/Phyllonite','Cataclasite']\n",
  "\n",
  "#bedrock_Vestland = ['Granite','Granodiorite','Monzonite','Monzodiorite','Quartx diorite','Gabbro','Pyroksenitt','Charnockitt','Anorthosite','Rhyolite','Phyllite','Calcareous phyllite','Metasandstone','Quartzite','Quartz schist','Mica gneiss','Amphibole gneiss','Granitic gneiss','Granodioritic gneiss','Tonalitic gneiss','Monzonitic gneiss','Orthopyroxene gneiss','Migmatite','Augengneiss','Banded gneiss','Amphibolite','Metagabbro','Mylonite/Phyllonite','Cataclasite']\n",
  "#bedrock_Vestland = ['Granite','Monzonite','Monzodiorite','Charnockite','Granitic gneiss','Tonalitic gneiss']\n",
  "#bedrock_Vestland = ['Granite','Granodiorite','Monzonite','Monzodiorite','Gabbro','Charnockite','Anorthosite','Phyllite','Quartzite','Granitic gneiss','Tonalitic gneiss','Orthopyroxene gneiss','Cataclasite']\n",
  "\n",
  "#bedrock_Vestland = ['Granit','Monzonite','Monzodiorite','Charnockite','Anorthosite','Granitic gneiss','Tonalitic gneiss']\n",
  "#bedrock_Vestland = ['Tonalite','Diorite','Gabbro','Anorthosite','Rhyolite','Dacite','Sandstone','Arkose','Konglomerate','Phyllite',\n",
  "# 'Mica schist','Quartzite','Quartz schist','Granitic gneiss','Granodioritic gneiss','Tonalitic gneiss','Migmatite','Banded gneiss','Greenschist',\n",
  "#'Greenstone','Amphibolite','Metagabbro','Serpentinite']\n",
  "\n",
  "#bedrock_Vestland = ['Dacite','Sandstone','Konglomerate','Mica schist','Quartz schist','Granitic gneiss','Tonalitic gneiss','Banded gneiss','Amphibolite']\n",
  "\n",
  "# FIXME(review): 'Tonalitic gneiss' appears twice below (5th and 7th name, i.e. codes 423 and 432\n",
  "# in the stored categories_ output), so df_bedrock gets two columns with the same label.\n",
  "# One of the two names is presumably wrong - confirm against the bedrock code table.\n",
  "bedrock_Vestland = ['Monzonite','Gabbro','Anorthosite','Phyllite','Tonalitic gneiss','Mica gneiss','Tonalitic gneiss','Orthopyroxene gneiss','Migmatite']\n",
  "\n",
  "# Fail with a readable message when the name list does not fit the encoded classes.\n",
  "bedrock_codes = bedrock_encoder.categories_[0]\n",
  "if len(bedrock_Vestland) != len(bedrock_codes):\n",
  "    raise ValueError(f'{len(bedrock_Vestland)} bedrock names were given for {len(bedrock_codes)} encoded classes {list(bedrock_codes)}')\n",
  "\n",
  "df_bedrock = pd.DataFrame(bedrock, columns=bedrock_Vestland)" ] },
 { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [
  "# Merge the one-hot encoded bedrock columns with the other parameters.\n",
  "# The dropna() applied when the table was loaded left gaps in the row index, whereas\n",
  "# df_bedrock is numbered 0..n-1; renumber first so concat(axis=1) pairs rows by position.\n",
  "# reset_index() without drop=True also keeps the old row labels in an 'index' column.\n",
  "df_Vestland_database = df_Vestland_database.reset_index()\n",
  "df_Vestland_database = pd.concat([df_Vestland_database, df_bedrock], axis=1)" ] },
 { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [
  "# Inclusive upper edge, in degrees, of the sectors between North (ends at 22.5) and North West.\n",
  "ASPECT_SECTORS = [(67.5, 'North East'), (112.5, 'East'), (157.5, 'South East'),\n",
  "                  (202.5, 'South'), (247.5, 'South West'), (292.5, 'West')]\n",
  "\n",
  "\n",
  "def aspect_to_compass(angle):\n",
  "    \"\"\"Return the aspect category for an aspect angle given in degrees.\n",
  "\n",
  "    -1 is mapped to 'Flat'. Angles in [0, 22.5] and [337.5, 360] are 'North'; the other\n",
  "    sectors are 45 degrees wide with an inclusive upper edge. Every other input (NaN,\n",
  "    other negative values, values above 360) returns np.nan, a real missing value that\n",
  "    dropna() removes - unlike the string 'NaN'.\n",
  "    \"\"\"\n",
  "    if angle == -1:\n",
  "        return 'Flat'\n",
  "    if not 0 <= angle <= 360:  # also true for NaN, because every comparison with NaN is False\n",
  "        return np.nan\n",
  "    if angle <= 22.5 or angle >= 337.5:  # 360 counts as North as well\n",
  "        return 'North'\n",
  "    for upper_edge, label in ASPECT_SECTORS:\n",
  "        if angle <= upper_edge:\n",
  "            return label\n",
  "    return 'North West'  # only 292.5 < angle < 337.5 is left at this point\n",
  "\n",
  "\n",
  "aspect_categorical = [aspect_to_compass(angle) for angle in df_Vestland_database['Aspect']]" ] },
 { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [
  "df_Vestland_database['aspect_categorical'] = aspect_categorical" ] },
 { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [
  "# Drop rows with missing values (including aspects that could not be classified) and\n",
  "# renumber the rows, so frames built from plain arrays (index 0..n-1) still line up.\n",
  "df_Vestland_database = df_Vestland_database.dropna().reset_index(drop=True)" ] },
 { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[array(['East', 'Flat', 'North', 'North East', 'North West', 'South',\n", " 'South East', 'South West', 'West'], dtype=object)]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# encode aspect values\n",
  "\n",
  "# The categories are passed to the encoder explicitly, so the encoded array always has\n",
  "# these nine columns in this order, even if a class (e.g. 'Flat') is absent from the data.\n",
  "aspect_groups = ('East','Flat','North','North East','North West','South','South East','South West','West')\n",
  "\n",
  "aspect_cat = df_Vestland_database[[\"aspect_categorical\"]]\n",
  "\n",
  "aspect_encoder = OneHotEncoder(categories=[list(aspect_groups)])\n",
  "aspect_encoded = aspect_encoder.fit_transform(aspect_cat)\n",
  "\n",
  "aspect = aspect_encoded.toarray()\n",
  "aspect_encoder.categories_" ] },
 { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [
  "# aspect_groups is the category order handed to aspect_encoder, so it labels the columns of `aspect`.\n",
  "df_aspect = pd.DataFrame(aspect, columns=aspect_groups)" ] },
 { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [
  "# adding Flat as a column as well\n",
  "#array = np.zeros([len(df_Vestland_database),1])\n",
  "#df_flat = pd.DataFrame(array, columns = ['Flat'])\n",
  "\n" ] },
 { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | index | \n", "OID_ | \n", "POINT_X | \n", "POINT_Y | \n", "Elevation | \n", "Slope | \n", "Aspect | \n", "Curvature | \n", "Profile_curv | \n", "Plan_curv | \n", "... | \n", "Bedrock | \n", "Monzonite | \n", "Diorite | \n", "Anorthosite | \n", "Garnet mica schist | \n", "Mica gneiss | \n", "Tonalitic gneiss | \n", "Orthopyroxene gneiss | \n", "Migmatite | \n", "aspect_categorical | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0 | \n", "1 | \n", "61314.5741 | \n", "6.804053e+06 | \n", "785.760925 | \n", "42.661743 | \n", "94.211853 | \n", "3.051331 | \n", "-2.636860 | \n", "0.414471 | \n", "... | \n", "113 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "East | \n", "
1 | \n", "1 | \n", "2 | \n", "61324.5741 | \n", "6.804053e+06 | \n", "775.545593 | \n", "44.813194 | \n", "94.644402 | \n", "0.926636 | \n", "0.748065 | \n", "1.674701 | \n", "... | \n", "113 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "East | \n", "
2 | \n", "2 | \n", "3 | \n", "61334.5741 | \n", "6.804053e+06 | \n", "766.137207 | \n", "39.271423 | \n", "96.103744 | \n", "-1.371216 | \n", "2.497010 | \n", "1.125794 | \n", "... | \n", "113 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "East | \n", "
3 | \n", "3 | \n", "4 | \n", "61344.5741 | \n", "6.804053e+06 | \n", "759.246948 | \n", "32.916458 | \n", "99.230545 | \n", "0.535461 | \n", "0.843176 | \n", "1.378637 | \n", "... | \n", "113 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "East | \n", "
4 | \n", "4 | \n", "5 | \n", "61354.5741 | \n", "6.804053e+06 | \n", "753.402039 | \n", "29.343985 | \n", "103.750122 | \n", "0.747192 | \n", "0.374238 | \n", "1.121430 | \n", "... | \n", "113 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "East | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2047930 | \n", "2562256 | \n", "2562257 | \n", "69554.5741 | \n", "6.816453e+06 | \n", "661.918884 | \n", "8.343645 | \n", "15.816636 | \n", "-2.953735 | \n", "1.943575 | \n", "-1.010160 | \n", "... | \n", "108 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "North | \n", "
2047931 | \n", "2562257 | \n", "2562258 | \n", "69564.5741 | \n", "6.816453e+06 | \n", "661.912476 | \n", "9.202660 | \n", "21.742188 | \n", "0.115234 | \n", "0.320242 | \n", "0.435476 | \n", "... | \n", "108 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "North | \n", "
2047932 | \n", "2562258 | \n", "2562259 | \n", "69574.5741 | \n", "6.816453e+06 | \n", "661.536682 | \n", "9.152455 | \n", "44.141239 | \n", "2.452026 | \n", "-0.862458 | \n", "1.589569 | \n", "... | \n", "108 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "North East | \n", "
2047933 | \n", "2562259 | \n", "2562260 | \n", "69584.5741 | \n", "6.816453e+06 | \n", "659.753784 | \n", "10.504866 | \n", "62.287079 | \n", "0.200867 | \n", "0.062948 | \n", "0.263814 | \n", "... | \n", "108 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "North East | \n", "
2047934 | \n", "2562260 | \n", "2562261 | \n", "69594.5741 | \n", "6.816453e+06 | \n", "657.857422 | \n", "11.790175 | \n", "68.847740 | \n", "-0.410706 | \n", "-0.070160 | \n", "-0.480866 | \n", "... | \n", "108 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "East | \n", "
2047935 rows × 24 columns
\n", "