diff --git a/gen_train_data/gen_train_data.ipynb b/gen_train_data/gen_train_data.ipynb index f7061aed333dabaadbbe09cd89ddd44bad015352..8f6a7e04713d0933e8edad19936da054c9c3670f 100644 --- a/gen_train_data/gen_train_data.ipynb +++ b/gen_train_data/gen_train_data.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -25,13 +25,13 @@ "import pandas as pd\n", "import numpy as np\n", "from sklearn.model_selection import train_test_split\n", - "from imblearn.over_sampling import SMOTE\n", + "from imblearn.combine import SMOTETomek\n", "from imblearn.under_sampling import TomekLinks" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -79,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -90,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -103,7 +103,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -116,19 +116,19 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "# OVERSAMPLING: SMOTE\n", - "smote = SMOTE()\n", - "X_train_over_pre, y_train_over_pre = smote.fit_resample(X_train_pre, y_train_pre)\n", - "X_train_over_post, y_train_over_post = smote.fit_resample(X_train_post, y_train_post)" + "# OVERSAMPLED training data\n", + "smote_tomek = SMOTETomek()\n", + "X_train_over_pre, y_train_over_pre = smote_tomek.fit_resample(X_train_pre, y_train_pre)\n", + "X_train_over_post, y_train_over_post = smote_tomek.fit_resample(X_train_post, y_train_post)" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -141,7 +141,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -153,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [