{ "cells": [ { "cell_type": "markdown", "id": "68f285c6-8073-4a79-a499-8275374fc144", "metadata": {}, "source": [ "
\n", "
\n", "\n", "
\n", "
\n", "

May 2022

\n", "

ML Depresión

\n", "

Lucía Prieto Santamaría

\n", "
\n", "
\n", "
 
" ] }, { "cell_type": "markdown", "id": "282ea5a1-81e0-4e1a-ab3e-c8b261a25acc", "metadata": {}, "source": [ "# ML models for Twitter Depression datasets: sampling after the split, with hyperparameter tuning" ] }, { "cell_type": "markdown", "id": "59109d23-17dd-4df9-86e6-14a74bd822a8", "metadata": {}, "source": [ "\n", "\n" ] }, { "cell_type": "markdown", "id": "a2f0d919-069f-4a89-80f3-b08bfeeceb5f", "metadata": {}, "source": [ "\n", "\n" ] }, { "cell_type": "markdown", "id": "f6580066-46d8-4f75-9fd8-fd02e40f7886", "metadata": {}, "source": [ "\n", "\n" ] }, { "cell_type": "markdown", "id": "c1cfa7c6-655e-4f3c-8971-5debebb09a7f", "metadata": {}, "source": [ "\n", "\n" ] }, { "cell_type": "markdown", "id": "56249b5c-9ac8-40b0-a870-cb650065fd5b", "metadata": {}, "source": [ "Import libraries for data processing and visualization" ] }, { "cell_type": "code", "execution_count": 1, "id": "4c9b7e83-220c-42ab-aa02-c78d92a65325", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import itertools\n", "\n", "\n", "#from sklearn.pipeline import Pipeline\n", "from sklearn.model_selection import GridSearchCV\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.model_selection import train_test_split\n", "\n", "from imblearn.pipeline import Pipeline\n", "from imblearn.over_sampling import SMOTE\n", "from imblearn.over_sampling import ADASYN\n", "\n", "from sklearn.svm import SVC\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier\n", "from sklearn.neural_network import MLPClassifier\n", "from xgboost import XGBClassifier\n", "\n", "from sklearn import metrics\n", "\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "markdown", "id": "0caad30d-3234-4fed-a099-1c3b447ca309", "metadata": {}, "source": [ "\n", "\n" ] }, { "cell_type": "markdown", "id": "36a1ee8a-6001-4b14-88d1-4d462b19ca26", "metadata": {}, "source": [ 
"\n", "\n" ] },
{ "cell_type": "markdown", "id": "1837b7fc-371b-4275-9308-3f9e2a41ddda", "metadata": {}, "source": [ "\n", "\n" ] },
{ "cell_type": "markdown", "id": "375f1b8e-48b2-4c1e-8898-2fe20da1af9f", "metadata": {}, "source": [ "Import dataframes" ] },
{ "cell_type": "markdown", "id": "8da3d6dd-b0d5-4288-8fc2-f26d3bcbb2cf", "metadata": {}, "source": [ "| Dataset | NUM vs AVG-PERC | Time variables | PCA |\n", "|:-:|:-:|:-:|:-:|\n", "| **df0** | Both | Original | No |\n", "| | | | |\n", "| **df1** | NUM | Original | No |\n", "| **df2** | PERC/AVG | Original | No |\n", "| **df3** | NUM | Grouped | No |\n", "| **df4** | PERC/AVG | Grouped | No |\n", "| **df5** | NUM | Original | Yes |\n", "| **df6** | PERC/AVG | Original | Yes |\n", "| **df7** | NUM | Grouped | Yes |\n", "| **df8** | PERC/AVG | Grouped | Yes |" ] },
{ "cell_type": "code", "execution_count": 2, "id": "a299fb98-4714-4deb-b3a0-88aaeeba2204", "metadata": {}, "outputs": [], "source": [
 "# One semicolon-separated CSV per dataset variant listed in the table above.\n",
 "df0 = pd.read_csv('datasets/d0.csv', sep=';')\n",
 "df1 = pd.read_csv('datasets/d1.csv', sep=';')\n",
 "df2 = pd.read_csv('datasets/d2.csv', sep=';')\n",
 "df3 = pd.read_csv('datasets/d3.csv', sep=';')\n",
 "df4 = pd.read_csv('datasets/d4.csv', sep=';')\n",
 "df5 = pd.read_csv('datasets/d5.csv', sep=';')\n",
 "df6 = pd.read_csv('datasets/d6.csv', sep=';')\n",
 "df7 = pd.read_csv('datasets/d7.csv', sep=';')\n",
 "df8 = pd.read_csv('datasets/d8.csv', sep=';')" ] },
{ "cell_type": "code", "execution_count": 3, "id": "64236e92-4f21-4f16-bdc6-944243351af5", "metadata": {}, "outputs": [], "source": [
 "# Descriptive name -> dataframe; these names label every result row later on.\n",
 "datasets = {'d_original': df0,\n",
 "            'd_num_to': df1,\n",
 "            'd_perc_to': df2,\n",
 "            'd_num_tg': df3,\n",
 "            'd_perc_tg': df4,\n",
 "            'd_num_to_pca': df5,\n",
 "            'd_perc_to_pca': df6,\n",
 "            'd_num_tg_pca': df7,\n",
 "            'd_perc_tg_pca': df8}" ] },
{ "cell_type": "code", "execution_count": 4, "id": "39e0ea73-38fa-4e71-abe2-1ef094550c8c", "metadata": {}, "outputs": [], "source": [
 "# Encode the class label in place: DEPRESSIVE -> 1, CONTROL -> 0.\n",
 "for d in datasets.keys():\n",
 "    datasets[d].loc[datasets[d]['GROUP'] == 'DEPRESSIVE', 'GROUP'] = 1\n",
 "    datasets[d].loc[datasets[d]['GROUP'] == 'CONTROL', 'GROUP'] = 0\n",
 "    # The cast fails on any remaining non-numeric label.\n",
 "    datasets[d]['GROUP'] = datasets[d]['GROUP'].astype('int')" ] },
{ "cell_type": "code", "execution_count": 5, "id": "caf48000-c47e-4985-b682-ff1448182d10", "metadata": {}, "outputs": [], "source": [
 "# Fixed seed so the stratified 90/10 split is identical after every kernel restart.\n",
 "# (R, the seed used by the samplers and XGBoost, is only defined in a later cell.)\n",
 "SPLIT_SEED = 12345\n",
 "\n",
 "# One entry per dataset, same keys and order as `datasets`.\n",
 "data = {name: dict() for name in datasets}\n",
 "\n",
 "for d in data.keys():\n",
 "    frame = datasets[d]\n",
 "    # Features are every column except the label, selected by name so the label\n",
 "    # cannot leak into X if a CSV's column order changes.\n",
 "    # NOTE(review): the previous code dropped the last column by position; this\n",
 "    # assumes GROUP was that column - confirm for all nine CSVs.\n",
 "    data[d]['data'] = frame.drop(columns='GROUP').to_numpy()\n",
 "    data[d]['target'] = frame['GROUP'].to_numpy()\n",
 "\n",
 "    splits = train_test_split(data[d]['data'],\n",
 "                              data[d]['target'],\n",
 "                              test_size=0.10,\n",
 "                              stratify=data[d]['target'],\n",
 "                              random_state=SPLIT_SEED)\n",
 "    data[d]['X_train'], data[d]['X_test'], data[d]['y_train'], data[d]['y_test'] = splits" ] },
{ "cell_type": "markdown", "id": "8a5a0448-e7a7-4f6a-aad5-997b76dd9913", "metadata": {}, "source": [ "\n" ] },
{ "cell_type": "markdown", "id": "4aebb0f8-d3f9-4a1a-b8a9-af76edc82281", "metadata": {}, "source": [ "\n" ] },
{ "cell_type": "markdown", "id": "19fdaa1a-f0a7-4b0a-b580-22ab069d39a1", "metadata": {}, "source": [ "\n" ] },
{ "cell_type": "markdown", "id": "8987454d-fb6d-4a79-aa62-be11ce8c3110", "metadata": {}, "source": [ "\n" ] },
{ "cell_type": "markdown", "id": "cf66bb11-b7f9-4064-9f59-0dc92c19ed81", "metadata": { "tags": [] }, "source": [ "### Tuning hyperparameters" ] },
{ "cell_type": "code", "execution_count": 6, "id": "a815dddf-9cf6-48f7-b387-91252271f34e", "metadata": { "tags": [] }, "outputs": [], "source": [ "R = 12345" ] },
{ "cell_type": "markdown", "id": "0eb5db2d-d1d6-408f-a7fa-f27e143e2f9f", "metadata": {}, "source": [ "Sampling after splitting" ] },
{ "cell_type": "code", "execution_count": 7, "id": "76bffa09-683d-4eb5-9cbd-85bfc291cfda", "metadata": {}, "outputs": [], "source": [ 
"# Both oversamplers are seeded with R so the synthetic minority samples are repeatable.\n",
 "oversample_smote = SMOTE(sampling_strategy='minority',\n",
 "                         random_state=R)\n",
 "\n",
 "oversample_adasyn = ADASYN(sampling_strategy='minority',\n",
 "                           random_state=R)" ] },
{ "cell_type": "code", "execution_count": 8, "id": "f6f0760d-8310-4d35-8eef-9153fe5f5729", "metadata": {}, "outputs": [], "source": [
 "# Every estimator is tried with no resampling, with SMOTE and with ADASYN.\n",
 "sampling_options = [None, oversample_smote, oversample_adasyn]\n",
 "\n",
 "# One sub-grid per estimator family. Stochastic estimators are seeded with R\n",
 "# (as XGBoost and the samplers already were) so CV scores are reproducible.\n",
 "params_grid = [\n",
 "    {\n",
 "        'sampling': sampling_options,\n",
 "        'estimator': [SVC()],\n",
 "        'estimator__C': [100, 10, 1.0, 0.1, 0.001],\n",
 "        'estimator__gamma': [0.001, 0.0001],\n",
 "        'estimator__kernel': ['poly', 'rbf', 'sigmoid'],\n",
 "    },\n",
 "    {\n",
 "        'sampling': sampling_options,\n",
 "        'estimator': [DecisionTreeClassifier(random_state=R)],\n",
 "        'estimator__max_depth': [1, 2, 3, 4, 5],\n",
 "        # \"auto\" removed: for DecisionTreeClassifier it was an alias of \"sqrt\"\n",
 "        # (deprecated in scikit-learn 1.1, rejected from 1.3 on).\n",
 "        'estimator__max_features': [None, \"sqrt\", \"log2\"],\n",
 "    },\n",
 "    {\n",
 "        'sampling': sampling_options,\n",
 "        'estimator': [RandomForestClassifier(random_state=R)],\n",
 "        'estimator__n_estimators': [100, 150, 200],\n",
 "        'estimator__criterion': [\"gini\", \"entropy\"],\n",
 "        'estimator__max_depth': [3, 4, 5],\n",
 "    },\n",
 "    {\n",
 "        'sampling': sampling_options,\n",
 "        'estimator': [AdaBoostClassifier(random_state=R)],\n",
 "        'estimator__n_estimators': [20, 30, 40],\n",
 "        'estimator__learning_rate': [0.1, 0.5, 1],\n",
 "    },\n",
 "    {\n",
 "        'sampling': sampling_options,\n",
 "        'estimator': [BaggingClassifier(random_state=R)],\n",
 "        'estimator__n_estimators': [10, 100, 1000],\n",
 "        'estimator__max_samples': [0.05, 0.1, 0.2, 0.5],\n",
 "    },\n",
 "    {\n",
 "        'sampling': sampling_options,\n",
 "        'estimator': [XGBClassifier(random_state=R, verbosity=0)],\n",
 "        'estimator__max_depth': [2, 3, 5, 7, 10],\n",
 "        'estimator__n_estimators': [10, 100, 500],\n",
 "    },\n",
 "    {\n",
 "        'sampling': sampling_options,\n",
 "        'estimator': [MLPClassifier(max_iter=100, random_state=R)],\n",
 "        'estimator__solver': ['sgd', 'adam'],\n",
 "        'estimator__learning_rate': ['constant', 'adaptive'],\n",
 "        'estimator__hidden_layer_sizes': [(50, 50, 50), (50, 100, 50), (100,)],\n",
 "        'estimator__activation': ['tanh', 'relu'],\n",
 "        'estimator__alpha': [0.0001, 0.05],\n",
 "        'estimator__early_stopping': [True, False],\n",
 "    },\n",
 "]" ] },
{ "cell_type": "markdown", "id": "ced7cd9b-3aa7-41e1-8389-53d24ae677c9", "metadata": {}, "source": [ "### Grid search CV" ] },
{ "cell_type": "code", "execution_count": 9, "id": "f06455e0-edbd-44e8-9359-0ae008821ba5", "metadata": {}, "outputs": [], "source": [
 "# All five metrics are recorded per candidate; the refit uses accuracy.\n",
 "scoring_metrics = ['accuracy',\n",
 "                   'recall',\n",
 "                   'precision',\n",
 "                   'f1',\n",
 "                   'roc_auc']" ] },
{ "cell_type": "code", "execution_count": 10, "id": "95f085ba-f9ef-4b42-9e9c-190cf42538fc", "metadata": {}, "outputs": [], "source": [
 "# NOTE(review): this silences every warning for the rest of the session,\n",
 "# including any convergence or failed-fit warnings raised during the search.\n",
 "import warnings\n",
 "warnings.filterwarnings('ignore')" ] },
{ "cell_type": "code", "execution_count": 11, "id": "1dc28e89-c43c-4858-a5ea-6db72109b2ec", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Tunning hyperparameters for: d_original\n", "Tunning hyperparameters for: d_num_to\n", "Tunning hyperparameters for: d_perc_to\n", "Tunning hyperparameters for: d_num_tg\n", "Tunning hyperparameters for: d_perc_tg\n", "Tunning hyperparameters for: d_num_to_pca\n", "Tunning hyperparameters for: d_perc_to_pca\n", "Tunning hyperparameters for: d_num_tg_pca\n", "Tunning hyperparameters for: d_perc_tg_pca\n" ] } ], "source": [
 "# PCA datasets go to the estimator as-is; all others get a StandardScaler step.\n",
 "PCA_DATASETS = ('d_num_to_pca', 'd_perc_to_pca', 'd_num_tg_pca', 'd_perc_tg_pca')\n",
 "\n",
 "for d in data.keys():\n",
 "\n",
 "    print('Tunning hyperparameters for: ', d)\n",
 "\n",
 "    # 'sampling' and 'estimator' are placeholders that params_grid overrides.\n",
 "    # NOTE(review): the sampler runs before the scaler, so SMOTE/ADASYN see\n",
 "    # unscaled features - confirm this ordering is intended.\n",
 "    steps = [('sampling', None)]\n",
 "    if d not in PCA_DATASETS:\n",
 "        steps.append(('scaler', StandardScaler()))\n",
 "    steps.append(('estimator', SVC()))\n",
 "    pipe = Pipeline(steps=steps)\n",
 "\n",
 "    grid = GridSearchCV(pipe,\n",
 "                        params_grid,\n",
 "                        scoring=scoring_metrics,\n",
 "                        cv=10,\n",
 "                        n_jobs=-1,\n",
 "                        refit='accuracy')\n",
 "    grid.fit(data[d]['X_train'], data[d]['y_train'])\n",
 "    data[d]['cv_results'] = pd.DataFrame(grid.cv_results_)" ] },
{ "cell_type": "code", "execution_count": 12, "id": "7587b68d-b57d-4370-9d8f-7c95913038e3", "metadata": {}, "outputs": [], "source": [
 "# Collect the per-dataset CV tables and tag every row with its dataset name.\n",
 "results = {name: entry['cv_results'] for name, entry in data.items()}\n",
 "\n",
 "for name, frame in results.items():\n",
 "    frame['dataset'] = name\n",
 "\n",
 "# Stack the tables in alphabetical order of dataset name.\n",
 "results_final_df = pd.concat([results[name] for name in sorted(results)], ignore_index=True)" ] },
{ "cell_type": "code", "execution_count": 13, "id": "defa15b8-2634-4a17-99e7-28b5f2ef915f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mean_fit_timestd_fit_timemean_score_timestd_score_timeparam_estimatorparam_estimator__Cparam_estimator__gammaparam_estimator__kernelparam_samplingparam_estimator__max_depth...split4_test_roc_aucsplit5_test_roc_aucsplit6_test_roc_aucsplit7_test_roc_aucsplit8_test_roc_aucsplit9_test_roc_aucmean_test_roc_aucstd_test_roc_aucrank_test_roc_aucdataset
00.0071820.0006470.0067310.000907SVC(C=100, gamma=0.001)1000.001polyNoneNaN...1.0000000.8833330.9156250.8000000.9312500.8656250.8962910.050239373d_num_tg
10.0496000.0147850.0099350.000351SVC(C=100, gamma=0.001)1000.001polySMOTE(random_state=12345, sampling_strategy='m...NaN...0.8750000.7972220.7625000.8968750.9343750.6968750.8368340.094692429d_num_tg
20.0533340.0223720.0102350.000578SVC(C=100, gamma=0.001)1000.001polyADASYN(random_state=12345, sampling_strategy='...NaN...0.7560980.8333330.7406250.9062500.8781250.6750000.8176630.087314441d_num_tg
30.0098540.0073830.0084470.003213SVC(C=100, gamma=0.001)1000.001rbfNoneNaN...0.9847561.0000000.8281251.0000000.9968750.9156250.9664410.05186967d_num_tg
40.0250100.0048580.0128940.002616SVC(C=100, gamma=0.001)1000.001rbfSMOTE(random_state=12345, sampling_strategy='m...NaN...0.9878050.9944440.8656250.9718751.0000000.9125000.9637740.04099385d_num_tg
..................................................................
53950.5637920.0815060.0099730.004184MLPClassifier(alpha=0.05, hidden_layer_sizes=(...NaNNaNNaNSMOTE(random_state=12345, sampling_strategy='m...NaN...0.8841460.9916670.9593750.9781250.9937500.9281250.9521770.031330335d_perc_to_pca
53960.5216050.0405240.0075800.000798MLPClassifier(alpha=0.05, hidden_layer_sizes=(...NaNNaNNaNADASYN(random_state=12345, sampling_strategy='...NaN...0.9054880.9444440.9687500.9593750.9656250.9000000.9326610.026477434d_perc_to_pca
53970.3722050.0178510.0065830.000662MLPClassifier(alpha=0.05, hidden_layer_sizes=(...NaNNaNNaNNoneNaN...0.9115850.9972220.9937500.9593750.9906250.9625000.9775420.02581119d_perc_to_pca
53980.6687910.0798950.0084110.002076MLPClassifier(alpha=0.05, hidden_layer_sizes=(...NaNNaNNaNSMOTE(random_state=12345, sampling_strategy='m...NaN...0.9420730.9972220.9906250.9562500.9906250.9656250.9787540.0182979d_perc_to_pca
53990.5510880.0634890.0065820.000798MLPClassifier(alpha=0.05, hidden_layer_sizes=(...NaNNaNNaNADASYN(random_state=12345, sampling_strategy='...NaN...0.9481710.9972220.9843750.9531250.9812500.9593750.9744250.01561150d_perc_to_pca
\n", "

5400 rows × 87 columns

\n", "
" ], "text/plain": [ " mean_fit_time std_fit_time mean_score_time std_score_time \\\n", "0 0.007182 0.000647 0.006731 0.000907 \n", "1 0.049600 0.014785 0.009935 0.000351 \n", "2 0.053334 0.022372 0.010235 0.000578 \n", "3 0.009854 0.007383 0.008447 0.003213 \n", "4 0.025010 0.004858 0.012894 0.002616 \n", "... ... ... ... ... \n", "5395 0.563792 0.081506 0.009973 0.004184 \n", "5396 0.521605 0.040524 0.007580 0.000798 \n", "5397 0.372205 0.017851 0.006583 0.000662 \n", "5398 0.668791 0.079895 0.008411 0.002076 \n", "5399 0.551088 0.063489 0.006582 0.000798 \n", "\n", " param_estimator param_estimator__C \\\n", "0 SVC(C=100, gamma=0.001) 100 \n", "1 SVC(C=100, gamma=0.001) 100 \n", "2 SVC(C=100, gamma=0.001) 100 \n", "3 SVC(C=100, gamma=0.001) 100 \n", "4 SVC(C=100, gamma=0.001) 100 \n", "... ... ... \n", "5395 MLPClassifier(alpha=0.05, hidden_layer_sizes=(... NaN \n", "5396 MLPClassifier(alpha=0.05, hidden_layer_sizes=(... NaN \n", "5397 MLPClassifier(alpha=0.05, hidden_layer_sizes=(... NaN \n", "5398 MLPClassifier(alpha=0.05, hidden_layer_sizes=(... NaN \n", "5399 MLPClassifier(alpha=0.05, hidden_layer_sizes=(... NaN \n", "\n", " param_estimator__gamma param_estimator__kernel \\\n", "0 0.001 poly \n", "1 0.001 poly \n", "2 0.001 poly \n", "3 0.001 rbf \n", "4 0.001 rbf \n", "... ... ... \n", "5395 NaN NaN \n", "5396 NaN NaN \n", "5397 NaN NaN \n", "5398 NaN NaN \n", "5399 NaN NaN \n", "\n", " param_sampling \\\n", "0 None \n", "1 SMOTE(random_state=12345, sampling_strategy='m... \n", "2 ADASYN(random_state=12345, sampling_strategy='... \n", "3 None \n", "4 SMOTE(random_state=12345, sampling_strategy='m... \n", "... ... \n", "5395 SMOTE(random_state=12345, sampling_strategy='m... \n", "5396 ADASYN(random_state=12345, sampling_strategy='... \n", "5397 None \n", "5398 SMOTE(random_state=12345, sampling_strategy='m... \n", "5399 ADASYN(random_state=12345, sampling_strategy='... \n", "\n", " param_estimator__max_depth ... 
split4_test_roc_auc split5_test_roc_auc \\\n", "0 NaN ... 1.000000 0.883333 \n", "1 NaN ... 0.875000 0.797222 \n", "2 NaN ... 0.756098 0.833333 \n", "3 NaN ... 0.984756 1.000000 \n", "4 NaN ... 0.987805 0.994444 \n", "... ... ... ... ... \n", "5395 NaN ... 0.884146 0.991667 \n", "5396 NaN ... 0.905488 0.944444 \n", "5397 NaN ... 0.911585 0.997222 \n", "5398 NaN ... 0.942073 0.997222 \n", "5399 NaN ... 0.948171 0.997222 \n", "\n", " split6_test_roc_auc split7_test_roc_auc split8_test_roc_auc \\\n", "0 0.915625 0.800000 0.931250 \n", "1 0.762500 0.896875 0.934375 \n", "2 0.740625 0.906250 0.878125 \n", "3 0.828125 1.000000 0.996875 \n", "4 0.865625 0.971875 1.000000 \n", "... ... ... ... \n", "5395 0.959375 0.978125 0.993750 \n", "5396 0.968750 0.959375 0.965625 \n", "5397 0.993750 0.959375 0.990625 \n", "5398 0.990625 0.956250 0.990625 \n", "5399 0.984375 0.953125 0.981250 \n", "\n", " split9_test_roc_auc mean_test_roc_auc std_test_roc_auc rank_test_roc_auc \\\n", "0 0.865625 0.896291 0.050239 373 \n", "1 0.696875 0.836834 0.094692 429 \n", "2 0.675000 0.817663 0.087314 441 \n", "3 0.915625 0.966441 0.051869 67 \n", "4 0.912500 0.963774 0.040993 85 \n", "... ... ... ... ... \n", "5395 0.928125 0.952177 0.031330 335 \n", "5396 0.900000 0.932661 0.026477 434 \n", "5397 0.962500 0.977542 0.025811 19 \n", "5398 0.965625 0.978754 0.018297 9 \n", "5399 0.959375 0.974425 0.015611 50 \n", "\n", " dataset \n", "0 d_num_tg \n", "1 d_num_tg \n", "2 d_num_tg \n", "3 d_num_tg \n", "4 d_num_tg \n", "... ... 
\n", "5395 d_perc_to_pca \n", "5396 d_perc_to_pca \n", "5397 d_perc_to_pca \n", "5398 d_perc_to_pca \n", "5399 d_perc_to_pca \n", "\n", "[5400 rows x 87 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results_final_df" ] },
{ "cell_type": "code", "execution_count": 14, "id": "6b0e85be-75e5-438c-b39f-e575d212bee8", "metadata": {}, "outputs": [], "source": [
 "from pathlib import Path\n",
 "\n",
 "# Create the dated results folder first so the write cannot fail on a missing directory.\n",
 "output_path = Path('results/20220322/all/hyperp_sampling_after.tsv')\n",
 "output_path.parent.mkdir(parents=True, exist_ok=True)\n",
 "\n",
 "# Tab-separated dump of every CV candidate for all datasets, without the index.\n",
 "results_final_df.to_csv(output_path, index=False, sep='\\t')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 5 }