{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "from pandas import DataFrame\n", "from scipy import stats\n", "from sklearn.metrics import jaccard_score\n", "from sklearn.metrics import pairwise_distances\n", "from statsmodels.stats.diagnostic import lilliefors\n", "from scipy.stats import mannwhitneyu, levene" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def generate(disease_feature, diseases, features):\n", " \n", " \"\"\"Function that from a list of relationships disease-feature obtains the\n", " feature matrix, so that rows are diseases and columns are features.\n", " \"\"\"\n", " \n", " \n", " dis_feat_dict = { i : list() for i in diseases}\n", " \n", " disease_feature_array = disease_feature.to_numpy()\n", " \n", " for [disease, feature] in disease_feature_array:\n", " \n", " if not feature in dis_feat_dict[disease]:\n", " dis_feat_dict[disease].append(feature)\n", " \n", " bool_matr = [[0 for x in range(len(features))] for y in range(len(diseases))]\n", " \n", " count_dis = 0 \n", " for dis in diseases: \n", " count_feat = 0 \n", " for feat in features: \n", " if feat in dis_feat_dict[dis]: \n", " bool_matr[count_dis][count_feat] = 1 \n", " count_feat += 1 \n", " count_dis += 1\n", " \n", " feature_matrix = pd.DataFrame(bool_matr)\n", " return feature_matrix\n", "#------------------------------------------------------------------------------" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# DATA" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "sint = pd.read_csv('sint_all.tsv', sep='\\t')\n", "sint = sint.drop([\"Unnamed: 0\"],axis=1)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "7582" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(sint[\"disease_id\"].unique())" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1951" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(sint[\"symptom\"].unique())" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "### DISNET" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "disnet_sint = sint" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "dis_feu_disnet = disnet_sint" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "dis_disnet = disnet_sint[\"disease_id\"]\n", "dis_disnet = dis_disnet.drop_duplicates()\n", "dis_disnet = dis_disnet.tolist()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "feu_disnet = disnet_sint[\"symptom\"]\n", "feu_disnet = feu_disnet.drop_duplicates()\n", "feu_disnet = feu_disnet.tolist()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "syn_disnet=generate(dis_feu_disnet,dis_disnet,feu_disnet)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "syn_disnet.columns = feu_disnet\n", "syn_disnet.index = dis_disnet" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "syn_disnet=np.array(syn_disnet)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "matrix_jaccard = pairwise_distances(syn_disnet, metric='jaccard')\n", "matrix_jaccard = pd.DataFrame(matrix_jaccard)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "matrix_jaccard.columns = dis_disnet\n", "matrix_jaccard.index = dis_disnet" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "28739571" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "my_array = np.array(matrix_jaccard)[np.triu_indices(len(matrix_jaccard), k = 1)]\n", "len(my_array)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "my_array_simi = 1 - my_array" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.054346292990571546" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "my_array_simi.mean()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#### PATHWAYS" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [], "source": [ "triplets_total = pd.read_excel('triples_drebiop_final_dos.xlsx',engine='openpyxl')\n", "triplets_total = triplets_total.drop(columns=['Unnamed: 0'])\n", "triplets_total = triplets_total.drop_duplicates()" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "triplets_total = triplets_total.rename(columns={\"disease_id\": \"disease_PwB\",\"Original Condition CUI\":\"disease_no_PwB\"})" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [], "source": [ "def value_jaccard(disease1, disease2):\n", " result = matrix_jaccard.loc[disease1,disease2]\n", " return result" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "C0035579 C0020598\n", "C0035579 C0035086\n", "C0035579 C0020598\n", "C0035579 C0020626\n", "C0035579 C0029456\n", "C0035579 C0029458\n", "C0035579 C0020598\n", "C0035579 C0020626\n", "C0035579 C0029456\n", "C0035579 C0029458\n", "C0035579 C0020598\n", "C0035579 C0020626\n", "C0020538 C0003962\n", "C0020538 C0013604\n", "C0020538 C0020428\n", "C0020538 C0011881\n", "C0020538 C0011881\n", "C0020538 C0011881\n", "C0020538 C0018802\n", "C0020538 C0018802\n", "C0030567 C0015371\n", "C0030567 C0020649\n", "C0030567 C0242422\n", "C0030567 C0242422\n", "C0030567 C0242422\n", "C0030567 C0600177\n", "No se puede calcular\n", "C0020538 C0038454\n", "C0020538 C0038454\n", "C0020538 C0027051\n", "C0020538 C1739363\n", "C0035579 C0085682\n", "C0035579 C0042870\n", "C0035579 C0042870\n", "C0035579 C3536984\n", "C0035579 C0039621\n", "No se puede calcular\n", "C0035579 C0085682\n", "C0002892 C0042847\n", "C0002892 C0042847\n", "C0002892 C0162316\n", "C0020538 C0009905\n", "No se puede calcular\n", "C0020538 C0029151\n", "No se puede calcular\n", "C0919267 C0001144\n", "C0919267 C0702166\n", "C0919267 C0718217\n", "No se puede calcular\n", "C1140680 C0001144\n", "C1140680 C0702166\n", "C1140680 C0718217\n", "No se puede calcular\n" ] } ], "source": [ "df_jaccard_distance = []\n", "disease1_list = triplets_total[\"disease_PwB\"].to_list()\n", "disease2_list = triplets_total[\"disease_no_PwB\"].to_list()\n", "\n", "for disease1,disease2 in zip(disease1_list,disease2_list):\n", " print(disease1,disease2)\n", " try:\n", " value = value_jaccard(disease1,disease2)\n", " df_jaccard_distance.append(value) \n", " except:\n", " print(\"No se puede calcular\")\n", " df_jaccard_distance.append(\"Na\")\n", " \n", " \n", " " ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [], "source": [ "triplets_total[\"Jaccard_distance\"] = df_jaccard_distance" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [], "source": [ "final_jacc_triples = triplets_total[triplets_total[\"Jaccard_distance\"]!= \"Na\"]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "final_jacc_triples[\"Jaccard_similarity\"] = 1-final_jacc_triples[\"Jaccard_distance\"]" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.22111618034064676" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_jacc_triples[\"Jaccard_similarity\"].mean()" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiMAAAGdCAYAAADAAnMpAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAi/klEQVR4nO3dbXBU5f3G8Wshy4ZYggOYkEiA0IJCGJEmKsHy4EOWIR2UDp0ygyNoZaYZEZRMyhCwJdQy9IEykRZItYGMYCyjKZYOEbMvTAiCbQNhqoLYKhKFRCZYEySyWcj5v/CflM1uQs6SzZ1svp+ZfXHuve89v3N+y+FiH1iHZVmWAAAADBlgugAAANC/EUYAAIBRhBEAAGAUYQQAABhFGAEAAEYRRgAAgFGEEQAAYBRhBAAAGBVluoCuaGlp0blz5zRkyBA5HA7T5QAAgC6wLEsXL15UYmKiBgzo+PWPPhFGzp07p6SkJNNlAACAEHz66acaNWpUh/f3iTAyZMgQSd8cTGxsrOFq+iafz6eysjK53W45nU7T5fRb9KF3oA+9A33oHcLZh8bGRiUlJbX9Pd6RPhFGWt+aiY2NJYyEyOfzKSYmRrGxsfyhN4g+9A70oXegD71DT/Theh+x4AOsAADAKMIIAAAwijACAACMIowAAACjCCMAAMAowggAADCKMAIAAIwijAAAAKMIIwAAwCjCCAAAMMpWGNm+fbvuuOOOtv+WPT09XW+88UanayoqKpSamqro6GiNGzdOBQUFN1QwAACILLbCyKhRo/SrX/1KVVVVqqqq0v3336+HH35Y77//ftD5p0+fVmZmpmbMmKHq6mqtWbNGK1asUElJSbcUDwAA+j5bP5Q3b948v+0NGzZo+/bteuedd5SSkhIwv6CgQKNHj1Z+fr4kaeLEiaqqqtKmTZu0YMGC0KsGAAARI+Rf7b169apeffVVXbp0Senp6UHnHDlyRG63229szpw5KiwslM/n6/DXAb1er7xeb9t2Y2OjpG9+WdDn84Vacr/Wet44f2bRh96BPvQO9KF3CGcfuvqYtsPIu+++q/T0dF2+fFnf+ta3tHfvXk2aNCno3Lq6OsXHx/uNxcfH68qVK6qvr1dCQkLQdRs3btT69esDxsvKyhQTE2O3ZFzD4/GYLgGiD70Ffegd6EPvEI4+NDU1dWme7TBy22236fjx4/ryyy9VUlKiJUuWqKKiosNA4nA4/LYtywo6fq3c3FxlZ2e3bTc2NiopKUlut1uxsbF2S4a+Sacej0cZGRkdviLVV0zOe9Nv+728OYYqsS+S+tCX0YfegT70DuHsQ+s7G9djO4wMGjRI3/nOdyRJaWlp+uc//6nnn39ef/zjHwPmjhw5UnV1dX5j58+fV1RUlIYPH97hPlwul1wuV8C40+nkCXuDIuEceq/6B9m+eDyR0IdIQB96B/rQO4SjD119vBv+f0Ysy/L7fMe10tPTA172KSsrU1paGk88AAAgyWYYWbNmjSorK/XJJ5/o3Xff1dq1a1VeXq5HHnlE0jdvryxevLhtflZWls6cOaPs7GydPHlSO3bsUGFhoXJycrr3KAAAQJ9l622azz//XI8++qhqa2s1dOhQ3XHHHTpw4IAyMjIkSbW1taqpqWmbn5ycrNLSUq1cuVJbt25VYmKitmzZwtd6AQBAG1thpLCwsNP7i4qKAsZmzZqlY8eO2SoKAAD0H/w2DQAAMIowAgAAjCKMAAAAowgjAADAKMIIAAAwijACAACMIowAAACjCCMAAMAowggAADCKMAIAAIwijAAAAKMIIwAAwCjCCAAAMIowAgAAjCKMAAAAowgjAADAKMIIAAAwijACAACMIowAAACjCCMAAMAowggAADCKMAIAAIwijAAAAKMIIwAAwCjCCAAAMIowAgAAjCKMAAAAowgjAADAKMIIAAAwijACAACMIowAAACjCCMAAMAowggAADCKMAIAAIwijAAAAKMIIwAAwCjCCAAAMIowAgAAjCKMAAAAowgjAADAKMIIAAAwijACAACMIowAAACjCCMAAMAowggAADCKMAIAAIwijAAAAKMIIwAAwChbYWTjxo266667NGTIEMXFxWn+/Pk6depUp2vKy8vlcDgCbh988MENFQ4AACKDrTBSUVGhZcuW6Z133pHH49GVK1fkdrt16dKl6649deqUamtr227jx48PuWgAABA5ouxMPnDggN/2zp07FRcXp6NHj2rmzJmdro2Li9PNN99su0AAABDZbIWR9hoaGiRJw4YNu+7cqVOn6vLly5o0aZKeffZZ3XfffR3O9Xq98nq9bduNjY2SJJ/PJ5/PdyMl91ut5y0Szp9roOW33ZeOKZL60JfRh96BPvQO4exDVx/TYVmWdf1pgSzL0sMPP6z//ve/qqys7HDeqVOndPDgQaWmpsrr9WrXrl0qKChQeXl5h6+m5OXlaf369QHjxcXFiomJCaVcAADQw5qamrRo0SI1NDQoNja2w3khh5Fly5Zp//79OnTokEaNGmVr7bx58+RwOLRv376g9wd7ZSQpKUn19fWdHgw65vP55PF4lJGRIafTabqcGzI5702/7ffy5hiqxL5I6kNfRh96B/rQO4SzD42NjRoxYsR1w0hIb9MsX75c+/bt08GDB20HEUmaNm2adu/e3eH9LpdLLpcrYNzpdPKEvUGRcA69Vx1+233xeCKhD5GAPvQO9KF3CEcfuvp4tsKIZVlavny59u7dq/LyciUnJ4dUXHV1tRISEkJaCwAAIoutMLJs2TIVFxfrr3/9q4YMGaK6ujpJ0tChQzV48GBJUm5urs6ePauXXnpJkpSfn6+xY8cqJSVFzc3N2r17t0pKSlRSUtLNhwIAAPoiW2Fk+/btkqTZs2f7je/cuVOPPfaYJKm2tlY1NTVt9zU3NysnJ0dnz57V4MGDlZKSov379yszM/PGKgcAABHB9ts011NUVOS3vWrVKq1atcpWUQAAoP/gt2kAAIBRhBEAAGAUYQQAABhFGAEAAEYRRgAAgFGEEQAAYBRhBAAAGEUYAQAARhFGAACAUYQRAABgFGEEAAAYRRgBAABGEUYAAIBRhBEAAGAUYQQAABhFGAEAAEYRRgAAgFGEEQAAYBRhBAAAGEUYAQAARhFGAACAUYQRAABgFGEEAAAYRRgBAABGEUYAAIBRhBEAAGAUYQQAABhFGAEAAEYRRgAAgFGEEQAAYBRhBAAAGEUYAQAARhFGAACAUYQRAABgFGEEAAAYRRgBAABGEUYAAIBRhBEAAGAUYQQAABhFGAEAAEYRRgAAgFGEEQAAYBRhBAAAGEUYAQAARhFGAACAUYQRAABgFGEEAAAYZSuMbNy4UXfddZeGDBmiuLg4zZ8/X6dOnbruuoqKCqWmpio6Olrjxo1TQUFByAUDAIDIYiuMVFRUaNmyZXrnnXfk8Xh05coVud1uXbp0qcM1p0+fVmZmpmbMmKHq6mqtWbNGK1asUElJyQ0XDwAA+r4oO5MPHDjgt71z507FxcXp6NGjmjlzZtA1BQUFGj16tPLz8yVJEydOVFVVlTZt2qQFCxaEVjUAAIgYN/SZkYaGBknSsGHDOpxz5MgRud1uv7E5c+aoqqpKPp/vRnYPAAAigK1XRq5lWZays7P1ve99T5MnT+5wXl1dneLj4/3G4uPjdeXKFdXX1yshISFgjdfrldfrbdtubGyUJPl8PgJMiFrPWyScP9dAy2+7Lx1TJPWhL6MPvQN96B3C2YeuPmbIYeSpp57Sv/71Lx06dOi6cx0Oh9+2ZVlBx1tt3LhR69evDxgvKytTTExMCNWilcfjMV3CDfvN3f7bpaWlZgq5AZHQh0hAH3oH+tA7hKMPTU1NXZoXUhhZvny59u3bp4MHD2rUqFGdzh05cqTq6ur8xs6fP6+oqCgNHz486Jrc3FxlZ2e3bTc2NiopKUlut1uxsbGhlNyhyXlvBoy9lzfH9pyerCcUPp9PHo9HP6saIG/L/0JguI4jnNqfo750DK19yMjIkNPp7HRuTz7v+hs7fUD40Adzrr2+uAZYei6tJSx9aH1n43pshRHLsrR8+XLt3btX5eXlSk5Ovu6a9PR0/e1vf/MbKysrU1paWocH7XK55HK5AsadTme3nyjv1cBXZ9rvoytzerKeG3r8FoffPvriBaD9OeqLx9CV53JPPu/6q3BcU2Affeh5HV1fursPXX08Wx9gXbZsmXbv3q3i4mINGTJEdXV1qqur09dff902Jzc3V4sXL27bzsrK0pkzZ5Sdna2TJ09qx44dKiwsVE5Ojp1dAwCACGUrjGzfvl0NDQ2aPXu2EhIS2m579uxpm1NbW6uampq27eTkZJWWlqq8vFx33nmnnnvuOW3ZsoWv9QIAAEkhvE1zPUVFRQFjs2bN0rFjx+zsCgAA9BP8Ng0AADCKMAIAAIwijAAAAKMIIwAAwCjCCAAAMIowAgAAjCKMAAAAowgjAADAKMIIAAAwijACAACMIowAAACjCCMAAMAowggAADCKMAIAAIwijAAAAKMIIwAAwCjCCAAAMIowAgAAjCKMAAAAowgjAADAKMIIAAAwijACAACMIowAAACjCCMAAMAowggAADCKMAIAAIwijAAAAKMIIwAAwCjCCAAAMIowAgAAjCKMAAAAowgjAADAKMIIAAAwijACAACMIowAAACjCCMAAMAowggAADCKMAIAAIwijAAAAKMIIwAAwCjCCAAAMIowAgAAjCKMAAAAowgjAADAKMIIAAAwijACAACMIowAAACjCCMAAMAo22Hk4MGDmjdvnhITE+VwOPT66693Or+8vFwOhyPg9sEHH4RaMwAAiCBRdhdcunRJU6ZM0eOPP64FCxZ0ed2pU6cUGxvbtn3LLbfY3TUAAIhAtsPI3LlzNXfuXNs7iouL080332x7HQAAiGy2w0iopk6dqsuXL2vSpEl69tlndd9993U41+v1yuv1tm03NjZKknw+n3w+X7fW5RpoBYy130dX5vRkPaFofQzXACvoeF/S/hz1pWNorbUrNffk866/sdMHhA99MOfa60vr3wvh6ENXH9NhWVbgFa+LHA6H9u7dq/nz53c459SpUzp48KBSU1Pl9Xq1a9cuFRQUqLy8XDNnzgy6Ji8vT+vXrw8YLy4uVkxMTKjlAgCAHtTU1KRFixapoaHB76Ma7YU9jAQzb948ORwO7du3L+j9wV4ZSUpKUn19facHE4rJeW8GjL2XN8f2nJ6sJxQ+n08ej0c/qxogb4ujWx+7p7U/R33pGFr7kJGRIafT2encnnze9Td2+oDwoQ/mXHt9cQ2w9FxaS1j60NjYqBEjRlw3jPTY2zTXmjZtmnbv3t3h/S6XSy6XK2Dc6XR2+4nyXnUEjLXfR1fm9GQ9N/T4LQ6/ffTFC0D7c9QXj6Erz+WefN71V+G4psA++tDzOrq+dHcfuvp4Rv6fkerqaiUkJJjYNQAA6GVsvzLy1Vdf6T//+U/b9unTp3X8+HENGzZMo0ePVm5urs6ePauXXnpJkpSfn6+xY8cqJSVFzc3N2r17t0pKSlRSUtJ9RwEAAPos22GkqqrK75sw2dnZkqQlS5aoqKhItbW1qqmpabu/ublZOTk5Onv2rAYPHqyUlBTt379fmZmZ3VA+AADo62yHkdmzZ6uzz7wWFRX5ba9atUqrVq2yXRgAAOgf+G0aAABgFGEEAAAYRRgBAABGEUYAAIBRhBEAAGAUYQQAABhFGAEAAEYRRgAAgFGEEQAAYBRhBAAAGEUYAQAARhFGAACAUYQRAABgFGEEAAAYRRgBAABGEUYAAIBRhBEAAGAUYQQAABhFGAEAAEYRRgAAgFGEEQAAYBRhBAAAGEUYAQAARhFGAACAUYQRAABgFGEEAAAYRRgBAABGEUYAAIBRhBEAAGAUYQQAABhFGAEAAEYRRgAAgFGEEQAAYBRhBAAAGEUYAQAARhFGAACAUYQRAABgFGEEAAAYRRgBAABGEUYAAIBRhBEAAGAUYQQAABhFGAEAAEYRRgAAgFGEEQAAYBRhBAAAGEUYAQAARhFGAACAUbbDyMGDBzVv3jwlJibK4XDo9ddfv+6aiooKpaamKjo6WuPGjVNBQUEotQIAgAhkO4xcunRJU6ZM0R/+8IcuzT99+rQyMzM1Y8YMVVdXa82aNVqxYoVKSkpsFwsAACJPlN0Fc+fO1dy5c7s8v6CgQKNHj1Z+fr4kaeLEiaqqqtKmTZu0YMECu7sHAAARxnYYsevIkSNyu91+Y3PmzFFhYaF8Pp+cTmfAGq/XK6/X27bd2NgoSfL5fPL5fN1an2ugFTDWfh9dmdOT9YSi9TFcA6yg431J+3PUl46htdau1NyTz7v+xk4fED70wZxrry+tfy+Eow9dfUyHZVmBV7wucjgc2rt3r+bPn9/hnAkTJuixxx7TmjVr2sYOHz6se++9V+fOnVNCQkLAmry8PK1fvz5gvLi4WDExMaGWCwAAelBTU5MWLVqkhoYGxcbGdjgv7K+MSN+Elmu15p/2461yc3OVnZ3dtt3Y2KikpCS53e5ODyYUk/PeDBh7L29OWOZ0Vz2h8Pl88ng8+lnVAHlb/nfeu+OxgwnnOWu/LtiarswxobUPGRkZQV8VvFa4nguw1weED30w59rri2uApefSWsLSh9Z3Nq4n7GFk5MiRqqur8xs7f/68oqKiNHz48KBrXC6XXC5XwLjT6ez2E+W9GhiI2u+ju+Z0Vz03wtvi8NtHuC4A4Txn7dcFfauvC3NM6spzOdzPBYTnmgL76EPP6+j60t196Orjhf3/GUlPT5fH4/EbKysrU1paGk8+AABgP4x89dVXOn78uI4fPy7pm6/uHj9+XDU1NZK+eYtl8eLFbfOzsrJ05swZZWdn6+TJk9qxY4cKCwuVk5PTPUcAAAD6NNtv01RVVem+++5r2279bMeSJUtUVFSk2tratmAiScnJySotLdXKlSu1detWJSYmasuWLXytFwAASAohjMyePVudfQGnqKgoYGzWrFk6duyY3V0BAIB+gN+mAQAARhFGAACAUYQRAABgFGEEAAAYRRgBAABGEUYAAIBRhBEAAGAUYQQAABhFGAEAAEYRRgAAgFGEEQAAYBRhBAAAGEUYAQAARhFGAACAUYQRAABgFGEEAAAYRRgBAABGEUYAAIBRhBEAAGAUYQQAABhFGAEAAEYRRgAAgFGEEQAAYBRhBAAAGEUYAQAARhFGAACAUYQRAABgFGEEAAAYRRgBAABGEUYAAIBRhBEAAGAUYQQAABhFGAEAAEYRRgAAgFGEEQAAYBRhBAAAGEUYAQAARhFGAACAUYQRAABgFGEEAAAYRRgBAABGEUYAAIBRhBEAAGAUYQQAABhFGAEAAEYRRgAAgFGEEQAAYFRIYWTbtm1KTk5WdHS0UlNTVVlZ2eHc8vJyORyOgNsHH3wQctEAACBy2A4je/bs0TPPPKO1a9equrpaM2bM0Ny5c1VTU9PpulOnTqm2trbtNn78+JCLBgAAkcN2GNm8ebOeeOIJLV26VBMnTlR+fr6SkpK0ffv2TtfFxcVp5MiRbbeBAweGXDQAAIgctsJIc3Ozjh49Krfb7Tfudrt1+PDhTtdOnTpVCQkJeuCBB/TWW2/ZrxQAAESkKDuT6+vrdfXqVcXHx/uNx8fHq66uLuiahIQEvfDCC0pNTZXX69WuXbv0wAMPqLy8XDNnzgy6xuv1yuv1tm03NjZKknw+n3w+n52Sr8s10AoYa7+P7prTXfWEovUxXAOsoOPdLZznrP26YGu6MseE1jpCOc6ursP12ekDwoc+mHPt9aX174Vw9KGrj+mwLCvwiteBc+fO6dZbb9Xhw4eVnp7eNr5hwwbt2rWryx9KnTdvnhwOh/bt2xf0/ry8PK1fvz5gvLi4WDExMV0tFwAAGNTU1KRFixapoaFBsbGxHc6z9crIiBEjNHDgwIBXQc6fPx/waklnpk2bpt27d3d4f25urrKzs9u2GxsblZSUJLfb3enBhGJy3psBY+/lzQnLnO6qJxQ+n08ej0c/qxogb4ujWx87mHCes/brgq3pyhwTWvuQkZEhp9PZ6dxwPRdgrw8IH/pgzrXXF9cAS8+ltYSlD63vbFyPrTAyaNAgpaamyuPx6Ac/+EHbuMfj0cMPP9zlx6murlZCQkKH97tcLrlcroBxp9PZ7SfKe9URMNZ+H901p7vquRHeFoffPsJ1AQjnOWu/LtiarswxqSvP5XA/FxCeawrsow89r6PrS3f3oauPZyuMSFJ2drYeffRRpaWlKT09XS+88IJqamqUlZUl6ZtXNc6ePauXXnpJkpSfn6+xY8cqJSVFzc3N2r17t0pKSlRSUmJ31wAAIALZDiMLFy7UhQsX9Itf/EK1tbWaPHmySktLNWbMGElSbW2t3/850tzcrJycHJ09e1aDBw9WSkqK9u/fr8zMzO47CgAA0GfZDiOS9OSTT+rJJ58Mel9RUZHf9qpVq7Rq1apQdgMAAPoBfpsGAAAYRRgBAABGEUYAAIBRhBEAAGAUYQQAABhFGAEAAEYRRgAAgFGEEQAAYBRhBAAAGEUYAQAARhFGAACAUYQRAABgFGEEAAAYRRgBAABGEUYAAIBRhBEAAGAUYQQAABhFGAEAAEYRRgAAgFGEEQAAYBRhBAAAGEUYAQAARhFGAACAUYQRAABgFGEEAAAYRRgBAABGEUYAAIBRhBEAAGAUYQQAABhFGAEAAEYRRgAAgFGEEQAAYBRhBAAAGEUYAQAARhFGAACAUYQRAABgFGEEAAAYRRgBAABGEUYAAIBRhBEAAGAUYQQAABhFGAEAAEYRRgAAgFGEEQAAYBRhBAAAGEUYAQAARhFGAACAUYQRAABgVEhhZNu2bUpOTlZ0dLRSU1NVWVnZ6fyKigqlpqYqOjpa48aNU0FBQUjFAgCAyGM7jOzZs0fPPPOM1q5dq+rqas2YMUNz585VTU1N0PmnT59WZmamZsyYoerqaq1Zs0YrVqxQSUnJDRcPAAD6PtthZPPmzXriiSe0dOlSTZw4Ufn5+UpKStL27duDzi8oKNDo0aOVn5+viRMnaunSpfrxj3+sTZs23XDxAACg74uyM7m5uVlHjx7V6tWr/cbdbrcOHz4cdM2RI0fkdrv9xubMmaPCwkL5fD45nc6ANV6vV16vt227oaFBkvTFF1/I5/PZKfm6oq5cChi7cOFCWOZ0Vz2h8Pl8ampqUpRvgK62OLr1sYMJ5zlrvy7Ymq7MMaG1DxcuXAj63L9WuJ4LsNcHhA99MOfa60tUi6Wmppaw9OHixYuSJMuyOp9o2XD27FlLkvX222/7jW/YsMGaMGFC0DXjx4+3NmzY4Df29ttvW5Ksc+fOBV2zbt06SxI3bty4cePGLQJun376aaf5wtYrI60cDofftmVZAWPXmx9svFVubq6ys7PbtltaWvTFF19o+PDhne4HHWtsbFRSUpI+/fRTxcbGmi6n36IPvQN96B3oQ+8Qzj5YlqWLFy8qMTGx03m2wsiIESM0cOBA1dXV+Y2fP39e8fHxQdeMHDky6PyoqCgNHz486BqXyyWXy+U3dvPNN9spFR2IjY3lD30vQB96B/rQO9CH3iFcfRg6dOh159j6AOugQYOUmpoqj8fjN+7xeDR9+vSga9LT0wPml5WVKS0tjfcIAQCA/W/TZGdn609/+pN27NihkydPauXKlaqpqVFWVpakb95iWbx4cdv8rKwsnTlzRtnZ2Tp58qR27NihwsJC5eTkdN9RAACAPsv2Z0YWLlyoCxcu6Be/+IVqa2s1efJklZaWasyYMZKk2tpav/9zJDk5WaWlpVq5cqW2bt2qxMREbdmyRQsWLOi+o8B1uVwurVu3LuDtL/Qs+tA70IfegT70Dr2hDw7Lut73bQAAAMKH36YBAABGEUYAAIBRhBEAAGAUYQQAABhFGIkQ27ZtU3JysqKjo5WamqrKysoO5/7lL39RRkaGbrnlFsXGxio9PV1vvvlmD1Yb2ez04lpvv/22oqKidOedd4a3wH7Cbh+8Xq/Wrl2rMWPGyOVy6dvf/rZ27NjRQ9VGLrt9ePnllzVlyhTFxMQoISFBjz/+OL/JdIMOHjyoefPmKTExUQ6HQ6+//vp111RUVCg1NVXR0dEaN26cCgoKwltk136VBr3Zn//8Z8vpdFovvviideLECevpp5+2brrpJuvMmTNB5z/99NPWr3/9a+sf//iH9eGHH1q5ubmW0+m0jh071sOVRx67vWj15ZdfWuPGjbPcbrc1ZcqUnik2goXSh4ceesi65557LI/HY50+fdr6+9//HvA7XLDHbh8qKyutAQMGWM8//7z18ccfW5WVlVZKSoo1f/78Hq48spSWllpr1661SkpKLEnW3r17O53/8ccfWzExMdbTTz9tnThxwnrxxRctp9Npvfbaa2GrkTASAe6++24rKyvLb+z222+3Vq9e3eXHmDRpkrV+/fruLq3fCbUXCxcutJ599llr3bp1hJFuYLcPb7zxhjV06FDrwoULPVFev2G3D7/97W+tcePG+Y1t2bLFGjVqVNhq7G+6EkZWrVpl3X777X5jP/nJT6xp06aFrS7epunjmpubdfToUbndbr9xt9utw4cPd+kxWlpadPHiRQ0bNiwcJfYbofZi586d+uijj7Ru3bpwl9gvhNKHffv2KS0tTb/5zW906623asKECcrJydHXX3/dEyVHpFD6MH36dH322WcqLS2VZVn6/PPP9dprr+n73/9+T5SM/3fkyJGAvs2ZM0dVVVXy+Xxh2WdIv9qL3qO+vl5Xr14N+KHC+Pj4gB8o7Mjvfvc7Xbp0ST/60Y/CUWK/EUov/v3vf2v16tWqrKxUVBR/HLtDKH34+OOPdejQIUVHR2vv3r2qr6/Xk08+qS+++ILPjYQolD5Mnz5dL7/8shYuXKjLly/rypUreuihh/T73/++J0rG/6urqwvatytXrqi+vl4JCQndvk9eGYkQDofDb9uyrICxYF555RXl5eVpz549iouLC1d5/UpXe3H16lUtWrRI69ev14QJE3qqvH7Dzp+JlpYWORwOvfzyy7r77ruVmZmpzZs3q6ioiFdHbpCdPpw4cUIrVqzQz3/+cx09elQHDhzQ6dOn2377DD0nWN+CjXcX/inWx40YMUIDBw4M+JfG+fPnA5Jte3v27NETTzyhV199VQ8++GA4y+wX7Pbi4sWLqqqqUnV1tZ566ilJ3/ylaFmWoqKiVFZWpvvvv79Hao8kofyZSEhI0K233ur3U+cTJ06UZVn67LPPNH78+LDWHIlC6cPGjRt177336qc//akk6Y477tBNN92kGTNm6Je//GVY/kWOQCNHjgzat6ioKA0fPjws++SVkT5u0KBBSk1Nlcfj8Rv3eDyaPn16h+teeeUVPfbYYyouLub92G5itxexsbF69913dfz48bZbVlaWbrvtNh0/flz33HNPT5UeUUL5M3Hvvffq3Llz+uqrr9rGPvzwQw0YMECjRo0Ka72RKpQ+NDU1acAA/7+WBg4cKOl//zJH+KWnpwf0raysTGlpaXI6neHZadg+Gose0/r1ucLCQuvEiRPWM888Y910003WJ598YlmWZa1evdp69NFH2+YXFxdbUVFR1tatW63a2tq225dffmnqECKG3V60x7dpuofdPly8eNEaNWqU9cMf/tB6//33rYqKCmv8+PHW0qVLTR1CRLDbh507d1pRUVHWtm3brI8++sg6dOiQlZaWZt19992mDiEiXLx40aqurraqq6stSdbmzZut6urqtq9Yt+9D61d7V65caZ04ccIqLCzkq73omq1bt1pjxoyxBg0aZH33u9+1Kioq2u5bsmSJNWvWrLbtWbNmWZICbkuWLOn5wiOQnV60RxjpPnb7cPLkSevBBx+0Bg8ebI0aNcrKzs62mpqaerjqyGO3D1u2bLEmTZpkDR482EpISLAeeeQR67PPPuvhqiPLW2+91ek1P1gfysvLralTp1qDBg2yxo4da23fvj2sNTosi9e+AACAOXxmBAAAGEUYAQAARhFGAACAUYQRAABgFGEEAAAYRRgBAABGEUYAAIBRhBEAAGAUYQQAABhFGAEAAEYRRgAAgFGEEQAAYNT/ATdjjIiua+pJAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "final_jacc_triples[\"Jaccard_similarity\"].hist(bins=100)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#### TARGET" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "Triples_target_final = pd.read_excel(\"triples_final_drege_repo_csbj.xlsx\",engine='openpyxl')\n", "Triples_target_final = Triples_target_final.drop(columns=['Unnamed: 0'])\n", "Triples_target_final = Triples_target_final.drop_duplicates()" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "C0041696 C0270549\n", "C1269683 C0270549\n", "C0041696 C0028754\n", "C1269683 C0028754\n", "C0011570 C0028754\n", "C0011581 C0028754\n", "C0344315 C0028754\n", "No se puede calcular\n", "C0041696 C0520676\n", "C1269683 C0520676\n", "C0011570 C0520676\n", "C0011581 C0520676\n", "C0344315 C0520676\n", "No se puede calcular\n", "C1269683 C0016053\n", "C1269683 C0016053\n", "C0011570 C0016053\n", "C0011570 C0016053\n", "C0011581 C0016053\n", "C0011581 C0016053\n", "C0344315 C0016053\n", "No se puede calcular\n", "C0344315 C0016053\n", "No se puede calcular\n", "C0029408 C0032580\n", "C0003873 C0032580\n", "C0029408 C0007102\n", "C0003873 C0007102\n", "C0029408 C0346629\n", "C0003873 C0346629\n", "C0029408 C0699790\n", "No se puede calcular\n", "C0003873 C0699790\n", "No se puede calcular\n", "C0029408 C0009402\n", "C0003873 C0009402\n", "C0029408 C0242379\n", "C0003873 C0242379\n", "C0029408 C0684249\n", "C0003873 C0684249\n", "C0029408 C1306460\n", "No se puede calcular\n", "C0003873 C1306460\n", "No se puede calcular\n", "C0029408 C0006142\n", "C0003873 C0006142\n", "C0029408 C0678222\n", "C0003873 C0678222\n", "C0003873 C0004096\n", "C0010346 C0003864\n", "C0010346 C0002395\n", "C1960398 C0278488\n", "No se puede calcular\n", "C1960398 C0024623\n", "No se puede calcular\n", "C1960398 C0699791\n", "No se puede calcular\n", "C0011849 C0006142\n", "C0011849 C0678222\n", "C0011849 C0376358\n", "C0011849 C0600139\n", "No se puede calcular\n", "C0011849 C0009402\n", "C0024530 C0003873\n", "C0024530 C0024141\n", "C0020538 C0002395\n", "C0020538 C0242350\n", "C0007222 C0242350\n", "C0006826 C0003873\n", "C0027497 C0026764\n", "C0027497 C0023343\n", "C0238198 C0017185\n", "C0238198 C0030297\n", "C0007134 C0030297\n", "C0279702 C0030297\n", "C0020538 C0030567\n", "C0020538 C0035258\n", "C0206180 C0007131\n", "C1332182 C0007131\n", "No se puede calcular\n", "C0030567 C1263846\n", "C0522224 C0002395\n", "C1704272 C0002170\n", "C0242350 C0020538\n", "C0242350 C1704272\n", "C0006142 C0029456\n", "C0678222 C0029456\n", "C0376358 C0029456\n", "C0600139 C0029456\n", "No se puede calcular\n", "C0023473 C0238198\n", "C0279543 C0238198\n", "No se puede calcular\n", "C1292771 C0238198\n", "No se puede calcular\n", "C0003873 C0376358\n", "C0003873 C0600139\n", "No se puede calcular\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C2607914 C0033774\n", "C0036341 C0033774\n", "C2607914 C0035455\n", "C2607914 C0036341\n", "C0242422 C0030567\n", "C0242422 C0030567\n", "C0242422 C0030567\n", "C0242422 C0020649\n", "C0030567 C0020649\n", "C0242422 C0015371\n", "C0030567 C0015371\n", "C0038454 C0020538\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0029408 C0003873\n", "C0015967 C0003873\n", "C0015967 C0003873\n", "C0015967 C0003873\n", "C0030193 C0003873\n", "C0030193 C0003873\n", "C0030193 C0003873\n", "C0030193 C0003873\n", "C0030193 C0003873\n", "C0030193 C0003873\n", "C0030193 C0003873\n", "C0030193 C0003873\n", "C0030193 C0003873\n", "C0030193 C0003873\n", "C0030193 C0003873\n", "C0030193 C0003873\n", "C0030193 C0003873\n", "C0038013 C0003873\n", "C0038013 C0003873\n", "C0038013 C0003873\n", "C0038013 C0003873\n", "C0038013 C0003873\n", "C0038013 C0003873\n", "C0022602 C0003873\n", "C0027051 C0003873\n", "C0009324 C0003873\n", "C0029408 C0022602\n", "C0030193 C0022602\n", "C0038013 C0022602\n", "C0029408 C0015967\n", "C0029408 C0015967\n", "C0029408 C0015967\n", "C0030193 C0015967\n", "C0030193 C0015967\n", "C0030193 C0015967\n", "C0030193 C0015967\n", "C0038013 C0015967\n", "C0029408 C0027051\n", "C0030193 C0027051\n", "C0029458 C0014175\n", "C0029458 C0014175\n", "C0476089 C0014175\n", "C0476089 C0014175\n", "C0016034 C0014175\n", "C0476089 C0029458\n", "C0476089 C0029458\n", "C0035579 C0020598\n", "C0035579 C0020598\n", "C0035579 C0020598\n", "C0035579 C0020598\n", "C0085682 C0020598\n", "C0085682 C0020598\n", "C0085682 C0020598\n", "C0029456 C0020598\n", "C0029456 C0020598\n", "C0029458 C0020598\n", "C0029458 C0020598\n", "C0042870 C0020598\n", "C0042870 C0020598\n", "C0035579 C0029456\n", "C0035579 C0029456\n", "C0029458 C0029456\n", "C0029458 C0029456\n", "C0042870 C0029456\n", "C0042870 C0029456\n", "C0035579 C0029458\n", "C0035579 C0029458\n", "C0042870 C0029458\n", "C0042870 C0029458\n", "C0085682 C0035579\n", "C0085682 C0035579\n", "C0042870 C0035579\n", "C0042870 C0035579\n", "C0020443 C0003850\n", "C0020443 C0003850\n", "C0020443 C0003850\n", "C0020443 C0003850\n", "C0020443 C0003850\n", "C0020443 C0003850\n", "C0494475 C0014544\n", "C0494475 C0014544\n", "C0003873 C0001403\n", "C0003873 C0001403\n", "C0003873 C0001403\n", "C0003873 C0001403\n", "C0003873 C0001403\n", "C0003873 C0001403\n", "C0003873 C0001403\n", "C0003873 C0001403\n", "C0004096 C0001403\n", "C0004096 C0001403\n", "C0004096 C0001403\n", "C0004096 C0001403\n", "C0004096 C0001403\n", "C0004096 C0001403\n", "C0004096 C0001403\n", "C0004096 C0001403\n", "C0004364 C0001403\n", "C0004364 C0001403\n", "C0004364 C0001403\n", "C0004364 C0001403\n", "C0004364 C0001403\n", "C0004364 C0001403\n", "C0004364 C0001403\n", "C0010346 C0001403\n", "C0010346 C0001403\n", "C0010346 C0001403\n", "C0010346 C0001403\n", "C0010346 C0001403\n", "C0010346 C0001403\n", "C0010346 C0001403\n", "C0010346 C0001403\n", "C0021390 C0001403\n", "C0021390 C0001403\n", "C0021390 C0001403\n", "C0021390 C0001403\n", "C0021390 C0001403\n", "C0021390 C0001403\n", "C0021390 C0001403\n", "C0021390 C0001403\n", "C0024141 C0001403\n", "C0024141 C0001403\n", "C0024141 C0001403\n", "C0024141 C0001403\n", "C0024141 C0001403\n", "C0024141 C0001403\n", "C0024141 C0001403\n", "C0024141 C0001403\n", "C0026769 C0001403\n", "C0026769 C0001403\n", "C0026769 C0001403\n", "C0026769 C0001403\n", "C0026769 C0001403\n", "C0026769 C0001403\n", "C0026769 C0001403\n", "C0026769 C0001403\n", "C0027726 C0001403\n", "C0027726 C0001403\n", "C0027726 C0001403\n", "C0027726 C0001403\n", "C0027726 C0001403\n", "C0027726 C0001403\n", "C0027726 C0001403\n", "C0027726 C0001403\n", "C0042164 C0001403\n", "C0042164 C0001403\n", "C0042164 C0001403\n", "C0042164 C0001403\n", "C0042164 C0001403\n", "C0009324 C0001403\n", "C0009324 C0001403\n", "C0009324 C0001403\n", "C0026764 C0001403\n", "C0004096 C0003873\n", "C0004096 C0003873\n", "C0004096 C0003873\n", "C0004096 C0003873\n", "C0004096 C0003873\n", "C0004096 C0003873\n", "C0004096 C0003873\n", "C0004096 C0003873\n", "C0004096 C0003873\n", "C0004364 C0003873\n", "C0004364 C0003873\n", "C0004364 C0003873\n", "C0004364 C0003873\n", "C0004364 C0003873\n", "C0004364 C0003873\n", "C0004364 C0003873\n", "C0004364 C0003873\n", "C0010346 C0003873\n", "C0010346 C0003873\n", "C0010346 C0003873\n", "C0010346 C0003873\n", "C0010346 C0003873\n", "C0010346 C0003873\n", "C0010346 C0003873\n", "C0010346 C0003873\n", "C0010346 C0003873\n", "C0021390 C0003873\n", "C0021390 C0003873\n", "C0021390 C0003873\n", "C0021390 C0003873\n", "C0021390 C0003873\n", "C0021390 C0003873\n", "C0021390 C0003873\n", "C0021390 C0003873\n", "C0021390 C0003873\n", "C0024141 C0003873\n", "C0024141 C0003873\n", "C0024141 C0003873\n", "C0024141 C0003873\n", "C0024141 C0003873\n", "C0024141 C0003873\n", "C0024141 C0003873\n", "C0024141 C0003873\n", "C0024141 C0003873\n", "C0026769 C0003873\n", "C0026769 C0003873\n", "C0026769 C0003873\n", "C0026769 C0003873\n", "C0026769 C0003873\n", "C0026769 C0003873\n", "C0026769 C0003873\n", "C0026769 C0003873\n", "C0026769 C0003873\n", "C0027726 C0003873\n", "C0027726 C0003873\n", "C0027726 C0003873\n", "C0027726 C0003873\n", "C0027726 C0003873\n", "C0027726 C0003873\n", "C0027726 C0003873\n", "C0027726 C0003873\n", "C0027726 C0003873\n", "C0042164 C0003873\n", "C0042164 C0003873\n", "C0042164 C0003873\n", "C0042164 C0003873\n", "C0042164 C0003873\n", "C0009324 C0003873\n", "C0009324 C0003873\n", "C0009324 C0003873\n", "C0026764 C0003873\n", "C0004364 C0004096\n", "C0004364 C0004096\n", "C0004364 C0004096\n", "C0004364 C0004096\n", "C0004364 C0004096\n", "C0004364 C0004096\n", "C0004364 C0004096\n", "C0004364 C0004096\n", "C0010346 C0004096\n", "C0010346 C0004096\n", "C0010346 C0004096\n", "C0010346 C0004096\n", "C0010346 C0004096\n", "C0010346 C0004096\n", "C0010346 C0004096\n", "C0010346 C0004096\n", "C0010346 C0004096\n", "C0010346 C0004096\n", "C0010346 C0004096\n", "C0021390 C0004096\n", "C0021390 C0004096\n", "C0021390 C0004096\n", "C0021390 C0004096\n", "C0021390 C0004096\n", "C0021390 C0004096\n", "C0021390 C0004096\n", "C0021390 C0004096\n", "C0021390 C0004096\n", "C0024141 C0004096\n", "C0024141 C0004096\n", "C0024141 C0004096\n", "C0024141 C0004096\n", "C0024141 C0004096\n", "C0024141 C0004096\n", "C0024141 C0004096\n", "C0024141 C0004096\n", "C0024141 C0004096\n", "C0026769 C0004096\n", "C0026769 C0004096\n", "C0026769 C0004096\n", "C0026769 C0004096\n", "C0026769 C0004096\n", "C0026769 C0004096\n", "C0026769 C0004096\n", "C0026769 C0004096\n", "C0026769 C0004096\n", "C0027726 C0004096\n", "C0027726 C0004096\n", "C0027726 C0004096\n", "C0027726 C0004096\n", "C0027726 C0004096\n", "C0027726 C0004096\n", "C0027726 C0004096\n", "C0027726 C0004096\n", "C0027726 C0004096\n", "C0042164 C0004096\n", "C0042164 C0004096\n", "C0042164 C0004096\n", "C0042164 C0004096\n", "C0042164 C0004096\n", "C0027430 C0004096\n", "C0009324 C0004096\n", "C0009324 C0004096\n", "C0009324 C0004096\n", "C0026764 C0004096\n", "C0010346 C0004364\n", "C0010346 C0004364\n", "C0010346 C0004364\n", "C0010346 C0004364\n", "C0010346 C0004364\n", "C0010346 C0004364\n", "C0010346 C0004364\n", "C0010346 C0004364\n", "C0021390 C0004364\n", "C0021390 C0004364\n", "C0021390 C0004364\n", "C0021390 C0004364\n", "C0021390 C0004364\n", "C0021390 C0004364\n", "C0021390 C0004364\n", "C0021390 C0004364\n", "C0024141 C0004364\n", "C0024141 C0004364\n", "C0024141 C0004364\n", "C0024141 C0004364\n", "C0024141 C0004364\n", "C0024141 C0004364\n", "C0024141 C0004364\n", "C0024141 C0004364\n", "C0026769 C0004364\n", "C0026769 C0004364\n", "C0026769 C0004364\n", "C0026769 C0004364\n", "C0026769 C0004364\n", "C0026769 C0004364\n", "C0026769 C0004364\n", "C0026769 C0004364\n", "C0027726 C0004364\n", "C0027726 C0004364\n", "C0027726 C0004364\n", "C0027726 C0004364\n", "C0027726 C0004364\n", "C0027726 C0004364\n", "C0027726 C0004364\n", "C0027726 C0004364\n", "C0042164 C0004364\n", "C0042164 C0004364\n", "C0042164 C0004364\n", "C0042164 C0004364\n", "C0009324 C0004364\n", "C0009324 C0004364\n", "C0009324 C0004364\n", "C0010346 C0009324\n", "C0010346 C0009324\n", "C0010346 C0009324\n", "C0021390 C0009324\n", "C0021390 C0009324\n", "C0021390 C0009324\n", "C0024141 C0009324\n", "C0024141 C0009324\n", "C0024141 C0009324\n", "C0026769 C0009324\n", "C0026769 C0009324\n", "C0026769 C0009324\n", "C0027726 C0009324\n", "C0027726 C0009324\n", "C0027726 C0009324\n", "C0042164 C0009324\n", "C0042164 C0009324\n", "C0042164 C0009324\n", "C0021390 C0010346\n", "C0021390 C0010346\n", "C0021390 C0010346\n", "C0021390 C0010346\n", "C0021390 C0010346\n", "C0021390 C0010346\n", "C0021390 C0010346\n", "C0021390 C0010346\n", "C0021390 C0010346\n", "C0024141 C0010346\n", "C0024141 C0010346\n", "C0024141 C0010346\n", "C0024141 C0010346\n", "C0024141 C0010346\n", "C0024141 C0010346\n", "C0024141 C0010346\n", "C0024141 C0010346\n", "C0024141 C0010346\n", "C0026769 C0010346\n", "C0026769 C0010346\n", "C0026769 C0010346\n", "C0026769 C0010346\n", "C0026769 C0010346\n", "C0026769 C0010346\n", "C0026769 C0010346\n", "C0026769 C0010346\n", "C0026769 C0010346\n", "C0027726 C0010346\n", "C0027726 C0010346\n", "C0027726 C0010346\n", "C0027726 C0010346\n", "C0027726 C0010346\n", "C0027726 C0010346\n", "C0027726 C0010346\n", "C0027726 C0010346\n", "C0027726 C0010346\n", "C0042164 C0010346\n", "C0042164 C0010346\n", "C0042164 C0010346\n", "C0042164 C0010346\n", "C0042164 C0010346\n", "C0026764 C0010346\n", "C0024141 C0021390\n", "C0024141 C0021390\n", "C0024141 C0021390\n", "C0024141 C0021390\n", "C0024141 C0021390\n", "C0024141 C0021390\n", "C0024141 C0021390\n", "C0024141 C0021390\n", "C0024141 C0021390\n", "C0026769 C0021390\n", "C0026769 C0021390\n", "C0026769 C0021390\n", "C0026769 C0021390\n", "C0026769 C0021390\n", "C0026769 C0021390\n", "C0026769 C0021390\n", "C0026769 C0021390\n", "C0026769 C0021390\n", "C0027726 C0021390\n", "C0027726 C0021390\n", "C0027726 C0021390\n", "C0027726 C0021390\n", "C0027726 C0021390\n", "C0027726 C0021390\n", "C0027726 C0021390\n", "C0027726 C0021390\n", "C0027726 C0021390\n", "C0042164 C0021390\n", "C0042164 C0021390\n", "C0042164 C0021390\n", "C0042164 C0021390\n", "C0042164 C0021390\n", "C0026764 C0021390\n", "C0026769 C0024141\n", "C0026769 C0024141\n", "C0026769 C0024141\n", "C0026769 C0024141\n", "C0026769 C0024141\n", "C0026769 C0024141\n", "C0026769 C0024141\n", "C0026769 C0024141\n", "C0026769 C0024141\n", "C0027726 C0024141\n", "C0027726 C0024141\n", "C0027726 C0024141\n", "C0027726 C0024141\n", "C0027726 C0024141\n", "C0027726 C0024141\n", "C0027726 C0024141\n", "C0027726 C0024141\n", "C0027726 C0024141\n", "C0042164 C0024141\n", "C0042164 C0024141\n", "C0042164 C0024141\n", "C0042164 C0024141\n", "C0042164 C0024141\n", "C0026764 C0024141\n", "C0026769 C0026764\n", "C0027726 C0026764\n", "C0042164 C0026764\n", "C0027726 C0026769\n", "C0027726 C0026769\n", "C0027726 C0026769\n", "C0027726 C0026769\n", "C0027726 C0026769\n", "C0027726 C0026769\n", "C0027726 C0026769\n", "C0027726 C0026769\n", "C0027726 C0026769\n", "C0042164 C0026769\n", "C0042164 C0026769\n", "C0042164 C0026769\n", "C0042164 C0026769\n", "C0042164 C0026769\n", "C0024115 C0004096\n", "C0036341 C0005587\n", "C0036341 C0005587\n", "C0011581 C0005587\n", "C0036341 C0033975\n", "C0036341 C0033975\n", "C0036341 C0005586\n", "C0036341 C0005586\n", "C0005587 C0005586\n", "C0036341 C0033975\n", "C0036341 C0033975\n", "C0036341 C0033975\n", "C0040517 C0033975\n", "C0036341 C0004352\n", "C0036341 C0004352\n", "C0036341 C0005587\n", "C1269683 C0005587\n", "C0036337 C0033975\n", "C0036341 C0036337\n", "C0036341 C0012734\n", "C0033975 C0012734\n", "C0030193 C0029408\n", "C0030193 C0029408\n", "C0030193 C0029408\n", "C0030193 C0029408\n", "C0030193 C0029408\n", "C0030193 C0029408\n", "C0030193 C0029408\n", "C0030193 C0029408\n", "C0030193 C0029408\n", "C0030193 C0029408\n", "C0030193 C0029408\n", "C0030193 C0029408\n", "C0038013 C0029408\n", "C0038013 C0029408\n", "C0038013 C0029408\n", "C0038013 C0029408\n", "C0038013 C0029408\n", "C0038013 C0029408\n", "C0149931 C0030193\n", "C0042164 C0027726\n", "C0042164 C0027726\n", "C0042164 C0027726\n", "C0042164 C0027726\n", "C0042164 C0027726\n", "C0038454 C0020538\n", "C0027051 C0020538\n", "C0022735 C0010417\n", "C0022735 C0010417\n", "C0022735 C0010417\n", "C0022735 C0010417\n", "C0022735 C0010417\n", "C0271623 C0010417\n", "C1269683 C0011581\n", "C1269683 C0011581\n", "C0030319 C0011581\n", "C0030319 C0011581\n", "C1269683 C0030319\n", "C1269683 C0030319\n", "C1269683 C0005587\n", "C0011581 C0005587\n", "C0917801 C0005587\n", "C0028768 C0005587\n", "C0030319 C0005587\n", "C2267227 C0005587\n", "C1269683 C0011581\n", "C1269683 C0011581\n", "C0917801 C0011581\n", "C0028768 C0011581\n", "C0028768 C0011581\n", "C0028768 C0011581\n", "C0030319 C0011581\n", "C2267227 C0011581\n", "C0038436 C0011581\n", "C0038436 C0011581\n", "C1269683 C0028768\n", "C1269683 C0028768\n", "C0030319 C0028768\n", "C0030319 C0028768\n", "C0030319 C0028768\n", "C2267227 C0028768\n", "C0038436 C0028768\n", "C0038436 C0028768\n", "C1269683 C0030319\n", "C2267227 C0030319\n", "C0038436 C0030319\n", "C0038436 C0030319\n", "C1269683 C0038436\n", "C0028754 C0011581\n", "C0028043 C0011581\n", "C1269683 C0028043\n", "C0028754 C0028043\n", "C1269683 C0028754\n", "C1263846 C0028754\n", "C1263846 C0028754\n", "C0038013 C0030193\n", "C0038013 C0030193\n", "C0033774 C0011615\n", "C0020621 C0009806\n", "C0334634 C0007134\n", "C0334634 C0007134\n", "C0041341 C0007134\n", "C0678222 C0007134\n", "C1263846 C0020538\n", "C1263846 C0020538\n", "C0242422 C0030567\n", "C0242422 C0030567\n", "C0030920 C0017168\n", "C0030920 C0017168\n", "C0030920 C0017168\n", "C0030920 C0017168\n", "C0030920 C0017168\n", "C0149871 C0034065\n", "C0149871 C0012739\n", "C0011860 C0011854\n", "C0524910 C0524909\n", "No se puede calcular\n", "C0206682 C0007134\n", "C2239176 C0007134\n", "C0238463 C0007134\n", "No se puede calcular\n", "C0238198 C0007134\n", "C1261473 C0007134\n", "C0238463 C0206682\n", "No se puede calcular\n", "C0020538 C0004238\n", "C0027051 C0020538\n", "C0027051 C0020538\n", "C0035258 C0030567\n", "C0035258 C0030567\n", "C0035258 C0030567\n", "C2239176 C0238198\n", "C0006277 C0004096\n", "C0006277 C0004096\n", "C0034067 C0004096\n", "C0034067 C0004096\n", "C0024115 C0004096\n", "C0024115 C0004096\n", "C0034067 C0006277\n", "C0034067 C0006277\n", "C0020538 C0020428\n", "C0023474 C0005699\n", "No se puede calcular\n", "C0023474 C0005699\n", "No se puede calcular\n", "C0206141 C0005699\n", "No se puede calcular\n", "C0238198 C0005699\n", "No se puede calcular\n", "C0032285 C0031099\n", "C0042029 C0031099\n", "C0042029 C0032285\n", "C0020538 C0004238\n", "C0020538 C0004238\n", "C0023480 C0023467\n", "C0020538 C0011881\n", "C0020538 C0011881\n", "C0024138 C0003873\n", "C0024138 C0003873\n", "C0024141 C0003873\n", "C0024141 C0003873\n", "C0024535 C0003873\n", "C0024535 C0003873\n", "C0024141 C0024138\n", "C0024141 C0024138\n", "C0024535 C0024138\n", "C0024535 C0024138\n", "C0024535 C0024141\n", "C0024535 C0024141\n", "C0020676 C0010308\n", "C0020676 C0010308\n", "C0020676 C0018021\n", "C0020676 C0018021\n", "C0008350 C0008312\n", "C0020538 C0011881\n", "C0020538 C0018802\n", "C0020538 C0018802\n", "C0028754 C0020649\n", "C2607914 C0035455\n", "C0014547 C0014544\n", "C0038220 C0014544\n", "C0494475 C0014544\n", "C0038220 C0014547\n", "C0494475 C0014547\n", "C0494475 C0038220\n", "C0023487 C0023467\n", "C0032463 C0027022\n", "C0027773 C0020459\n", "C0235480 C0004238\n", "C0235480 C0004238\n", "C0235480 C0004238\n", "C0024623 C0007102\n", "C0334634 C0023434\n", "C0028754 C0001973\n", "C0150055 C0001973\n", "C0150055 C0028754\n", "C0150055 C0030193\n", "C0150055 C0030193\n", "C0524662 C0030193\n", "C2215257 C0030193\n", "No se puede calcular\n", "C2215257 C0030193\n", "No se puede calcular\n", "C2215257 C0030193\n", "No se puede calcular\n", "C0678222 C0041341\n", "C1269683 C0036341\n", "C1269683 C0036341\n", "C0040517 C0036341\n", "C0029456 C0020598\n", "C0024301 C0023434\n", "C0524662 C0150055\n", "C2215257 C0150055\n", "No se puede calcular\n", "C2215257 C0150055\n", "No se puede calcular\n", "C2215257 C0524662\n", "No se puede calcular\n", "C0004604 C0003873\n", "C0149931 C0003873\n", "C0027051 C0004604\n", "C0029408 C0004604\n", "C0030193 C0004604\n", "C0149931 C0027051\n", "C0149931 C0030193\n", "C0038220 C0014544\n", "C0038220 C0014544\n", "C0038220 C0014544\n", "C0024305 C0003873\n", "C0242379 C0003873\n", "C0278996 C0003873\n", "C0242379 C0024305\n", "C0278996 C0024305\n", "C0278996 C0242379\n", "C0003873 C0003872\n", "C0004096 C0003872\n", "C0006114 C0003872\n", "C0010346 C0003872\n", "C0021390 C0003872\n", "C0024141 C0003872\n", "C0024301 C0003872\n", "C0024305 C0003872\n", "C0026769 C0003872\n", "C0033860 C0003872\n", "C0038013 C0003872\n", "C0042165 C0003872\n", "No se puede calcular\n", "C0079744 C0003872\n", "C0011615 C0003873\n", "C0006114 C0003873\n", "C0024301 C0003873\n", "C0024305 C0003873\n", "C0033860 C0003873\n", "C0038013 C0003873\n", "C0042165 C0003873\n", "No se puede calcular\n", "C0079744 C0003873\n", "C0006114 C0004096\n", "C0024301 C0004096\n", "C0024305 C0004096\n", "C0033860 C0004096\n", "C0038013 C0004096\n", "C0042165 C0004096\n", "No se puede calcular\n", "C0079744 C0004096\n", "C0011615 C0004096\n", "C0010346 C0006114\n", "C0021390 C0006114\n", "C0024141 C0006114\n", "C0024301 C0006114\n", "C0024305 C0006114\n", "C0026769 C0006114\n", "C0033860 C0006114\n", "C0038013 C0006114\n", "C0042165 C0006114\n", "No se puede calcular\n", "C0079744 C0006114\n", "C0011615 C0010346\n", "C0021390 C0011615\n", "C0024301 C0010346\n", "C0024305 C0010346\n", "C0033860 C0010346\n", "C0038013 C0010346\n", "C0042165 C0010346\n", "No se puede calcular\n", "C0079744 C0010346\n", "C0024301 C0021390\n", "C0024305 C0021390\n", "C0033860 C0021390\n", "C0038013 C0021390\n", "C0042165 C0021390\n", "No se puede calcular\n", "C0079744 C0021390\n", "C0024301 C0024141\n", "C0024305 C0024141\n", "C0033860 C0024141\n", "C0038013 C0024141\n", "C0042165 C0024141\n", "No se puede calcular\n", "C0079744 C0024141\n", "C0024305 C0024301\n", "C0026769 C0024301\n", "C0033860 C0024301\n", "C0038013 C0024301\n", "C0042165 C0024301\n", "No se puede calcular\n", "C0079744 C0024301\n", "C0026769 C0024305\n", "C0033860 C0024305\n", "C0038013 C0024305\n", "C0042165 C0024305\n", "No se puede calcular\n", "C0079744 C0024305\n", "C0033860 C0026769\n", "C0038013 C0026769\n", "C0042165 C0026769\n", "No se puede calcular\n", "C0079744 C0026769\n", "C0038013 C0033860\n", "C0042165 C0033860\n", "No se puede calcular\n", "C0079744 C0033860\n", "C0042165 C0038013\n", "No se puede calcular\n", "C0079744 C0038013\n", "C0079744 C0042165\n", "No se puede calcular\n", "C0271623 C0022735\n", "C0494475 C0014544\n", "C0494475 C0014544\n", "C0014547 C0014544\n", "C0494475 C0014547\n", "C2267227 C1269683\n", "C0020538 C0007131\n", "C0036220 C0007131\n", "C0678222 C0007131\n", "C1140680 C0007131\n", "C0036220 C0020538\n", "C0678222 C0020538\n", "C1140680 C0020538\n", "C0678222 C0024623\n", "C0376358 C0024623\n", "C0678222 C0376358\n", "C0678222 C0036220\n", "C1140680 C0036220\n", "C1140680 C0678222\n", "C0030567 C0020514\n", "C0020550 C0018213\n", "C0027819 C0027708\n", "C0149925 C0027708\n", "C0242379 C0027708\n", "C0678222 C0027708\n", "C1140680 C0027708\n", "C0149925 C0027819\n", "C0242379 C0027819\n", "C0678222 C0027819\n", "C1140680 C0027819\n", "C0242379 C0149925\n", "C0678222 C0149925\n", "C1140680 C0149925\n", "C0678222 C0242379\n", "C1140680 C0242379\n", "C1140680 C0678222\n", "C0238198 C0206141\n", "C0221013 C0206141\n", "C0238198 C0221013\n", "C0020557 C0020474\n", "C0020557 C0020474\n", "C0020557 C0020473\n", "C0020474 C0020473\n", "C0020473 C0020443\n", "C1112486 C0023467\n", "C0020649 C0006267\n", "C0034067 C0006267\n", "C0043144 C0006267\n", "No se puede calcular\n", "C0034067 C0020649\n", "C0043144 C0020649\n", "No se puede calcular\n", "C0043144 C0034067\n", "No se puede calcular\n", "C0024537 C0003873\n", "C0024537 C0024138\n", "C0024537 C0024141\n", "C0024537 C0024535\n", "C0242350 C0020542\n", "C0032768 C0027796\n", "C0033774 C0027796\n", "C0033774 C0032768\n", "C1140680 C0007131\n", "C0206141 C0023474\n", "No se puede calcular\n", "C0238198 C0023474\n", "No se puede calcular\n", "C0020538 C0013604\n", "C0020542 C0011644\n", "C1269683 C0030567\n" ] } ], "source": [ "df_jaccard_distance_target = []\n", "disease1_list = Triples_target_final[\"disease_id\"].to_list()\n", "disease2_list = Triples_target_final[\"New Condition CUI\"].to_list()\n", "\n", "for disease1,disease2 in zip(disease1_list,disease2_list):\n", " print(disease1,disease2)\n", " try:\n", " value = value_jaccard(disease1,disease2)\n", " df_jaccard_distance_target.append(value) \n", " except:\n", " print(\"No se puede calcular\")\n", " df_jaccard_distance_target.append(\"Na\")\n", " " ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "Triples_target_final[\"Jaccard_distance\"] = df_jaccard_distance_target" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "final_jacc_triples_tar = Triples_target_final[Triples_target_final[\"Jaccard_distance\"]!= \"Na\"]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "final_jacc_triples_tar[\"Jaccard_similarity\"] = 1-final_jacc_triples_tar[\"Jaccard_distance\"]" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.1706008356068068" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_jacc_triples_tar[\"Jaccard_similarity\"].mean()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAit0lEQVR4nO3de3DU1f3/8deSLEvCJChQcpEIoQ31gqIFoSZtiZWEsXgr46UTq2i94EStMVWaFCuLU4PEaZoKguJYYKqpTFWsM6JkO60RjK2A0Co40GqkXEwzYCTB0M0C5/eH3+zPzUbILrufPRuej5mMfs6ez9n3vlnZl+ezm3UZY4wAAAAsMijRBQAAAPRGQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWCc10QVE49ixY9q3b58yMjLkcrkSXQ4AAOgHY4w6OzuVm5urQYOOv0eSlAFl3759ysvLS3QZAAAgCrt379bo0aOPOycpA0pGRoakLx5gZmZmVGsEAgE1NjaqtLRUbrc7luWhF3rtLPrtHHrtHHrtrHj1u6OjQ3l5ecHX8eNJyoDSc1knMzPzpAJKenq6MjMzebLHGb12Fv12Dr12Dr12Vrz73Z+3Z/AmWQAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrpCa6ADhnbNWrYWMfPzozAZUAAHB87KAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYJ2IA8qbb76pK664Qrm5uXK5XHr55ZdDbjfGyOv1Kjc3V2lpaSouLta2bdtC5vj9ft1zzz0aOXKkhg4dqiuvvFJ79uw5qQcCAAAGjogDyueff66JEydqyZIlfd5eW1ururo6LVmyRBs3blR2drZKSkrU2dkZnFNRUaE1a9bo+eef14YNG3To0CFdfvnlOnr0aPSPBAAADBipkZ5w2WWX6bLLLuvzNmOM6uvrNW/ePM2aNUuStGrVKmVlZamhoUFz5szRwYMH9cwzz+j3v/+9pk+fLkl69tlnlZeXpz//+c+aMWPGSTwcAAAwEEQcUI6npaVFra2tKi0tDY55PB5NmzZNzc3NmjNnjjZv3qxAIBAyJzc3VxMmTFBzc3OfAcXv98vv9wePOzo6JEmBQECBQCCqWnvOi/b8ZORJMWFjTjz+U7HXiUS/nUOvnUOvnRWvfkeyXkwDSmtrqyQpKysrZDwrK0u7du0Kzhk8eLBOP/30sDk95/e2cOFCLViwIGy8sbFR6enpJ1Wzz+c7qfOTSe2U8LG1a9c6dv+nUq9tQL+dQ6+dQ6+dFet+d3V19XtuTANKD5fLFXJsjAkb6+14c6qrq1VZWRk87ujoUF5enkpLS5WZmRlVjYFAQD6fTyUlJXK73VGtkWwmeNeFjb3vjf8ltVOx14lEv51Dr51Dr50Vr373XAHpj5gGlOzsbElf7JLk5OQEx9va2oK7KtnZ2eru7lZ7e3vILkpbW5sKCwv7XNfj8cjj8YSNu93uk25cLNZIFv6j4QHQycd+KvXaBvTbOfTaOfTaWbHudyRrxfT3oOTn5ys7OztkS6i7u1tNTU3B8DFp0iS53e6QOZ988onef//9rwwoAADg1BLxDsqhQ4f073//O3jc0tKirVu3avjw4TrzzDNVUVGhmpoaFRQUqKCgQDU1NUpPT1dZWZkkadiwYbr11lv1s5/9TCNGjNDw4cN1//3367zzzgt+qgcAAJzaIg4omzZt0iWXXBI87nlvyOzZs7Vy5UrNnTtXhw8fVnl5udrb2zV16lQ1NjYqIyMjeM5vfvMbpaam6rrrrtPhw4d16aWXauXKlUpJSYnBQwIAAMku4oBSXFwsY8I/rtrD5XLJ6/XK6/V+5ZwhQ4Zo8eLFWrx4caR3DwAATgF8Fw8AALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKwT84By5MgRPfjgg8rPz1daWprGjRunhx9+WMeOHQvOMcbI6/UqNzdXaWlpKi4u1rZt22JdCgAASFIxDyiLFi3Sk08+qSVLluiDDz5QbW2tHnvsMS1evDg4p7a2VnV1dVqyZIk2btyo7OxslZSUqLOzM9blAACAJBTzgPL222/rqquu0syZMzV27Fhdc801Ki0t1aZNmyR9sXtSX1+vefPmadasWZowYYJWrVqlrq4uNTQ0xLocAACQhFJjveB3vvMdPfnkk9q5c6fGjx+vf/zjH9qwYYPq6+slSS0tLWptbVVpaWnwHI/Ho2nTpqm5uVlz5swJW9Pv98vv9wePOzo6JEmBQECBQCCqOnvOi/b8ZORJMWFjTjz+U7HXiUS/nUOvnUOvnRWvfkeynssYE/6qdRKMMfrFL36hRYsWKSUlRUePHtUjjzyi6upqSVJzc7OKioq0d+9e5ebmBs+74447tGvXLq1bty5sTa/XqwULFoSNNzQ0KD09PZblAwCAOOnq6lJZWZkOHjyozMzM486N+Q7K6tWr9eyzz6qhoUHnnnuutm7dqoqKCuXm5mr27NnBeS6XK+Q8Y0zYWI/q6mpVVlYGjzs6OpSXl6fS0tITPsCvEggE5PP5VFJSIrfbHdUayWaCNzz8ve+dEff7PRV7nUj02zn02jn02lnx6nfPFZD+iHlAeeCBB1RVVaUf/ehHkqTzzjtPu3bt0sKFCzV79mxlZ2dLklpbW5WTkxM8r62tTVlZWX2u6fF45PF4wsbdbvdJNy4WayQL/9HwAOjkYz+Vem0D+u0ceu0ceu2sWPc7krVi/ibZrq4uDRoUumxKSkrwY8b5+fnKzs6Wz+cL3t7d3a2mpiYVFhbGuhwAAJCEYr6DcsUVV+iRRx7RmWeeqXPPPVdbtmxRXV2dfvKTn0j64tJORUWFampqVFBQoIKCAtXU1Cg9PV1lZWWxLgcAACShmAeUxYsX65e//KXKy8vV1tam3NxczZkzRw899FBwzty5c3X48GGVl5ervb1dU6dOVWNjozIyMmJdDgAASEIxDygZGRmqr68Pfqy4Ly6XS16vV16vN9Z3DwAABgC+iwcAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWCfmv0kWp6axVa+GHH/86MwEVQIAGAjYQQEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0+xTNA9f5UDQAAyYQdFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDqpiS4AJza26tWQ448fnZmgSgAAcAY7KAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgnbgElL179+rHP/6xRowYofT0dF1wwQXavHlz8HZjjLxer3Jzc5WWlqbi4mJt27YtHqUAAIAkFPPv4mlvb1dRUZEuueQSvfbaaxo1apQ+/PBDnXbaacE5tbW1qqur08qVKzV+/Hj96le/UklJiXbs2KGMjIxYl4STxHcBAQCcFvOAsmjRIuXl5WnFihXBsbFjxwb/3Rij+vp6zZs3T7NmzZIkrVq1SllZWWpoaNCcOXNiXRIAAEgyMQ8or7zyimbMmKFrr71WTU1NOuOMM1ReXq7bb79dktTS0qLW1laVlpYGz/F4PJo2bZqam5v7DCh+v19+vz943NHRIUkKBAIKBAJR1dlzXrTnO8mTYkKO+1Nz73O+SjRr9XXO8eYkU68HAvrtHHrtHHrtrHj1O5L1XMaY/r2S9dOQIUMkSZWVlbr22mv1zjvvqKKiQk899ZRuuukmNTc3q6ioSHv37lVubm7wvDvuuEO7du3SunXrwtb0er1asGBB2HhDQ4PS09NjWT4AAIiTrq4ulZWV6eDBg8rMzDzu3JjvoBw7dkyTJ09WTU2NJOnCCy/Utm3btGzZMt10003BeS6XK+Q8Y0zYWI/q6mpVVlYGjzs6OpSXl6fS0tITPsCvEggE5PP5VFJSIrfbHdUaTpngDQ1t73tnRHzOV4lmrb7OOd6cZOr1QEC/nUOvnUOvnRWvfvdcAemPmAeUnJwcnXPOOSFjZ599tl588UVJUnZ2tiSptbVVOTk5wTltbW3Kysrqc02PxyOPxxM27na7T7pxsVgj3vxHQ4Nbf+rtfc5XiWatvs7pz5xk6PVAQr+dQ6+dQ6+dFet+R7JWzD9mXFRUpB07doSM7dy5U2PGjJEk5efnKzs7Wz6fL3h7d3e3mpqaVFhYGOtyAABAEor5Dsp9992nwsJC1dTU6LrrrtM777yj5cuXa/ny5ZK+uLRTUVGhmpoaFRQUqKCgQDU1NUpPT1dZWVmsywEAAEko5gHloosu0po1a1RdXa2HH35Y+fn5qq+v1w033BCcM3fuXB0+fFjl5eVqb2/X1KlT1djYyO9AAQAAkuIQUCTp8ssv1+WXX/6Vt7tcLnm9Xnm93njcPQAASHJ8Fw8AALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYJzXRBSByY6teDRv7+NGZCagEAID4YAcFAABYh4ACAACswyUehOjr8hEAAE5jBwUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsk5roApBYY6teTXQJAACEYQcFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKwT94CycOFCuVwuVVRUBMeMMfJ6vcrNzVVaWpqKi4u1bdu2eJcCAACSRFwDysaNG7V8+XKdf/75IeO1tbWqq6vTkiVLtHHjRmVnZ6ukpESdnZ3xLAcAACSJuAWUQ4cO6YYbbtDTTz+t008/PThujFF9fb3mzZunWbNmacKECVq1apW6urrU0NAQr3IAAEASiVtAueuuuzRz5kxNnz49ZLylpUWtra0qLS0Njnk8Hk2bNk3Nzc3xKgcAACSR1Hgs+vzzz2vz5s3atGlT2G2tra2SpKysrJDxrKws7dq1q8/1/H6//H5/8Lijo0OSFAgEFAgEoqqx57xoz3eSJ8WccE7vx9Gfc6LVV89639+X5yRTrwcC+u0ceu0ceu2sePU7kvVcxpiYvpLt3r1bkydPVmNjoyZOnChJKi4u1gUXXKD6+no1NzerqKhI+/btU05OTvC822+/Xbt379brr78etqbX69WCBQvCxhsaGpSenh7L8gEAQJx0dXWprKxMBw8eVGZm5nHnxjygvPzyy/rhD3+olJSU4NjRo0flcrk0aNAg7dixQ9/4xjf07rvv6sILLwzOueqqq3Taaadp1apVYWv2tYOSl5en/fv3n/ABfpVAICCfz6eSkhK53e6o1nDKBO+6E8553zsj4nOi1fu++rq/L89Jpl4PBPTbOfTaOfTaWfHqd0dHh0aOHNmvgBLzSzyXXnqp3nvvvZCxW265RWeddZZ+/vOfa9y4ccrOzpbP5wsGlO7ubjU1NWnRokV9runxeOTxeMLG3W73STcuFmvEm/+o64Rzej+G/pwTrb761fv++pqTDL0eSOi3c+i1c+i1s2Ld70jWinlAycjI0IQJE0LGhg4dqhEjRgTHKyoqVFNTo4KCAhUUFKimpkbp6ekqKyuLdTkAACAJxeVNsicyd+5cHT58WOXl5Wpvb9fUqVPV2NiojIyMRJQDAAAs40hAeeONN0KOXS6XvF6vvF6vE3cPAACSTEJ2UE5VY6teDRv7+NGZCagEAAC78WWBAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1+EVtA0RfvwQOAIBkxQ4KAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsk5roAnDqmOBdJ/9RV/D440dnJrAaAIDN2EEBAADWIaAAAADrcIkHVhlb9WrIMZeBAODUxA4KAACwDgEFAABYh4ACAACsw3tQLNP7PRgAAJyK2EEBAADWIaAAAADrcIknjrhcAwBAdNhBAQAA1iGgAAAA63CJpx/6ulTDbzgFACB+2EEBAADWiXlAWbhwoS666CJlZGRo1KhRuvrqq7Vjx46QOcYYeb1e5ebmKi0tTcXFxdq2bVusSwEAAEkq5gGlqalJd911l/72t7/J5/PpyJEjKi0t1eeffx6cU1tbq7q6Oi1ZskQbN25Udna2SkpK1NnZGetyAABAEor5e1Bef/31kOMVK1Zo1KhR2rx5s773ve/JGKP6+nrNmzdPs2bNkiStWrVKWVlZamho0Jw5c2JdEgAASDJxf5PswYMHJUnDhw+XJLW0tKi1tVWlpaXBOR6PR9OmTVNzc3OfAcXv98vv9wePOzo6JEmBQECBQCCqunrO68/5nhTzledHet6J1unPOYnW12PvXfeX5/T8u2fQV8/pzzron0ie2zg59No59NpZ8ep3JOu5jDFxe0U0xuiqq65Se3u71q9fL0lqbm5WUVGR9u7dq9zc3ODcO+64Q7t27dK6devC1vF6vVqwYEHYeENDg9LT0+NVPgAAiKGuri6VlZXp4MGDyszMPO7cuO6g3H333frnP/+pDRs2hN3mcrlCjo0xYWM9qqurVVlZGTzu6OhQXl6eSktLT/gAv0ogEJDP51NJSYncbvdx507whoem970zTngffZ13onX6c06i9fXYe9f95Tk9vf7lpkHyH3P1Oac/66B/Inlu4+TQa+fQa2fFq989V0D6I24B5Z577tErr7yiN998U6NHjw6OZ2dnS5JaW1uVk5MTHG9ra1NWVlafa3k8Hnk8nrBxt9t90o3rzxr+o+HBqT/329d5J1qnP+ckWl+PvXfdfc455gqZF+066J9Y/PeB/qHXzqHXzop1vyNZK+af4jHG6O6779ZLL72kv/zlL8rPzw+5PT8/X9nZ2fL5fMGx7u5uNTU1qbCwMNblAACAJBTzHZS77rpLDQ0N+tOf/qSMjAy1trZKkoYNG6a0tDS5XC5VVFSopqZGBQUFKigoUE1NjdLT01VWVhbrcgAAQBKKeUBZtmyZJKm4uDhkfMWKFbr55pslSXPnztXhw4dVXl6u9vZ2TZ06VY2NjcrIyIh1OQAAIAnFPKD050NBLpdLXq9XXq831ncPAAAGAL6LBwAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdeL+ZYED1diqV0OOP350ZoIqsdOX++NJMaqd4sx99eDPAwCSGzsoAADAOgQUAABgHQIKAACwDu9BiZG+3gcBAACiww4KAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdflEbIsYvpQMAxBs7KAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdfhFbQnGLz07vr768/GjMxNQCQAMXL3/rvWkGNVOSVAx/4cdFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1uFTPMBx9H5nO58gQiLwPMSpiB0UAABgHQIKAACwDgEFAABYh/egIOnw23eB2OH9LbAVOygAAMA6BBQAAGAdLvH0gUsIzohnn/uzbW371vZA+aJE2/sMwE7soAAAAOsQUAAAgHW4xAMgZrg8OjBxmQ6JwA4KAACwDgEFAABYh0s8wP/pz+WJZPhkzQTvOtVO+eKf/qMu6+oDThaXnE4N7KAAAADrEFAAAIB1uMSDU4KTny6J9jIQn4BJLmOrXpUnxQQvp+145PKE19Mblz6QzNhBAQAA1kloQFm6dKny8/M1ZMgQTZo0SevXr09kOQAAwBIJu8SzevVqVVRUaOnSpSoqKtJTTz2lyy67TNu3b9eZZ56ZqLKAiEXz6Z9ot97j9ekFLg+E4nIbkHgJ20Gpq6vTrbfeqttuu01nn3226uvrlZeXp2XLliWqJAAAYImE7KB0d3dr8+bNqqqqChkvLS1Vc3Nz2Hy/3y+/3x88PnjwoCTp008/VSAQiKqGQCCgrq4uHThwQG63O+S21COfR7Um+pZ6zKir65hSA4N09Jgr0eVY4cCBA2Fj0Tzv+lwn8HlIv/ua05/77s95/VknFus6rb9/Fl9+bsfzcfWup7/Pn2j+7PuzdiL+DL/8d7YN9Qw0vXva89zu6zXyZHR2dkqSjDEnnmwSYO/evUaSeeutt0LGH3nkETN+/Piw+fPnzzeS+OGHH3744YefAfCze/fuE2aFhH7M2OUK/b9pY0zYmCRVV1ersrIyeHzs2DF9+umnGjFiRJ/z+6Ojo0N5eXnavXu3MjMzo1oD/UOvnUW/nUOvnUOvnRWvfhtj1NnZqdzc3BPOTUhAGTlypFJSUtTa2hoy3tbWpqysrLD5Ho9HHo8nZOy0006LSS2ZmZk82R1Cr51Fv51Dr51Dr50Vj34PGzasX/MS8ibZwYMHa9KkSfL5fCHjPp9PhYWFiSgJAABYJGGXeCorK3XjjTdq8uTJuvjii7V8+XL95z//0Z133pmokgAAgCUSFlCuv/56HThwQA8//LA++eQTTZgwQWvXrtWYMWMcuX+Px6P58+eHXTpC7NFrZ9Fv59Br59BrZ9nQb5cx/fmsDwAAgHP4Lh4AAGAdAgoAALAOAQUAAFiHgAIAAKwzoAPK0qVLlZ+fryFDhmjSpElav379cec3NTVp0qRJGjJkiMaNG6cnn3zSoUqTXyS9fumll1RSUqKvfe1ryszM1MUXX6x169Y5WG3yi/S53eOtt95SamqqLrjggvgWOIBE2mu/36958+ZpzJgx8ng8+vrXv67f/e53DlWb3CLt9XPPPaeJEycqPT1dOTk5uuWWW/henn548803dcUVVyg3N1cul0svv/zyCc9JyOtjTL5cx0LPP/+8cbvd5umnnzbbt2839957rxk6dKjZtWtXn/M/+ugjk56ebu69916zfft28/TTTxu3221eeOEFhytPPpH2+t577zWLFi0y77zzjtm5c6eprq42brfbvPvuuw5Xnpwi7XePzz77zIwbN86UlpaaiRMnOlNskoum11deeaWZOnWq8fl8pqWlxfz9738P+94xhIu01+vXrzeDBg0yv/3tb81HH31k1q9fb84991xz9dVXO1x58lm7dq2ZN2+eefHFF40ks2bNmuPOT9Tr44ANKFOmTDF33nlnyNhZZ51lqqqq+pw/d+5cc9ZZZ4WMzZkzx3z729+OW40DRaS97ss555xjFixYEOvSBqRo+3399debBx980MyfP5+A0k+R9vq1114zw4YNMwcOHHCivAEl0l4/9thjZty4cSFjjz/+uBk9enTcahyI+hNQEvX6OCAv8XR3d2vz5s0qLS0NGS8tLVVzc3Of57z99tth82fMmKFNmzYpEAjErdZkF02vezt27Jg6Ozs1fPjweJQ4oETb7xUrVujDDz/U/Pnz413igBFNr1955RVNnjxZtbW1OuOMMzR+/Hjdf//9Onz4sBMlJ61oel1YWKg9e/Zo7dq1Msbov//9r1544QXNnDnTiZJPKYl6fUzotxnHy/79+3X06NGwLx7MysoK+4LCHq2trX3OP3LkiPbv36+cnJy41ZvMoul1b7/+9a/1+eef67rrrotHiQNKNP3+17/+paqqKq1fv16pqQPyP/m4iKbXH330kTZs2KAhQ4ZozZo12r9/v8rLy/Xpp5/yPpTjiKbXhYWFeu6553T99dfrf//7n44cOaIrr7xSixcvdqLkU0qiXh8H5A5KD5fLFXJsjAkbO9H8vsYRLtJe9/jDH/4gr9er1atXa9SoUfEqb8Dpb7+PHj2qsrIyLViwQOPHj3eqvAElkuf2sWPH5HK59Nxzz2nKlCn6wQ9+oLq6Oq1cuZJdlH6IpNfbt2/XT3/6Uz300EPavHmzXn/9dbW0tPB9bnGSiNfHAfm/UyNHjlRKSkpY8m5rawtLgT2ys7P7nJ+amqoRI0bErdZkF02ve6xevVq33nqr/vjHP2r69OnxLHPAiLTfnZ2d2rRpk7Zs2aK7775b0hcvosYYpaamqrGxUd///vcdqT3ZRPPczsnJ0RlnnBHydfJnn322jDHas2ePCgoK4lpzsoqm1wsXLlRRUZEeeOABSdL555+voUOH6rvf/a5+9atfsesdQ4l6fRyQOyiDBw/WpEmT5PP5QsZ9Pp8KCwv7POfiiy8Om9/Y2KjJkyfL7XbHrdZkF02vpS92Tm6++WY1NDRwzTgCkfY7MzNT7733nrZu3Rr8ufPOO/XNb35TW7du1dSpU50qPelE89wuKirSvn37dOjQoeDYzp07NWjQII0ePTqu9SazaHrd1dWlQYNCX8JSUlIk/f//u0dsJOz1Ma5vwU2gno+sPfPMM2b79u2moqLCDB061Hz88cfGGGOqqqrMjTfeGJzf8zGq++67z2zfvt0888wzfMy4nyLtdUNDg0lNTTVPPPGE+eSTT4I/n332WaIeQlKJtN+98Sme/ou0152dnWb06NHmmmuuMdu2bTNNTU2moKDA3HbbbYl6CEkj0l6vWLHCpKammqVLl5oPP/zQbNiwwUyePNlMmTIlUQ8haXR2dpotW7aYLVu2GEmmrq7ObNmyJfiRblteHwdsQDHGmCeeeMKMGTPGDB482HzrW98yTU1Nwdtmz55tpk2bFjL/jTfeMBdeeKEZPHiwGTt2rFm2bJnDFSevSHo9bdo0IynsZ/bs2c4XnqQifW5/GQElMpH2+oMPPjDTp083aWlpZvTo0aaystJ0dXU5XHVyirTXjz/+uDnnnHNMWlqaycnJMTfccIPZs2ePw1Unn7/+9a/H/TvYltdHlzHshQEAALsMyPegAACA5EZAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1/h/PME/QXQsSWgAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "final_jacc_triples_tar[\"Jaccard_similarity\"].hist(bins=100)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### DISNET SIMILARITY" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "df_simi_disnet = pd.DataFrame(my_array_simi)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "df_simi_disnet[\"score\"] = df_simi_disnet" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# STATISTICS" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [], "source": [ "drebiop = final_jacc_triples[\"Jaccard_similarity\"].tolist()" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "drege = final_jacc_triples_tar[\"Jaccard_similarity\"].tolist()" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "disnet = df_simi_disnet[\"score\"].tolist()" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [], "source": [ "drebiop = np.array(drebiop)" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "drege = np.array(drege)" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "disnet = np.array(disnet)" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "MannwhitneyuResult(statistic=22189.0, pvalue=0.05921647904233011)" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stats.mannwhitneyu(drebiop,drege)" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "MannwhitneyuResult(statistic=1089001604.0, pvalue=4.9458338586431055e-21)" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stats.mannwhitneyu(drebiop,disnet)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" } }, "nbformat": 4, "nbformat_minor": 4 }