{ "cells": [ { "cell_type": "code", "execution_count": 6, "id": "41a9395c-0e12-4e7a-85b8-1eac4f02870d", "metadata": {}, "outputs": [], "source": [ "from Bio import Entrez\n", "import GEOparse\n", "import pandas as pd\n", "import os\n", "import preprocess_functions\n", "import insert_tables\n", "import mysql.connector\n", "from mysql.connector import errorcode" ] }, { "cell_type": "code", "execution_count": 7, "id": "34bc052f-3e52-4a26-8946-0c12a9317bde", "metadata": {}, "outputs": [], "source": [ "gpl_path=\"/home/lmasa/GEO_Laura/data/gpl\"" ] }, { "cell_type": "code", "execution_count": 8, "id": "4981165e-a2bf-462b-8b1d-8573872e930b", "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "06-Jul-2024 13:30:43 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL94.annot: \n", "06-Jul-2024 13:30:43 DEBUG GEOparse - ANNOTATION: \n", "06-Jul-2024 13:30:43 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL571.annot: \n", "06-Jul-2024 13:30:43 DEBUG GEOparse - ANNOTATION: \n", "06-Jul-2024 13:30:43 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL570.annot: \n", "06-Jul-2024 13:30:43 DEBUG GEOparse - ANNOTATION: \n", "/home/lmasa/miniconda3/lib/python3.12/site-packages/GEOparse/GEOparse.py:401: DtypeWarning: Columns (12) have mixed types. Specify dtype option on import or set low_memory=False.\n", " return read_csv(StringIO(data), index_col=None, sep=\"\\t\")\n", "06-Jul-2024 13:30:43 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL93.annot: \n", "06-Jul-2024 13:30:43 DEBUG GEOparse - ANNOTATION: \n", "06-Jul-2024 13:30:44 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL96.annot: \n", "06-Jul-2024 13:30:44 DEBUG GEOparse - ANNOTATION: \n", "06-Jul-2024 13:30:44 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL1426.annot: \n", "06-Jul-2024 13:30:44 DEBUG GEOparse - ANNOTATION: \n", "06-Jul-2024 13:30:44 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL201.annot: \n", "06-Jul-2024 13:30:44 DEBUG GEOparse - ANNOTATION: \n", "06-Jul-2024 13:30:44 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL8300.annot: \n", "06-Jul-2024 13:30:44 DEBUG GEOparse - ANNOTATION: \n", "06-Jul-2024 13:30:44 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL4191.annot: \n", "06-Jul-2024 13:30:44 DEBUG GEOparse - ANNOTATION: \n", "06-Jul-2024 13:30:44 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL97.annot: \n", "06-Jul-2024 13:30:44 DEBUG GEOparse - ANNOTATION: \n", "06-Jul-2024 13:30:44 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL10526.annot: \n", "06-Jul-2024 13:30:44 DEBUG GEOparse - ANNOTATION: \n", "06-Jul-2024 13:30:45 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL92.annot: \n", "06-Jul-2024 13:30:45 DEBUG GEOparse - ANNOTATION: \n", "06-Jul-2024 13:30:45 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL95.annot: \n", "06-Jul-2024 13:30:45 DEBUG GEOparse - ANNOTATION: \n", "06-Jul-2024 13:30:45 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL74.annot: \n", "06-Jul-2024 13:30:45 DEBUG GEOparse - ANNOTATION: \n", "06-Jul-2024 13:30:45 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL4133.annot: \n", "06-Jul-2024 13:30:45 DEBUG GEOparse - ANNOTATION: \n", "06-Jul-2024 13:30:45 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL80.annot: \n", "06-Jul-2024 13:30:45 DEBUG GEOparse - ANNOTATION: \n", "06-Jul-2024 13:30:45 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL10558.annot: \n", "06-Jul-2024 13:30:46 DEBUG GEOparse - ANNOTATION: \n", "06-Jul-2024 13:30:46 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL91.annot: \n", "06-Jul-2024 13:30:46 DEBUG GEOparse - ANNOTATION: \n", "06-Jul-2024 13:30:46 INFO GEOparse - Parsing /home/lmasa/GEO_Laura/data/gpl/GPL246.annot: \n", "06-Jul-2024 13:30:46 DEBUG GEOparse - ANNOTATION: \n" ] } ], "source": [ "gpl_data=preprocess_functions.fetch_gpl_annot(gpl_path)" ] }, { "cell_type": "code", "execution_count": 4, "id": "23f1d834-caeb-4f0e-a28c-95d258f8ea53", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gpl_idgpl_title
0GPL94[HG_U95D] Affymetrix Human Genome U95D Array
1GPL571[HG-U133A_2] Affymetrix Human Genome U133A 2.0...
2GPL570[HG-U133_Plus_2] Affymetrix Human Genome U133 ...
3GPL93[HG_U95C] Affymetrix Human Genome U95C Array
4GPL96[HG-U133A] Affymetrix Human Genome U133A Array
\n", "
" ], "text/plain": [ " gpl_id gpl_title\n", "0 GPL94 [HG_U95D] Affymetrix Human Genome U95D Array\n", "1 GPL571 [HG-U133A_2] Affymetrix Human Genome U133A 2.0...\n", "2 GPL570 [HG-U133_Plus_2] Affymetrix Human Genome U133 ...\n", "3 GPL93 [HG_U95C] Affymetrix Human Genome U95C Array\n", "4 GPL96 [HG-U133A] Affymetrix Human Genome U133A Array" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gpl_data.head()" ] }, { "cell_type": "code", "execution_count": null, "id": "6389c1a1-c443-4774-bd9d-0fb65af45bee", "metadata": {}, "outputs": [], "source": [ "##Insert GPL data into database" ] }, { "cell_type": "code", "execution_count": 9, "id": "d5785dba-71d0-4269-9ae7-b1968b8e9a19", "metadata": {}, "outputs": [], "source": [ "#The connection details needed to insert data into the database\n", "host = \"{host}\" # Host where the MySQL server is located. Example: 'localhost' or '127.0.0.1'\n", "user = \"{user_name}\" # Username for accessing the MySQL database. Example: 'root' or 'my_user'\n", "password = \"{password}\" # Password for the MySQL user. Ensure to use a secure password for database access.\n", "database = \"{database}\" # Name of the database to connect to. Example: 'disnet_biolayer'\n", "port = \"{port}\" # Port number for the MySQL server. Default is 3306, but it may vary depending on the setup." ] }, { "cell_type": "code", "execution_count": 10, "id": "b823d154-7c31-4aee-80b1-89bb933edb58", "metadata": {}, "outputs": [], "source": [ "# Initialize the connection variable to None\n", "conn = None\n", "\n", "try:\n", " # Attempt to establish a connection to the MySQL database\n", " conn = mysql.connector.connect(\n", " host=host,\n", " user=user,\n", " password=password,\n", " database=database,\n", " port=port\n", " )\n", " # Insert GPL data into the database\n", " insert_tables.insert_gpl_main(conn, df)\n", "\n", "# Handle any MySQL errors that occur during the connection or insertion process\n", "except mysql.connector.Error as err:\n", " print(f\"Error connecting to MySQL: {err}\")\n", "\n", "# Ensure the connection is closed properly even if an error occurs\n", "finally:\n", " if conn and conn.is_connected():\n", " conn.close()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.1" } }, "nbformat": 4, "nbformat_minor": 5 }