Commit 1378bd54 authored by Jorge Molina Gomez's avatar Jorge Molina Gomez

Proceso para extraer los hemogramas de umls_old_dx

parent 9396d39a
This diff is collapsed.
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 2 11:07:37 2021
@author: ctb
"""
import mysql.connector
import pandas as pd
def generate_db_connection(ip, port, user, password, db_name):
db_conn = mysql.connector.connect(
host=ip,
port=port,
user=user,
password=password,
database=db_name,
auth_plugin='mysql_native_password'
)
db_cursor = db_conn.cursor()
return {"cnx": db_conn, "cursor": db_cursor}
def get_columns_from_table(db_mngr, table_name):
db_mngr["db_crs"].execute("show columns from "+table_name)
semantic_annotation_table_cols = db_mngr["db_crs"].fetchall()
col_list = []
for x in semantic_annotation_table_cols:
col_list.append(x[0])
return col_list
# Genera un dataframe con el contenido de la tabla indicada en 'table_name'.
# Si se pasan numeros enteros en los campos opcionales 'batch_size' y 'offset', se devolvera el dataframe
# correspondiente a un numero de 'batch_size' filas de la tabla, desde la fila indicada por 'offset'.
def compose_dataframe_from_query(db_mngr, table_name, batch_size = None, offset = None, where_clause = None, join_clause = None):
if(join_clause is not None):
query = "Select u.document_id, u.ehr, u.concept, d.subcategory, u.begin, u.end from " + table_name + " u inner join document d on d.ID = u.document_id";
else:
query = "Select document_id, ehr, sentence, sentence_id, concept, begin, end from " + table_name
if where_clause is not None:
query = query + " where " + where_clause
#if group_by is not None:
# query = query + " group by " + group_by
if batch_size is not None and offset is not None:
query = query + " limit " + str(batch_size) + " offset " + str(offset)
db_mngr["db_crs"].execute(query)
result = db_mngr["db_crs"].fetchall()
if(join_clause is not None):
table_cols = ["document_id", "ehr", "concept", "subcategory", "begin", "end"]
else:
table_cols = ["document_id", "ehr", "sentence", "sentence_id", "concept", "begin", "end"]
if len(result) > 0:
result = pd.DataFrame(result)
result.columns = table_cols
else:
result = pd.DataFrame(columns=table_cols)
return result
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment