import configparser
import csv
import json
import os
import re

import mysql.connector
import requests

import ConceptExtractor

# Concept labels that read_json_annotation treats as treatments.
_TREATMENT_LABELS = ("CHEMOTHERAPY_DRUG", "RADIOTHERAPY_DRUG", "MEDICATION")

# Parameterized statements used by save_ttes_into_database. Values are always
# bound through the driver, never concatenated into the SQL text.
_SELECT_MAX_TREATMENT_ID = "select max(treatment_id) from concept_extraction.treatment;"
_SELECT_MAX_DOSAGE_ID = "select max(dosage_id) from concept_extraction.dosage;"
_INSERT_TREATMENT = (
    "insert ignore into concept_extraction.treatment (treatment_id,name) values (%s,%s);"
)
_INSERT_NOTE_TREATMENT = (
    "insert into concept_extraction.note_treatment (note_id,treatment_id,begin,end) "
    "values (%s,%s,%s,%s);"
)
_INSERT_DOSAGE = (
    "insert ignore into concept_extraction.dosage (dosage_id,description) values (%s,%s);"
)
_INSERT_NOTE_DOSAGE = (
    "insert ignore into concept_extraction.note_dosage (note_id,dosage_id,begin,end) "
    "values (%s,%s,%s,%s);"
)


def call_date_metric_ann_module(json_body):
    """Send a document to the date/metric annotator and collect its METRIC annotations.

    Input: ``json_body``, the JSON payload posted to the annotator service
    (per the original comment it carries the document id, date and text).
    Output: dict mapping each key found in the response entries to the list
    of ``(annotation[0], annotation[2])`` tuples of every annotation whose
    sixth field is ``"METRIC"``.
    NOTE(review): the keys look like document ids and the tuple like
    (text, begin offset) -- confirm against the annotator's response format.

    Any failure to decode or walk the response is treated as "no more
    metrics": whatever was collected so far is returned.
    """
    r = requests.post("http://138.4.130.153:8088/jkes/annotator/dateAnnotator", json=json_body)
    dictOutput = {}
    try:
        answer = r.json()["response"]
        # The first element of the response is skipped, exactly as before.
        for entry in answer[1:]:
            for key, groups in entry.items():
                for group in groups:
                    for annotation in group:
                        if annotation[5] == "METRIC":
                            # Accumulate under the same key that is written to;
                            # the old code tested a different key, so each new
                            # metric overwrote the previous ones.
                            dictOutput.setdefault(key, []).append(
                                (annotation[0], annotation[2])
                            )
    except (ValueError, KeyError, IndexError, TypeError, AttributeError):
        # Best effort: an undecodable or unexpectedly shaped response yields
        # the metrics gathered up to that point instead of an exception.
        pass
    return dictOutput


def read_documents(path):
    """Read the notes from a CSV file.

    Input: ``path`` to the CSV file.
    Output: dict mapping the first column of each row to its sixth column;
    empty when ``path`` does not exist. Rows with fewer than six columns
    (for example blank lines) are skipped instead of raising ``IndexError``.
    """
    dictDocsOutput = {}
    if os.path.exists(path):
        with open(path) as csv_file:
            for row in csv.reader(csv_file, delimiter=','):
                if len(row) > 5:
                    dictDocsOutput[row[0]] = row[5]
    return dictDocsOutput


def read_json_annotation(path):
    """Read the BERT annotation JSON and group its treatment concepts.

    Input: ``path`` to the annotations JSON file.
    Output: dict keyed by the fifth field of each five-field concept returned
    by ``ConceptExtractor.extractionOfConcepts``; every value is the list of
    ``(concept[0], concept[2], concept[3], concept[4])`` tuples for concepts
    labelled CHEMOTHERAPY_DRUG, RADIOTHERAPY_DRUG or MEDICATION. Empty when
    ``path`` does not exist.
    NOTE(review): the fields look like (text, label, begin, end, document id)
    -- confirm against ConceptExtractor.
    """
    dictJsonOutput = {}
    if os.path.exists(path):
        with open(path) as json_file:
            annotations = json.load(json_file)
        concepts = ConceptExtractor.extractionOfConcepts(annotations)
        for concept in concepts:
            if len(concept) != 5:
                continue
            if concept[1] in _TREATMENT_LABELS:
                dictJsonOutput.setdefault(concept[4], []).append(
                    (concept[0], concept[2], concept[3], concept[4])
                )
    return dictJsonOutput


def _next_free_id(cursor, query):
    """Run a ``select max(...)`` query and return max + 1, or 0 if there is no non-negative maximum."""
    cursor.execute(query)
    next_id = 0
    # Iterate instead of fetchone() so the result set is fully consumed
    # before the cursor is reused.
    for row in cursor:
        if row[0] is not None and int(row[0]) >= 0:
            next_id = int(row[0]) + 1
    return next_id


def save_ttes_into_database(dictFinal):
    """Store the collected data in the dosage, treatment, note_dosage and note_treatment tables.

    Input: ``dictFinal``, a dict mapping a note id to a list of tuples
    ``(treatment, begin, end, dose, dose_begin, dose_end)``. An empty string
    in the treatment or dose field means that part is absent; the dose
    offsets are only read when the dose is non-empty.
    Output: none. Rows are inserted through the connection described by the
    ``ARES`` section of ``config.ini``.

    Ids are assigned by this function, starting at the current maximum id of
    each table plus one. Entries with neither a treatment nor a dose are
    skipped. Each entry's inserts are committed together.
    """
    configuration = configparser.ConfigParser()
    configuration.read('config.ini')
    ares = configuration['ARES']
    config = {
        'user': ares['DB_USER'],
        'password': ares['DB_PASSWORD'],
        'port': ares['DB_PORT'],
        'host': ares['DB_HOST'],
        'db': ares['DB_NAME'],
        'auth_plugin': ares['DB_AUTH_PLUGIN'],
    }

    cnx = mysql.connector.connect(**config)
    try:
        cursor = cnx.cursor()
        try:
            # Next free ids: the last inserted id of each table plus one.
            lastIdTreatment = _next_free_id(cursor, _SELECT_MAX_TREATMENT_ID)
            lastIdDosage = _next_free_id(cursor, _SELECT_MAX_DOSAGE_ID)

            for key, entries in dictFinal.items():
                for entry in entries:
                    print(lastIdTreatment, lastIdDosage)
                    name = entry[0]
                    dose = entry[3]
                    has_treatment = name != ""
                    has_dose = dose != ""

                    if has_dose:
                        dosage_params = (lastIdDosage, dose)
                        note_dosage_params = (
                            str(key), lastIdDosage, str(entry[4]), str(entry[5])
                        )
                        print(_INSERT_DOSAGE, dosage_params)
                        cursor.execute(_INSERT_DOSAGE, dosage_params)
                        print(_INSERT_NOTE_DOSAGE, note_dosage_params)
                        cursor.execute(_INSERT_NOTE_DOSAGE, note_dosage_params)
                        cnx.commit()
                        lastIdDosage += 1

                    if has_treatment:
                        # The treatment row always carries the id that
                        # note_treatment references, with or without a dose.
                        # NOTE(review): if "insert ignore" drops the row as a
                        # duplicate, note_treatment still points at this id --
                        # confirm against the table's unique constraints.
                        cursor.execute(_INSERT_TREATMENT, (lastIdTreatment, str(name)))
                        cursor.execute(
                            _INSERT_NOTE_TREATMENT,
                            (str(key), lastIdTreatment, str(entry[1]), str(entry[2])),
                        )
                        cnx.commit()
                        lastIdTreatment += 1
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when an insert fails.
        cnx.close()


#Funcion que relaciona los tratamientos y las metricas (cambiar cuando BERT las reconozca) que aparecen en un documento
#Input: documento, el diccionario de anotaciones BERT, el diccionario de metricas y el id del documento tratado
#Output: diccionario cuya clave es el id del documento y el valor es un listado de tuplas (tto,begin,end,dosis,begin,end) --> si no hay dosis relacionada, el campo dosis
estara vacio y no habra begin ni end de la dosis def relate_treatments(sentence,dictJsonOutput,dictOutput,key): lTreatments = [] lDoses = [] for i in range(0,len(dictJsonOutput[key])): if(dictJsonOutput[key][i][0] in sentence.lower()): indexes = [m.start() for m in re.finditer(dictJsonOutput[key][i][0].lower(), sentence.lower())] #Todas las ocurrencias if(dictJsonOutput[key][i][1] in indexes): lTreatments.append(dictJsonOutput[key][i]) for i in range(0,len(dictOutput[key])): if(dictOutput[key][i][0] in sentence.lower()): index = sentence.lower().index(dictOutput[key][i][0].lower()) if(dictOutput[key][i][1] == index): lDoses.append(dictOutput[key][i]) dictFinal = {} for i in range(0,len(lTreatments)): j=0 aux = "" enc = False while ((j