"""Relate treatments annotated by BERT with the dose metrics found in a note.

Script that retrieves the concepts BERT annotated as treatments
(CHEMOTHERAPY_DRUG, RADIOTHERAPY_DRUG, MEDICATION) and relates them,
provisionally, with the metrics returned by the external date/metric
annotator service (to be replaced by BERT annotations once BERT recognises
doses).
"""
# Provenance (taken from the patch header this file was delivered in):
#   commit: 3606e8eb1b223b4c4e679689df8a3c4c176d8cbc
#   author: Javier Rodriguez Vidal
#   date:   Fri, 19 Feb 2021 11:18:11 +0000
#   path:   Treatments_Doses_Extractor/treatmentDosesRelation.py (new file, 265 lines)

import os
import re
import json
import csv
import requests
import ConceptExtractor
import configparser
import mysql.connector

# Endpoint of the date/metric annotator service.
_DATE_ANNOTATOR_URL = "http://138.4.130.153:8088/jkes/annotator/dateAnnotator"

# BERT labels that are treated as treatments.
_TREATMENT_LABELS = ("CHEMOTHERAPY_DRUG", "RADIOTHERAPY_DRUG", "MEDICATION")


def call_date_metric_ann_module(json_body):
    """Ask the annotator service for the metrics that appear in a document.

    Args:
        json_body: JSON-serialisable payload posted to the service (per the
            original comment: document id, date and text).

    Returns:
        dict mapping each key of the response entries to a list of
        ``(annotation[0], annotation[2])`` tuples, one per annotation whose
        field 5 equals ``"METRIC"``.
        NOTE(review): the consumer below uses these as (text, begin offset)
        and the key as the document id -- confirm against the service.

    Raises:
        requests.RequestException: if the HTTP request itself fails (the
            request is deliberately outside the best-effort ``try``).
    """
    r = requests.post(_DATE_ANNOTATOR_URL, json=json_body)
    dictOutput = {}
    try:
        answer = r.json()["response"]
        # The first element of the response is skipped, as the original loop
        # started at index 1. NOTE(review): presumably a header -- confirm.
        for entry in answer[1:]:
            for key, groups in entry.items():
                for group in groups:
                    for ann in group:
                        if ann[5] == "METRIC":
                            # Group under the same key that is read back
                            # later; the original tested membership on
                            # ann[1] but stored under `key`, which could
                            # overwrite metrics already collected.
                            dictOutput.setdefault(key, []).append((ann[0], ann[2]))
    except (ValueError, KeyError, IndexError, TypeError, AttributeError):
        # Best effort: a malformed or unexpected response keeps whatever was
        # collected so far instead of aborting the caller.
        pass

    return dictOutput


def read_documents(path):
    """Read the clinical notes from a CSV file.

    Provisional: will change once BERT returns the document id.

    Args:
        path: path to the CSV file.

    Returns:
        dict mapping column 0 of every row to column 5 of the same row;
        empty if *path* does not exist.
    """
    dictDocsOutput = {}

    if not os.path.exists(path):
        return dictDocsOutput

    # newline='' lets the csv module handle line breaks inside quoted fields.
    with open(path, newline='') as csv_file:
        for row in csv.reader(csv_file, delimiter=','):
            # Blank or short rows have no column 5; skip them instead of
            # raising IndexError.
            if len(row) > 5:
                dictDocsOutput[row[0]] = row[5]

    return dictDocsOutput


def read_json_annotation(path):
    """Read the BERT annotation JSON and keep only the treatment concepts.

    To be changed to also retrieve doses once BERT recognises them.

    Args:
        path: path to the annotation JSON file.

    Returns:
        dict keyed by ``concept[4]`` whose values are lists of
        ``(concept[0], concept[2], concept[3], concept[4])`` tuples, for every
        5-field concept labelled CHEMOTHERAPY_DRUG, RADIOTHERAPY_DRUG or
        MEDICATION; empty if *path* does not exist.
        NOTE(review): ``concept[4]`` is presumably the document id and fields
        2/3 the begin/end offsets -- confirm against ConceptExtractor.
    """
    dictJsonOutput = {}

    if not os.path.exists(path):
        return dictJsonOutput

    with open(path) as json_file:
        annotations = json.load(json_file)

    concepts = ConceptExtractor.extractionOfConcepts(annotations)
    for concept in concepts:
        if len(concept) == 5 and concept[1] in _TREATMENT_LABELS:
            dictJsonOutput.setdefault(concept[4], []).append(
                (concept[0], concept[2], concept[3], concept[4])
            )

    return dictJsonOutput


def _next_free_id(cursor, column, table):
    """Return ``max(column) + 1`` for *table*, or 0 when there is no usable id.

    *column* and *table* are fixed identifiers supplied by this module, never
    user data, so concatenating them into the statement is safe.
    """
    cursor.execute("select max(" + column + ") from " + table + ";")
    next_id = 0
    # fetchall() drains the result set so the cursor can be reused.
    for row in cursor.fetchall():
        if (row[0] is not None) and (int(row[0]) >= 0):
            next_id = int(row[0]) + 1
    return next_id


def save_ttes_into_database(dictFinal):
    """Store the collected data in dosage, treatment, note_dosage and note_treatment.

    Connection settings are read from the ``ARES`` section of ``config.ini``.

    Args:
        dictFinal: dict keyed by note id whose values are lists of tuples
            ``(treatment, begin, end, dose, dose_begin, dose_end)``. An empty
            string in ``treatment`` or ``dose`` means "absent"; the dose
            offsets are only read when ``dose`` is not empty.

    Raises:
        mysql.connector.Error: on connection or statement failure. The cursor
            and connection are closed in every case.
    """
    configuration = configparser.ConfigParser()
    configuration.read('config.ini')

    config = {'user': configuration['ARES']['DB_USER'],
              'password': configuration['ARES']['DB_PASSWORD'],
              'port': configuration['ARES']['DB_PORT'],
              'host': configuration['ARES']['DB_HOST'],
              'db': configuration['ARES']['DB_NAME'],
              'auth_plugin': configuration['ARES']['DB_AUTH_PLUGIN']
              }

    # Parameterised statements: the values come from free clinical text, so
    # they must never be concatenated into the SQL string (quotes in a drug
    # name or dose would break the statement or allow injection).
    treatment_sql = ("insert ignore into concept_extraction.treatment "
                     "(treatment_id,name) values (%s,%s);")
    note_treatment_sql = ("insert into concept_extraction.note_treatment "
                          "(note_id,treatment_id,begin,end) values (%s,%s,%s,%s);")
    dosage_sql = ("insert ignore into concept_extraction.dosage "
                  "(dosage_id,description) values (%s,%s);")
    note_dosage_sql = ("insert ignore into concept_extraction.note_dosage "
                       "(note_id,dosage_id,begin,end) values (%s,%s,%s,%s);")

    cnx = mysql.connector.connect(**config)
    try:
        cursor = cnx.cursor()
        try:
            # Next ids to use: last id stored in each table plus one.
            lastIdTreatment = _next_free_id(
                cursor, "treatment_id", "concept_extraction.treatment")
            lastIdDosage = _next_free_id(
                cursor, "dosage_id", "concept_extraction.dosage")

            for key, items in dictFinal.items():
                for item in items:
                    print(lastIdTreatment, lastIdDosage)
                    treatment = item[0]
                    dose = item[3]

                    # Dose first, then treatment: same order as the original.
                    if dose != "":
                        dosage_params = (lastIdDosage, str(dose))
                        note_dosage_params = (str(key), lastIdDosage,
                                              str(item[4]), str(item[5]))
                        print(dosage_sql, dosage_params)
                        cursor.execute(dosage_sql, dosage_params)
                        print(note_dosage_sql, note_dosage_params)
                        cursor.execute(note_dosage_sql, note_dosage_params)
                        cnx.commit()
                        lastIdDosage += 1

                    if treatment != "":
                        # The treatment row always carries the id that the
                        # note_treatment row references. The original
                        # "treatment + dose" branch omitted the id and
                        # stored str() of the whole tuple as the name.
                        # NOTE(review): with "insert ignore", a duplicate
                        # name (if the column is unique) leaves this id
                        # without a treatment row -- confirm the schema.
                        cursor.execute(treatment_sql,
                                       (lastIdTreatment, str(treatment)))
                        cursor.execute(note_treatment_sql,
                                       (str(key), lastIdTreatment,
                                        str(item[1]), str(item[2])))
                        cnx.commit()
                        lastIdTreatment += 1
        finally:
            cursor.close()
    finally:
        cnx.close()


# NOTE(review): the definition of relate_treatments is cut off in the reviewed
# chunk, so its visible part is left exactly as found on the next line.
#Funcion que relaciona los tratamientos y las metricas (cambiar cuando BERT las reconozca) que aparecen en un documento +#Input: documento, el diccionario de anotaciones BERT, el diccionario de metricas y el id del documento tratado +#Output: diccionario cuya clave es el id del documento y el valor es un listado de tuplas (tto,begin,end,dosis,begin,end) --> si no hay dosis relacionada, el campo dosis estara vacio y no habra begin ni end de la dosis +def relate_treatments(sentence,dictJsonOutput,dictOutput,key): + + lTreatments = [] + lDoses = [] + + for i in range(0,len(dictJsonOutput[key])): + if(dictJsonOutput[key][i][0] in sentence.lower()): + indexes = [m.start() for m in re.finditer(dictJsonOutput[key][i][0].lower(), sentence.lower())] #Todas las ocurrencias + if(dictJsonOutput[key][i][1] in indexes): + lTreatments.append(dictJsonOutput[key][i]) + + + for i in range(0,len(dictOutput[key])): + if(dictOutput[key][i][0] in sentence.lower()): + index = sentence.lower().index(dictOutput[key][i][0].lower()) + if(dictOutput[key][i][1] == 
index): + lDoses.append(dictOutput[key][i]) + + dictFinal = {} + + for i in range(0,len(lTreatments)): + j=0 + aux = "" + enc = False + while ((j