"""Extract hemogram concepts (leukocytes, lymphocytes, hemoglobin) from notes.

Pipeline: load pre-grouped annotated sentences from a pickle file, look up
the metric that follows each hemogram term through a remote annotator
service, persist the results as pickle files, and build the SQL statements
that would store them.
"""
import copy
import logging
import pickle
from datetime import date
# NOTE: the star import is kept in its original position (after the stdlib
# imports, before ``requests``) so that name shadowing stays exactly as it was.
from utils_sql import *
import requests

_logger = logging.getLogger(__name__)

# ``str(date.today())`` already is the ISO date, so no further splitting is needed.
currentDate = date.today().isoformat()

# Endpoint of the metrics annotator service.
_ANNOTATOR_URL = "http://138.4.130.153:8088/jkes/annotator/dateAnnotator"

# Annotation types (position 5 of an annotator record) kept as metrics.
_METRIC_TAGS = ('METRIC', 'NUMBER')

# NOTE(review): credentials are hard-coded in the source file; they are kept
# here unchanged so behavior is preserved, but they should be moved to
# configuration or environment variables.
_DB_HOST = "138.4.130.153"
_DB_PORT = 3306
_DB_USER = "medaldeveloper"
_DB_PASSWORD = "currentClarif3D$B"

# Placeholder stored when a sentence has no entry for a concept.
_MISSING_CONCEPT = 'None'


def checkDoses(idDoc, sent):
    """Ask the metrics annotator which metrics appear in a text fragment.

    Args:
        idDoc: Document id; sent to the service as the payload key.
        sent: Text fragment to annotate (stripped before sending).

    Returns:
        List with the first field of every annotator record whose sixth
        field is 'METRIC' or 'NUMBER'. The call is best-effort: on a request
        or response-shape failure the metrics collected so far (possibly
        none) are returned and the failure is logged.
    """
    metricsList = []
    try:
        payload = {str(idDoc): [currentDate, [sent.strip()]]}
        resp = requests.post(url=_ANNOTATOR_URL, json=payload, verify=False)
        annotations = resp.json()['response'][1]
        for doc_key in annotations:
            for group in annotations[doc_key]:
                for record in group:
                    if record[5] in _METRIC_TAGS:
                        metricsList.append(record[0])
    except (requests.RequestException, ValueError, LookupError,
            TypeError, AttributeError) as exc:
        # Previously every exception was swallowed silently; keep the
        # best-effort contract but leave a trace of what went wrong.
        _logger.warning("Metric annotation failed for document %s: %s",
                        idDoc, exc)
    return metricsList


def checkHemograms(dictHemograms):
    """Find hemogram terms and their metrics for every EHR.

    Args:
        dictHemograms: Mapping ``{EHR: [(docId, sentence, sentence_id,
            concepts, begin, end), ...]}``.

    Returns:
        ``{EHR: {(docId, sentence_id): [leukocytes, lymphocytes,
        hemoglobin]}}`` as produced by ``findHemograms``.

    Side effects:
        Prints progress, and writes the results to ``hemograms_v2.p`` and
        the failures to ``failsHemogram_v2.p``.
    """
    dictResult = {}
    dictFails = {}
    for count, key in enumerate(dictHemograms, start=1):
        print("Process: " + str(key))
        print(count)
        patient = {}
        errors = {}
        for concepts in dictHemograms[key]:
            # Fresh accumulators per sentence: findHemograms copies them into
            # its result, so nothing may leak from one sentence to the next.
            patient, errors = findHemograms(concepts, patient, errors,
                                            [], [], [], [], [], [])
        dictResult[key] = patient
        dictFails[key] = errors

    with open("hemograms_v2.p", "wb") as results_file:
        pickle.dump(dictResult, results_file)
    with open("failsHemogram_v2.p", "wb") as fails_file:
        pickle.dump(dictFails, fails_file)
    print("finish")
    # The caller feeds this value to insertHemograms; the function used to
    # return None, which made that call fail.
    return dictResult


def findHemograms(concepts, patient, errors, listLeucocitos, listFLeucocitos,
                  listLinfocitos, listFLinfocitos, listHemoglob, listFHemoglob):
    """Locate hemogram terms in one sentence together with their metrics.

    The sentence is split on commas; in every lower-cased fragment the stems
    'leucoci', 'linfoci' and 'hemoglob' are searched, and the text after the
    stem is sent to ``checkDoses``.

    Args:
        concepts: Tuple whose first three items are the document id, the
            sentence text and the sentence id.
        patient: Results so far, keyed by ``(docId, sentence_id)``; updated
            in place.
        errors: Failures so far, keyed the same way; updated in place.
        listLeucocitos, listLinfocitos, listHemoglob: Accumulators for the
            found entries; appended to in place.
        listFLeucocitos, listFLinfocitos, listFHemoglob: Accumulators for
            the failures; appended to in place.

    Returns:
        ``(patient, errors)``. A found entry is ``(label + " " + metric,
        begin, end)`` with offsets relative to the whole sentence; when no
        metric is found the bare label is stored instead and
        ``(sentence, fragment, label)`` is recorded as a failure. A key that
        is already present in ``patient`` is left untouched.
    """
    idDoc = concepts[0]
    sentConcept = concepts[1]
    sentConceptId = concepts[2]
    key = (idDoc, sentConceptId)
    if key in patient:
        return patient, errors

    targets = (
        ('leucoci', 'Leucocitos', listLeucocitos, listFLeucocitos),
        ('linfoci', 'Linfocitos', listLinfocitos, listFLinfocitos),
        ('hemoglob', 'Hemoglobina', listHemoglob, listFHemoglob),
    )
    offset = 0  # position of the current fragment inside the sentence
    for sent in sentConcept.split(","):
        sent = sent.lower()
        for stem, label, found, failed in targets:
            position = sent.find(stem)
            if position == -1:
                continue
            # Only the text after the first occurrence of the stem is annotated.
            metric = checkDoses(idDoc, sent[position + len(stem):])
            if metric:
                begin = offset + position
                found.append((label + " " + metric[0], begin,
                              begin + len(label)))
            else:
                failed.append((sentConcept, sent, label))
                found.append(label)
        offset += len(sent) + 1  # +1 for the comma removed by split

    patient[key] = [copy.copy(listLeucocitos), copy.copy(listLinfocitos),
                    copy.copy(listHemoglob)]
    failures = (copy.copy(listFLeucocitos), copy.copy(listFLinfocitos),
                copy.copy(listFHemoglob))
    errors[key] = [failure for failure in failures if failure != []]
    return patient, errors


def _close_connection(connection):
    """Close the cursor and then the connection of a connection dict."""
    connection['cursor'].close()
    connection['cnx'].close()


def insertHemograms(hemograms, execute=False):
    """Store the hemogram concepts of every EHR.

    Reads the highest ``hemogram_id`` already stored to derive the next id,
    then processes every EHR through ``insertAnnotations``.

    Args:
        hemograms: ``{EHR: {(docId, sentence_id): [leukocytes, lymphocytes,
            hemoglobin]}}``. Nothing is done when it is empty or None.
        execute: Forwarded to ``insertAnnotations``. With the default False
            no row is written, matching the original source where every
            insert was commented out.
    """
    if not hemograms:
        return

    queryLastId = "select max(hemogram_id) from concept_extraction.hemogram order by hemogram_id asc;"
    manager = generate_db_connection(_DB_HOST, _DB_PORT, _DB_USER,
                                     _DB_PASSWORD,
                                     "clarify_breast_annotations")
    try:
        # Fetch the last id inserted in the table.
        manager['cursor'].execute(queryLastId)
        lastIdHemogram = 0
        for row in manager['cursor']:
            if row[0] is not None and int(row[0]) >= 0:
                lastIdHemogram = int(row[0]) + 1
    finally:
        _close_connection(manager)

    breast_clarif_breast = generate_db_connection(
        _DB_HOST, _DB_PORT, _DB_USER, _DB_PASSWORD,
        "clarify_breast_annotations")
    try:
        clarify_conceptExt = generate_db_connection(
            _DB_HOST, _DB_PORT, _DB_USER, _DB_PASSWORD, "concept_extraction")
        try:
            for ehr in hemograms:
                # Carry the id forward so EHRs do not reuse the same ids.
                lastIdHemogram = insertAnnotations(
                    ehr, hemograms[ehr], lastIdHemogram,
                    breast_clarif_breast, clarify_conceptExt,
                    execute=execute)
        finally:
            _close_connection(clarify_conceptExt)
    finally:
        # These two connections were never closed before; the first one was
        # closed twice instead.
        _close_connection(breast_clarif_breast)


def _primary_annotation(entries):
    """Return ``(text, begin, end)`` for the first entry of a concept list.

    An empty list yields the 'None' placeholder, and a bare label (term
    found without a metric) yields the label; both come without offsets.
    """
    if not entries:
        return (_MISSING_CONCEPT, None, None)
    first = entries[0]
    if isinstance(first, str):
        return (first, None, None)
    return (first[0], first[1], first[2])


def insertAnnotations(ehr, concepts, lastIdHemogram, cursorBreast,
                      cursorConcept, execute=False):
    """Build (and optionally run) the inserts for one EHR.

    Args:
        ehr: EHR identifier.
        concepts: ``{(docId, sentence_id): [leukocytes, lymphocytes,
            hemoglobin]}``; it is not modified.
        lastIdHemogram: Id to assign to the first hemogram row.
        cursorBreast: Connection dict (keys 'cursor' and 'cnx') for the
            breast annotations database.
        cursorConcept: Connection dict for the concept extraction database.
        execute: When True the statements are executed and committed. The
            default False only builds them, which is what the original
            source did (all execute/commit calls were commented out).

    Returns:
        The next unused hemogram id.

    NOTE(review): the statements use ``%s`` placeholders, which assumes the
    driver behind ``generate_db_connection`` is a MySQL DB-API driver -
    TODO confirm before enabling ``execute``.
    """
    for docs, listConcepts in concepts.items():
        idDoc = docs[0]
        sentence_id = docs[1]
        annotations = [_primary_annotation(listConcepts[i]) for i in range(3)]
        values = tuple(text for text, _, _ in annotations)

        insert_breast = (
            "insert into clarify_breast_annotations.hemogram "
            "(EHR, leucocytes, lymphocytes, redBloodCells) "
            "values (%s, %s, %s, %s)",
            (str(ehr),) + values,
        )
        insert_conceptExt = (
            "insert into concept_extraction.hemogram "
            "(hemogram_id, leucocytes, lymphocytes, red_blood_cells) "
            "values (%s, %s, %s, %s)",
            (lastIdHemogram,) + values,
        )
        # NOTE(review): the original statement also listed the columns
        # negation and speculation but supplied no values for them (the
        # VALUES clause was malformed), so they are left to the table
        # defaults here. Note rows are only built for annotations that carry
        # offsets - confirm both choices against the schema.
        note_inserts = [
            (
                "insert into concept_extraction.note_hemogram "
                "(note_id, sentence_id, hemogram_id, begin, end) "
                "values (%s, %s, %s, %s, %s)",
                (idDoc, sentence_id, lastIdHemogram, begin, end),
            )
            for _, begin, end in annotations
            if begin is not None
        ]

        if execute:
            cursorBreast['cursor'].execute(*insert_breast)
            cursorBreast['cnx'].commit()
            cursorConcept['cursor'].execute(*insert_conceptExt)
            cursorConcept['cnx'].commit()
            for statement in note_inserts:
                cursorConcept['cursor'].execute(*statement)
            cursorConcept['cnx'].commit()

        # Advance only after the note rows were built, so that they refer to
        # the hemogram row created above.
        lastIdHemogram += 1
    return lastIdHemogram


def dictConceptsEhr(docId, listEhr, sentence, sentence_id, concepts, begin,
                    end, umlsBatch):
    """Group the annotated concepts by EHR.

    Args:
        docId, listEhr, sentence, sentence_id, concepts, begin, end:
            Parallel sequences indexed by position; item ``j`` of each one
            describes the same annotation.
        umlsBatch: Container whose ``'ehr'`` entry lists the EHRs to include
            (presumably a pandas DataFrame - verify against the caller).

    Returns:
        ``{EHR: [(docId, sentence, sentence_id, concept, begin, end), ...]}``
        with one key per distinct EHR in ``umlsBatch['ehr']``.
    """
    # Group once instead of rescanning listEhr for every distinct EHR.
    grouped = {}
    for j in range(len(listEhr)):
        grouped.setdefault(listEhr[j], []).append(
            (docId[j], sentence[j], sentence_id[j], concepts[j], begin[j],
             end[j]))

    conceptsByEhr = {}
    for i in range(len(umlsBatch)):
        ehr = umlsBatch['ehr'][i]
        if ehr not in conceptsByEhr:
            conceptsByEhr[ehr] = list(grouped.get(ehr, []))
    return conceptsByEhr


def hemogramConcepts():
    """Run the main flow of the hemogram extraction process.

    Loads the per-EHR sentences from ``umlsDict_hemograms_v2.p``, extracts
    the hemograms and hands them to ``insertHemograms``.
    """
    # Disabled earlier stage, kept for reference: it built the pickle file
    # loaded below.
    # umls_hemograms_v2 = compose_dataframe_from_query(breast_clarif_mngr_umls, "umls_old_dx", None, None, "concept in ('Hemoglobina', 'Leucocitos', 'Linfocitos') and entity_flag = 'Hemogram'", None)
    # pickle.dump(umls_hemograms_v2, open("umls_hemograms_v2.p", "wb"))
    # umls_hemograms_v2 = pickle.load(open('umls_hemograms_v2.p', "rb"))
    # docId_hemogram = umls_hemograms_v2['document_id']
    # ehr_hemogram = umls_hemograms_v2['ehr']
    # concepts_hemogram = umls_hemograms_v2['concept']
    # sentence_hemogram = umls_hemograms_v2['sentence']
    # sentenceId_hemogram = umls_hemograms_v2['sentence_id']
    # beginConcept_hemogram = umls_hemograms_v2['begin']
    # endConcept_hemogram = umls_hemograms_v2['end']
    # dictHemograms_v2 = dictConceptsEhr(docId_hemogram, ehr_hemogram, sentence_hemogram, sentenceId_hemogram, concepts_hemogram, beginConcept_hemogram, endConcept_hemogram, umls_hemograms_v2)
    # pickle.dump(dictHemograms_v2, open("umlsDict_hemograms_v2.p", "wb"))

    # NOTE: pickle must only be used on trusted files; this one is written
    # by the disabled stage above.
    with open('umlsDict_hemograms_v2.p', "rb") as source_file:
        dict_hemograms = pickle.load(source_file)

    # Output: hemograms (the failures are written to failsHemogram_v2.p).
    hemograms = checkHemograms(dict_hemograms)
    #hemograms = pickle.load(open('hemograms.p', "rb"))
    #failsHemograms = pickle.load(open('failsHemogram.p', "rb"))
    insertHemograms(hemograms)


# NOTE(review): the pipeline runs on import, as in the original; consider an
# ``if __name__ == "__main__":`` guard once no caller relies on that.
hemogramConcepts()