Commit b2213934 authored by Lucia Catalan Gris

Upload New File

parent 8f4b70e8
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 9 10:15:21 2021
@author: Lucia
"""
import sys, os, json
import ConceptExtractor
import pandas as pd
#------------------- FAMILY ANTECEDENTS ---------------------------------------
# Fills the family_antecedents and note_family_antecendets tables of
# concept_extraction.
def _locate_family_token(tokens, first_word, start):
    """Return the index of the token that opens a family mention, or None.

    A token whose ``start`` equals the mention's own start is preferred, so a
    relative named several times in one note resolves to the right occurrence.
    When no token matches on both word and start, the first token with a
    matching word is used (the original lookup rule).
    """
    first_match = None
    for position, token in enumerate(tokens):
        if token.get("word") != first_word:
            continue
        if token.get("start") == start:
            return position
        if first_match is None:
            first_match = position
    return first_match


def _cancer_token_after(tokens, family_index, window=3):
    """Return the B_CANCER_CONCEPT token found right after a family mention.

    Only the ``window`` tokens following ``family_index`` are inspected; the
    slice keeps the lookup inside the note even when the mention sits at its
    very end.  The search stops (returning None) as soon as another B_FAMILY
    token appears -- presumably because a later cancer concept would then
    refer to that other relative.
    """
    for token in tokens[family_index + 1:family_index + 1 + window]:
        entity = token.get("entity")
        if entity == "B_CANCER_CONCEPT":
            return token
        if entity == "B_FAMILY":
            return None
    return None


def antecedentes_familiares_tablas(annotations, tabla_documentos):
    """Write the family-antecedent tables to two CSV files.

    Parameters
    ----------
    annotations : list of lists of dicts
        BERT token annotations, one inner list per note.  The tokens are read
        through their "word", "entity" and "start" keys.
    tabla_documentos : pandas.DataFrame
        Table of the documents the notes come from.  Its rows are looked up
        by label with the note index, and the value in the first column is
        stored as ``note_id``.

    Output
    ------
    ``family_antecedents.csv`` and ``note_family_antecendets.csv`` are written
    to the current working directory.  Nothing is returned.

    Notes
    -----
    Each concept returned by ``ConceptExtractor.extractionOfConcepts`` is read
    positionally as (concept, entity, start, end, id_doc).  A FAMILY concept is
    kept only when a B_CANCER_CONCEPT token follows it within three tokens.
    FAMILY concepts whose first word cannot be found among the note tokens are
    skipped, and a cancer token without a matching CANCER_CONCEPT annotation
    yields an empty ``cancer_type_family_member`` so that every column keeps
    the same length.
    """
    # Extract the annotations and flatten them into a single list.
    resultado = ConceptExtractor.extractionOfConcepts(annotations)
    anotaciones = [anotacion for lista in resultado for anotacion in lista]

    # (id_doc, start) -> cancer concept text.  Built once instead of rescanning
    # every annotation per match; setdefault keeps the first annotation found,
    # which is the one a linear scan with ``break`` would have returned.
    cancer_concepts = {}
    for anotacion in anotaciones:
        if anotacion[1] == 'CANCER_CONCEPT':
            cancer_concepts.setdefault((anotacion[4], anotacion[2]), anotacion[0])

    filas = []
    for anotacion in anotaciones:
        # Keep FAMILY concepts only.
        if anotacion[1] != 'FAMILY':
            continue
        concept, start, end, id_doc = (
            anotacion[0], anotacion[2], anotacion[3], anotacion[4])

        words = concept.split()
        if not words:
            continue

        tokens = annotations[id_doc]
        family_index = _locate_family_token(tokens, words[0], start)
        if family_index is None:
            # The concept text does not line up with any token of the note.
            continue

        cancer_token = _cancer_token_after(tokens, family_index)
        if cancer_token is None:
            continue

        filas.append({
            # Running id of the antecedent, starting at 0.
            'family_antecedents_id': len(filas),
            'family_member': concept,
            # None when no CANCER_CONCEPT annotation starts at this token.
            'cancer_type_family_member': cancer_concepts.get(
                (id_doc, cancer_token.get('start'))),
            # Explicit positional access: first column of the document row.
            'note_id': tabla_documentos.loc[id_doc].iloc[0],
            'begin': start,
            'end': end,
        })

    # TABLES
    registros = pd.DataFrame(filas, columns=[
        'family_antecedents_id', 'family_member', 'cancer_type_family_member',
        'note_id', 'begin', 'end'])

    family_antecedents = registros[
        ['family_antecedents_id', 'family_member', 'cancer_type_family_member']]
    family_antecedents.to_csv(r'family_antecedents.csv', index=False)

    note_family_antecendets = registros[
        ['note_id', 'family_antecedents_id', 'begin', 'end']]
    note_family_antecendets.to_csv(r'note_family_antecendets.csv', index=False)
#-------------------------- MAIN ----------------------------------------------
# Input:  BERT annotations (list of lists of dicts) as a JSON file, and a CSV
#         with the EHR and id of the documents the notes come from
#         (removal of the latter is pending).
# Output: two CSV files.
def main():
    """Run the family-antecedent extraction from the command line.

    Reads two paths from ``sys.argv``: the JSON file with the annotations and
    the CSV file with the document table.  When an argument is missing or a
    path does not exist, a message is printed and nothing else is done.
    Otherwise both files are loaded and passed to
    ``antecedentes_familiares_tablas``.
    """
    # Fail with a readable message instead of an IndexError on sys.argv.
    if len(sys.argv) < 3:
        print("Usage: <script> <annotations.json> <documents.csv>")
        return

    jsonRoute = sys.argv[1]
    documentRoute = sys.argv[2]

    # Validate both paths before doing any parsing work.
    if not os.path.exists(jsonRoute):
        print("First argument file doesn't exist")
        return
    if not os.path.exists(documentRoute):
        print("Second argument file doesn't exist")
        return

    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(jsonRoute, encoding="utf-8") as json_file:
        annotations = json.load(json_file)
    tabla_documentos = pd.read_csv(documentRoute)

    antecedentes_familiares_tablas(annotations, tabla_documentos)


if __name__ == "__main__":
    main()
# Disabled ad-hoc run, kept as a bare string literal so it is never executed:
# it loads 'annotations.json' and 'documentos_clarifyv2.csv' from the working
# directory and calls antecedentes_familiares_tablas directly instead of
# going through main() and its command-line arguments.
'''
#----------------- EXTRAER ANOTACIONES ----------------------------------------
with open('annotations.json') as json_file:
annotations = json.load(json_file)
#------------------ clarifyv2.document ----------------------------------------
tabla_documentos = pd.read_csv("documentos_clarifyv2.csv")
antecedentes_familiares_tablas(annotations, tabla_documentos)
'''
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment