Delete antecedentesFamiliares.py

8f4b70e8 · Lucia Catalan Gris · d92eda54 · d92eda54
Commit 8f4b70e8 authored Feb 19, 2021 by Lucia Catalan Gris
Hide whitespace changes
Inline Side-by-side

Showing with 0 additions and 96 deletions

Familiar_Antecedents_Extractor/antecedentesFamiliares.py Familiar_Antecedents_Extractor/antecedentesFamiliares.py +0 -96

No files found.
--- a/Familiar_Antecedents_Extractor/antecedentesFamiliares.py
+++ b/Familiar_Antecedents_Extractor/antecedentesFamiliares.py
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Feb  9 10:15:21 2021
-
-@author: Lucia
-"""
-import json
-import ConceptExtractor 
-import pandas as pd
-
-#----------------- LOAD ANNOTATIONS -------------------------------------------
-
-# Parse the annotations from 'annotations.json' (path relative to the current
-# working directory); the file is closed when the `with` block exits.
-with open('annotations.json') as json_file:
-   annotations = json.load(json_file)
-
-#------------------ clarifyv2.document ----------------------------------------
-
-# Document table read from CSV; the function below looks rows up in it with
-# .loc, using the document index carried by each annotation.
-tabla_documentos = pd.read_csv("documentos_clarifyv2.csv")
-    
-#------------------- FAMILY ANTECEDENTS ---------------------------------------
-
-# Fills the family_antecedents and note_family_antecendets tables of
-# concept_extraction.
-# Input:  annotations      - BERT annotations (list of lists of dictionaries);
-#                            the dictionaries are read through the keys
-#                            "word", 'entity' and 'start'.
-#         tabla_documentos - EHR and id of the documents the notes come from;
-#                            rows are looked up with .loc by document index.
-# Output: two CSV files written to the current working directory:
-#         'family_antecedents.csv' and 'note_family_antecendets.csv'.
-#         The function itself returns nothing.
-def antecedentes_familiares_tablas(annotations, tabla_documentos):
-    
-    # Extract the annotations and flatten the per-document lists into one list.
-    # Each flattened annotation is read positionally below:
-    # [0] concept, [1] entity, [2] start, [3] end, [4] document index.
-    # NOTE(review): that layout is inferred from the column names assigned
-    # here; confirm against ConceptExtractor.extractionOfConcepts.
-    resultado = ConceptExtractor.extractionOfConcepts(annotations)
-    anotaciones = [anotacion for lista in resultado for anotacion in lista]
-    # Column order matters: rows of this frame are accessed by position later
-    # (0 EHR, 1 document_id, 2 concept, 3 entity, 4 start, 5 end, 6 id_doc).
-    concepts = pd.DataFrame({
-            'EHR': [tabla_documentos.loc[anotaciones[i][4]][1] for i in range(0, len(anotaciones))],  
-            'document_id': [tabla_documentos.loc[anotaciones[i][4]][0] for i in range(0, len(anotaciones))],
-            'concept' : [anotaciones[i][0] for i in range(0, len(anotaciones))],
-            'entity' : [anotaciones[i][1] for i in range(0, len(anotaciones))],
-            'start':[anotaciones[i][2] for i in range(0, len(anotaciones))],
-            'end': [anotaciones[i][3] for i in range(0, len(anotaciones))],
-            'id_doc': [anotaciones[i][4] for i in range(0, len(anotaciones))]})
-    
-    # Keep only the FAMILY concepts.
-    Family = concepts.loc[concepts['entity'] == 'FAMILY']
-    # Accumulators: one entry per family mention that gets linked to a cancer
-    # concept. `conteo` is the running id assigned to each such link.
-    # NOTE(review): another_family_flag is only set to True right before a
-    # break and is reset after every inner loop, so the `== False` check below
-    # appears to always hold.
-    another_family_flag = False
-    conteo = 0
-    family_antecedents_id = []
-    family_member = []
-    cancer_type_family_member = []
-    begin = []
-    end = []
-    note_id = []
-
-    for j in range(0, len(Family)): 
-    
-        # Position, within the document's raw annotations, of the first token
-        # whose "word" equals the first word of the family concept (None when
-        # no token matches).
-        # NOTE(review): a None result would make `indice + 1` below raise
-        # TypeError; confirm a match is always guaranteed.
-        indice = next((pos for pos, item in enumerate(annotations[Family.iloc[j][6]]) if item["word"] == Family.iloc[j][2].split()[0]), None)
-    
-        # Inspect the three tokens that follow the family word.
-        # NOTE(review): no bounds check is visible; this presumably raises
-        # IndexError when the family word is within three tokens of the end.
-        for i in range(indice + 1, indice + 4):
-        
-            # A cancer concept starts at this token.
-            if annotations[Family.iloc[j][6]][i].get('entity') == 'B_CANCER_CONCEPT' and another_family_flag == False:
-                # Id of the new family-antecedent record.
-                family_antecedents_id.append(conteo)
-                conteo = conteo + 1
-            
-                # Family concept text, document id, and the concept's
-                # start/end offsets.
-                family_member.append(Family.iloc[j][2])
-                note_id.append(Family.iloc[j][1])            
-                begin.append(Family.iloc[j][4])
-                end.append(Family.iloc[j][5])
-            
-                # Find the extracted CANCER_CONCEPT annotation of the same
-                # document that starts where this token starts, and store its
-                # concept text.
-                # NOTE(review): if nothing matches, nothing is appended here
-                # and this list ends up shorter than the others, which would
-                # break the DataFrame construction below.
-                for a in anotaciones:
-                    if (a[1] == 'CANCER_CONCEPT') and (a[4] == Family.iloc[j][6]) and (a[2] == annotations[Family.iloc[j][6]][i].get('start')):
-                        cancer_type_family_member.append(a[0])                
-                        break
-                # Stop scanning: only the first cancer concept is linked.
-                break
-        
-            # Another FAMILY concept starts first: stop without linking.
-            elif annotations[Family.iloc[j][6]][i].get('entity') == 'B_FAMILY':
-                another_family_flag = True
-                break
-        
-        # Reset the flag before handling the next family mention.
-        another_family_flag = False     
-            
-        
-    # TABLES: build both frames from the accumulators and write them as CSV
-    # without the index column.
-    family_antecedents = pd.DataFrame({'family_antecedents_id':family_antecedents_id,
-                                   'family_member': family_member,
-                                   'cancer_type_family_member':cancer_type_family_member})
-    family_antecedents.to_csv(r'family_antecedents.csv', index = False)   
-    note_family_antecendets = pd.DataFrame({'note_id': note_id,
-                                        'family_antecedents_id':family_antecedents_id,
-                                        'begin': begin,
-                                        'end':end})
-    note_family_antecendets.to_csv(r'note_family_antecendets.csv', index = False) 
-
-# Run the extraction with the module-level data loaded earlier in this file.
-# There is no __main__ guard, so this executes whenever the module is run or
-# imported.
-antecedentes_familiares_tablas(annotations, tabla_documentos)
\ No newline at end of file