Commit 3cb781cd authored by Lucia Catalan Gris

Upload New File

parent c040bbb2
# -*- coding: utf-8 -*-
"""
Created on Mon Feb 8 09:49:51 2021
@author: Lucia
"""
import sys, os, json
import ConceptExtractor
import pandas as pd
#---------------------------- FUNCIONES ---------------------------------------
def stage_tablas(Estadio):
    """Build the stage tables from the rows tagged as stage mentions.

    Columns of ``Estadio`` are read by position, following the layout of the
    ``concepts`` frame built in ``estadioTnm``: 1 = document id, 2 = concept
    text, 4 = start offset, 5 = end offset.

    Args:
        Estadio: DataFrame with one row per stage mention.

    Returns:
        A ``(stage, note_stage)`` tuple of DataFrames:
        ``stage`` has columns ``stage_id`` (0..n-1) and ``stage`` (the second
        whitespace-separated token of the concept text);
        ``note_stage`` has columns ``note_id``, ``stage_id``, ``begin``, ``end``.

    Raises:
        IndexError: if a concept text has fewer than two whitespace-separated
            tokens.
    """
    # Explicit column-wise ``iloc`` instead of ``row[int]``: integer keys on a
    # label-indexed Series only worked through a deprecated positional fallback.
    stage_id = list(range(len(Estadio)))
    note_id_stage = Estadio.iloc[:, 1].tolist()
    begin_stage = Estadio.iloc[:, 4].tolist()
    end_stage = Estadio.iloc[:, 5].tolist()
    stage_labels = [str(concept.split()[1]) for concept in Estadio.iloc[:, 2]]

    stage = pd.DataFrame({'stage_id': stage_id, 'stage': stage_labels})
    note_stage = pd.DataFrame({'note_id': note_id_stage,
                               'stage_id': stage_id,
                               'begin': begin_stage,
                               'end': end_stage})
    return stage, note_stage
# Stage for each (t, n) pair when m == '0', t != '4' and n != '3'.
_STAGE_BY_T_N = {
    ('1', '0'): 'i',
    ('2', '0'): 'iia',
    ('3', '0'): 'iib',
    ('0', '1'): 'iia',
    ('0', '2'): 'iiia',
    ('1', '1'): 'iia',
    ('2', '1'): 'iib',
    ('1', '2'): 'iiia',
    ('2', '2'): 'iiia',
    ('3', '1'): 'iiia',
    ('3', '2'): 'iiia',
}


def _tnm_component(concept, letter):
    """Return the character following the first *letter* in *concept*, or None."""
    position = concept.find(letter)
    if position == -1 or position + 1 >= len(concept):
        return None
    return concept[position + 1]


def _tnm_to_stage(t, n, m):
    """Translate one (t, n, m) triple into a stage label, or None if no rule applies."""
    if m == '1':
        return 'iv'
    if m != '0':
        return None
    if t == '4':
        return 'iiib'
    if n == '3':
        return 'iiic'
    return _STAGE_BY_T_N.get((t, n))


def tnm_tablas(Tnm, stage):
    """Build the TNM tables and the stages that can be derived from them.

    Columns of ``Tnm`` are read by position, following the layout of the
    ``concepts`` frame built in ``estadioTnm``: 1 = document id, 2 = concept
    text, 4 = start offset, 5 = end offset. For every row the single
    character following the first ``'t'``, ``'n'`` and ``'m'`` of the concept
    text is taken as that component (``None`` when the letter is absent or is
    the last character).

    Args:
        Tnm: DataFrame with one row per TNM mention.
        stage: DataFrame of already known stages with a ``stage_id`` column;
            new stage ids continue after its largest id (or start at 0 when
            it is empty).

    Returns:
        A ``(tnm, note_tnm, traduccion_stage, traduccion_note_stage)`` tuple:
        ``tnm``: ``tnm_id``, ``t``, ``n``, ``m``, ``stage_id`` (missing when
        the row could not be translated into a stage);
        ``note_tnm``: ``note_id``, ``tnm_id``, ``begin``, ``end``;
        ``traduccion_stage``: ``stage_id``, ``stage`` for translated rows;
        ``traduccion_note_stage``: ``note_id``, ``stage_id``, ``begin``,
        ``end`` for translated rows.
    """
    # Explicit column-wise ``iloc`` instead of ``row[int]``: integer keys on a
    # label-indexed Series only worked through a deprecated positional fallback.
    tnm_id = list(range(len(Tnm)))
    note_id_tnm = Tnm.iloc[:, 1].tolist()
    begin_tnm = Tnm.iloc[:, 4].tolist()
    end_tnm = Tnm.iloc[:, 5].tolist()

    # Exactly one value per row and component keeps t/n/m aligned with tnm_id
    # even when a letter is repeated or missing in the concept text.
    concepts = list(Tnm.iloc[:, 2])
    t = [_tnm_component(concept, 't') for concept in concepts]
    n = [_tnm_component(concept, 'n') for concept in concepts]
    m = [_tnm_component(concept, 'm') for concept in concepts]

    # Continue numbering after the existing stages so ids never collide.
    if len(stage) > 0:
        next_stage_id = int(stage['stage_id'].max()) + 1
    else:
        next_stage_id = 0

    row_stage_ids = []  # one entry per TNM row, None when untranslated
    stage_id = []       # ids of the translated rows only
    traducciones = []
    note_id_trad = []
    begin_trad = []
    end_trad = []
    for z, (t_val, n_val, m_val) in enumerate(zip(t, n, m)):
        traduccion = _tnm_to_stage(t_val, n_val, m_val)
        if traduccion is None:
            row_stage_ids.append(None)
            continue
        row_stage_ids.append(next_stage_id)
        stage_id.append(next_stage_id)
        traducciones.append(traduccion)
        note_id_trad.append(note_id_tnm[z])
        begin_trad.append(begin_tnm[z])
        end_trad.append(end_tnm[z])
        next_stage_id += 1

    # Nullable integer dtype: untranslated rows keep a missing stage_id
    # without turning the whole column into floats.
    tnm = pd.DataFrame({'tnm_id': tnm_id, 't': t, 'n': n, 'm': m,
                        'stage_id': pd.array(row_stage_ids, dtype='Int64')})
    note_tnm = pd.DataFrame({'note_id': note_id_tnm,
                             'tnm_id': tnm_id,
                             'begin': begin_tnm,
                             'end': end_tnm})
    traduccion_stage = pd.DataFrame({'stage_id': stage_id, 'stage': traducciones})
    traduccion_note_stage = pd.DataFrame({'note_id': note_id_trad,
                                          'stage_id': stage_id,
                                          'begin': begin_trad,
                                          'end': end_trad})
    return tnm, note_tnm, traduccion_stage, traduccion_note_stage
def estadioTnm(annotations, tabla_documentos):
    """Extract stage and TNM concepts and assemble the four output tables.

    The annotations are passed to ``ConceptExtractor.extractionOfConcepts``
    and its result is flattened into a single list. Each annotation is read
    by position: 0 = concept, 1 = entity, 2 = start, 3 = end, 4 = document
    label used to look the row up in ``tabla_documentos``.

    Args:
        annotations: input forwarded unchanged to the concept extractor.
        tabla_documentos: DataFrame whose first column is the document id and
            whose second column is the EHR, indexed by the document label
            stored in each annotation.

    Returns:
        A ``(tnm, note_tnm, stage, note_stage)`` tuple of DataFrames, where
        the stage tables contain the extracted stages followed by the stages
        translated from TNM values.
    """
    resultado = ConceptExtractor.extractionOfConcepts(annotations)
    # Flatten the list of lists into one list of annotations.
    anotaciones = [anotacion for lista in resultado for anotacion in lista]

    # Look every source-document row up once; ``.iloc`` makes the positional
    # access explicit instead of relying on the deprecated ``Series[int]``
    # positional fallback.
    filas_documento = [tabla_documentos.loc[anotacion[4]] for anotacion in anotaciones]
    concepts = pd.DataFrame({
        'EHR': [fila.iloc[1] for fila in filas_documento],
        'document_id': [fila.iloc[0] for fila in filas_documento],
        'concept': [anotacion[0] for anotacion in anotaciones],
        'entity': [anotacion[1] for anotacion in anotaciones],
        'start': [anotacion[2] for anotacion in anotaciones],
        'end': [anotacion[3] for anotacion in anotaciones],
        'id_doc': [anotacion[4] for anotacion in anotaciones]})

    Estadio = concepts.loc[concepts['entity'] == 'STADIO']
    stage, note_stage = stage_tablas(Estadio)

    Tnm = concepts.loc[concepts['entity'] == 'TNM']
    tnm, note_tnm, traduccion_stage, traduccion_note_stage = tnm_tablas(Tnm, stage)

    # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` is the
    # supported way to stack the translated stages under the extracted ones.
    stage = pd.concat([stage, traduccion_stage], ignore_index=True)
    note_stage = pd.concat([note_stage, traduccion_note_stage], ignore_index=True)
    return tnm, note_tnm, stage, note_stage
#-------------------------- MAIN ----------------------------------------------
#Input: BERT annotations (list of lists of dictionaries)
# EHR and id of the documents the notes come from (removal pending)
#Output: four CSV files (tnm.csv, note_tnm.csv, stage.csv, note_stage.csv)
def main():
    """Command-line entry point: read both inputs and write the four CSV tables.

    Expects two command-line arguments: the path of the JSON annotations file
    and the path of the documents CSV file. Writes ``tnm.csv``,
    ``note_tnm.csv``, ``stage.csv`` and ``note_stage.csv`` to the current
    working directory. When an argument is missing or a file does not exist,
    a message is printed and nothing is written.
    """
    # Without this check a missing argument surfaced as a bare IndexError.
    if len(sys.argv) < 3:
        print("Usage: <script> <annotations.json> <documents.csv>")
        return
    jsonRoute = sys.argv[1]
    documentRoute = sys.argv[2]

    if not os.path.exists(jsonRoute):
        print("First argument file doesn't exist")
        return
    # Explicit encoding so the result does not depend on the platform default.
    with open(jsonRoute, encoding='utf-8') as json_file:
        annotations = json.load(json_file)

    if not os.path.exists(documentRoute):
        print("Second argument file doesn't exist")
        return
    tabla_documentos = pd.read_csv(documentRoute)

    tnm, note_tnm, stage, note_stage = estadioTnm(annotations, tabla_documentos)
    tnm.to_csv(r'tnm.csv', index=False)
    note_tnm.to_csv(r'note_tnm.csv', index=False)
    stage.to_csv(r'stage.csv', index=False)
    note_stage.to_csv(r'note_stage.csv', index=False)


# Run the entry point only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment