smokerExtractor.py 2.94 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
import os
import json
import ConceptExtractor


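# Reads the BERT annotation JSON.
# Input: path to the annotation JSON.
# Output: two dictionaries keyed by document id, one holding the SMOKER_STATUS concepts and one holding the SMOKER_QUANTITY concepts.
# NOTE(review): the original header described treatment concepts (chemotherapy_drug, radiotherapy_drug, medication) and carried a TODO to retrieve doses once BERT recognises them; it appears to have been copied from another extractor -- confirm whether that TODO still applies here.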
def read_json_annotation(path, doc_id=1):
	"""Read the BERT annotation JSON and collect the smoker concepts.

	The parsed JSON is handed to ``ConceptExtractor.extractionOfConcepts``.
	Only records with exactly four items are considered; the second item is
	compared against the labels ``"SMOKER_STATUS"`` and ``"SMOKER_QUANTITY"``.
	The remaining items are presumably (text, start offset, end offset) --
	TODO confirm against ``ConceptExtractor``.

	Args:
		path: Path to the annotation JSON file.
		doc_id: Key under which the concepts are stored. Defaults to ``1``,
			the placeholder the original code hard-coded until a real
			document id is available.

	Returns:
		A tuple ``(dictJsonStatus, dictJsonQuantity)``. Each dictionary maps
		``doc_id`` to a list of ``(concept[0], concept[2], concept[3])``
		tuples for the matching label. Both dictionaries are empty when
		``path`` does not exist or no concept matches.
	"""
	dictJsonStatus = {}
	dictJsonQuantity = {}

	# A missing file is not treated as an error: the caller gets empty results.
	if not os.path.exists(path):
		return dictJsonStatus, dictJsonQuantity

	# JSON is UTF-8 by specification; do not depend on the platform default.
	with open(path, encoding="utf-8") as json_file:
		annotations = json.load(json_file)

	concepts = ConceptExtractor.extractionOfConcepts(annotations)

	for concept in concepts:
		# Records that do not have exactly four fields are skipped.
		if len(concept) != 4:
			continue

		label = concept[1]
		if label == "SMOKER_STATUS":
			target = dictJsonStatus
		elif label == "SMOKER_QUANTITY":
			target = dictJsonQuantity
		else:
			continue

		# Drop the label (index 1); keep the other three fields.
		target.setdefault(doc_id, []).append((concept[0], concept[2], concept[3]))

	return dictJsonStatus, dictJsonQuantity


def _quantity_entry(quantity):
	"""Return ``(text, start, end)`` for a quantity, with end = start + len(text)."""
	text, start = quantity[0], quantity[1]
	# The end offset is recomputed from the start and the text length (as the
	# original code did) instead of reading the stored third field.
	return (text, start, int(start) + len(text))


def join_status_quantities(dictJsonStatus, dictJsonQuantity, max_gap=5):
	"""Join each smoker status with the quantity that appears next to it.

	Both inputs map a document id to a list of 3-item tuples as built by
	``read_json_annotation``; the items are assumed to be
	(text, start offset, end offset) -- TODO confirm.

	For every document, each status is paired with the first not-yet-paired
	quantity whose start lies within ``max_gap`` characters of the status end.
	The result list for a document contains, in this order:

	* one entry per status, in input order:
	  ``(s_text, s_start, s_end, q_text, q_start, q_end)`` when a quantity
	  was paired with it, otherwise ``(s_text, s_start, s_end, "")``;
	* one ``(q_text, q_start, q_end)`` entry per quantity left unpaired.

	Documents that only have quantities get just the quantity entries.

	Args:
		dictJsonStatus: ``{doc_id: [(text, start, end), ...]}`` for statuses.
		dictJsonQuantity: ``{doc_id: [(text, start, end), ...]}`` for
			quantities.
		max_gap: Maximum absolute distance, in characters, between the end of
			a status and the start of a quantity for them to be joined.

	Returns:
		A new dictionary ``{doc_id: [entry, ...]}``; the inputs are not
		modified.
	"""
	dictFinal = {}

	for key, statuses in dictJsonStatus.items():
		quantities = dictJsonQuantity.get(key, [])
		paired = set()  # indexes of quantities already attached to a status
		entries = []

		for status in statuses:
			text, start, end = status[0], status[1], status[2]

			match = None
			for idx, quantity in enumerate(quantities):
				# Offsets are normalised with int() because they may arrive as strings.
				if idx not in paired and abs(int(end) - int(quantity[1])) <= max_gap:
					match = idx
					break

			if match is None:
				entries.append((text, start, end, ""))
			else:
				paired.add(match)
				entries.append((text, start, end) + _quantity_entry(quantities[match]))

		# Quantities that were not close to any status are kept on their own.
		for idx, quantity in enumerate(quantities):
			if idx not in paired:
				entries.append(_quantity_entry(quantity))

		dictFinal[key] = entries

	# Documents that have quantities but no status at all.
	for key, quantities in dictJsonQuantity.items():
		if key not in dictJsonStatus:
			dictFinal[key] = [_quantity_entry(quantity) for quantity in quantities]

	return dictFinal


# Script section: these statements are at module top level with no __main__
# guard, so they run whenever the module is executed or imported.
# NOTE(review): the annotation path is a hard-coded absolute location.
path = "/home/jarvos/Escritorio/Extractor/annotation_output.json"
# Collect the SMOKER_STATUS and SMOKER_QUANTITY concepts from the annotation file.
dictJsonStatus,dictJsonQuantity = read_json_annotation(path)
# Combine statuses and quantities into a single per-document dictionary.
dictFinal = join_status_quantities(dictJsonStatus,dictJsonQuantity)