# -*- coding: utf-8 -*-
"""
Created on Wed Jan 27 10:26:48 2021

Library
"""


def extractionOfConcepts(annotations):
    """Extract the B or B+I concepts with their entities, starts and ends.

    Input: list of lists of dictionaries (one inner list per document),
    where each dictionary has the form
    {'word': '', 'score': '', 'entity': '', 'index': '', 'start': '', 'end': ''}.
    Only the 'word', 'entity', 'start' and 'end' keys are read.

    Output: list of tuples with four elements,
    [(concept, entity, start, end)], in order of appearance. ``concept`` is
    the B word followed by its I words joined with single spaces,
    ``entity`` is the B tag without its first two characters (the 'B-'
    prefix), ``start`` comes from the B word and ``end`` from the last word
    of the concept.

    Tag handling, based on the first character of 'entity':
      * 'B' closes any open concept and starts a new one.
      * 'I' extends the open concept; it is ignored if none is open.
      * 'O' closes any open concept.
      * Anything else (including a missing or empty 'entity') is ignored.

    A concept that is still open when a document ends is closed there, so
    concepts never span two documents.
    """
    entities = []

    for document in annotations:
        # State is reset per document: start/end values of different
        # documents must not be mixed into one concept.
        complete_word = ''
        entity = ''
        start = 0
        end = 0

        for word in document:
            tag = word.get('entity') or ''
            prefix = tag[:1]  # slicing avoids an IndexError on an empty tag

            if prefix == 'B':
                # If there was a previous concept started, finish it
                if complete_word:
                    entities.append((complete_word, entity, start, end))
                # Start a new concept
                complete_word = word.get('word')
                start = word.get('start')
                end = word.get('end')
                entity = tag[2:]
            elif prefix == 'I':
                # If there isn't a B before, ignore
                if complete_word:
                    complete_word = complete_word + ' ' + word.get('word')
                    # Update end
                    end = word.get('end')
            elif prefix == 'O':
                # If there was a previous concept started, finish it
                if complete_word:
                    entities.append((complete_word, entity, start, end))
                    complete_word = ''

        # Flush the concept left open by a document that ends on a B/I tag;
        # without this the last concept of such a document would be lost.
        if complete_word:
            entities.append((complete_word, entity, start, end))

    return entities