Script que permite componer los conceptos de las anotaciones BERT (B, B+I)

parent bf0528ec
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 27 10:26:48 2021
Library
"""
# Extract the B or B+Is concepts and their entities, starts and ends.
# Input: list of list of dictionaries where each dictionary:
# {'word': '', 'score': '', 'entity': '', 'index': '', 'start': '', 'end': ''}
# Output: List of tuples with four elements. [(concept, entity, start, end)]
def extractionOfConcepts(annotations):
    """Compose B / B+I token annotations into whole concepts.

    Args:
        annotations: iterable of documents. Each document is an iterable of
            token dictionaries read through the keys 'word', 'entity',
            'start' and 'end'. The 'entity' value is a label whose first
            character is 'B', 'I' or 'O'; for 'B' labels the text after the
            first two characters is kept as the concept's entity type.

    Returns:
        A flat list of (concept, entity, start, end) tuples, in the order
        the concepts are found. `concept` is the B word followed by its I
        words joined with single spaces, `start` comes from the B token and
        `end` from the last token that was merged into the concept.
    """
    entities = []
    for document in annotations:
        # The concept under construction is reset for every document so a
        # concept can never span two documents (offsets are only read from
        # the tokens of one document at a time).
        complete_word = ''
        entity = ''
        start = 0
        end = 0
        for word in document:
            label = word.get('entity')
            # Slicing (instead of indexing) tolerates an empty label.
            tag = label[:1]
            if tag == 'B':
                # If there was a previous concept started, finish it.
                if complete_word:
                    entities.append((complete_word, entity, start, end))
                # Start a new concept.
                complete_word = word.get('word')
                start = word.get('start')
                end = word.get('end')
                entity = label[2:]
            elif tag == 'I':
                # An I token without a preceding B is ignored.
                if complete_word:
                    complete_word = complete_word + ' ' + word.get('word')
                    # The concept now ends where this token ends.
                    end = word.get('end')
            elif tag == 'O':
                # An O token closes any concept that was open.
                if complete_word:
                    entities.append((complete_word, entity, start, end))
                    complete_word = ''
                    entity = ''
                    start = 0
                    end = 0
        # Emit the concept still open at the end of the document; without
        # this a document ending in a B or I token would lose its last
        # concept.
        if complete_word:
            entities.append((complete_word, entity, start, end))
    return entities
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment