Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize the complexity of filtering out unwanted recognizers from O(n*m) to O(n) #1523

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -170,23 +170,25 @@ def get_recognizers(
if language == rec.supported_language
]
else:
all_entity_recognizers = dict()
for rec in all_possible_recognizers:
if language == rec.supported_language:
if type(rec.supported_entities) == list and len(rec.supported_entities) > 0:
for supported_entity in rec.supported_entities:
self.add_recognizer_map(all_entity_recognizers, supported_entity, rec)
elif type(rec.supported_entities) == str:
self.add_recognizer_map(all_entity_recognizers, supported_entity, rec)

for entity in entities:
subset = [
rec
for rec in all_possible_recognizers
if entity in rec.supported_entities
and language == rec.supported_language
]

if not subset:
if entity not in all_entity_recognizers:
logger.warning(
"Entity %s doesn't have the corresponding"
" recognizer in language : %s",
entity,
language,
)
else:
to_return.update(set(subset))
to_return.update(all_entity_recognizers[entity])

logger.debug(
"Returning a total of %s recognizers",
Expand All @@ -198,6 +200,12 @@ def get_recognizers(

return list(to_return)

def add_recognizer_map(self, all_entity_recognizers, supported_entity, rec):
    """
    Register a recognizer under an entity name in an entity -> recognizers map.

    The map is mutated in place; nothing is returned.

    :param all_entity_recognizers: Mapping from entity name to the set of
        recognizers collected for that entity so far.
    :param supported_entity: Key under which the recognizer is stored.
    :param rec: Recognizer to add; it must be hashable because it is kept
        in a set.
    """
    # setdefault creates the set on first sight of the key and otherwise
    # hands back the existing one, so a single add() covers both cases.
    bucket = all_entity_recognizers.setdefault(supported_entity, set())
    bucket.add(rec)

def add_recognizer(self, recognizer: EntityRecognizer) -> None:
"""
Add a new recognizer to the list of recognizers.
Expand Down