Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 52 additions & 110 deletions chatterbot/search.py
Original file line number Diff line number Diff line change
@@ -1,151 +1,93 @@
class IndexedTextSearch:
"""
:param statement_comparison_function: A comparison class.
Defaults to ``LevenshteinDistance``.
from chatterbot.comparisons import LevenshteinDistance, SpacySimilarity

:param search_page_size:
The maximum number of records to load into memory at a time when searching.
Defaults to 1000
"""

name = 'indexed_text_search'
class BaseTextSearch:
"""
Base class for performing text search using ChatterBot comparison functions.
"""

def __init__(self, chatbot, **kwargs):
from chatterbot.comparisons import LevenshteinDistance

self.chatbot = chatbot

statement_comparison_function = kwargs.get(
'statement_comparison_function',
LevenshteinDistance
)
# Default to SpacySimilarity (a semantic comparator) unless the caller passes 'statement_comparison_function'
comparison_class = kwargs.get('statement_comparison_function', SpacySimilarity)

self.compare_statements = statement_comparison_function(
language=self.chatbot.tagger.language
)
self.compare_statements = comparison_class(language=self.chatbot.tagger.language)

self.search_page_size = kwargs.get(
'search_page_size', 1000
)
# Max results returned and how many records to load from storage at once
self.search_page_size = kwargs.get('search_page_size', 1000)
self.max_results = kwargs.get('max_results', 5)

def search(self, input_statement, **additional_parameters):
def _search_statements(self, input_statement, filter_parameters):
"""
Search for close matches to the input. Confidence scores for
subsequent results will be in order of increasing value.

:param input_statement: A statement.
:type input_statement: chatterbot.conversation.Statement

:param **additional_parameters: Additional parameters to be passed
to the ``filter`` method of the storage adapter when searching.

:rtype: Generator yielding one closest matching statement at a time.
Shared internal method to search for similar statements.
:param input_statement: The input statement to compare against known responses.
:param filter_parameters: Filter arguments for querying the storage.
:return: List of top-N similar statements with confidence scores.
"""
self.chatbot.logger.info('Beginning search for close text match')
self.chatbot.logger.info('Fetching candidate statements from storage...')
candidates = self.chatbot.storage.filter(**filter_parameters)

search_parameters = {
'search_in_response_to_contains': input_statement.search_text,
'persona_not_startswith': 'bot:',
'page_size': self.search_page_size
}

if additional_parameters:
search_parameters.update(additional_parameters)

statement_list = self.chatbot.storage.filter(**search_parameters)
results = []

best_confidence_so_far = 0
for candidate in candidates:
# Compare the input statement to the 'in_response_to' field, falling back to the candidate's own text when that field is empty
comparison_text = candidate.in_response_to or candidate.text

self.chatbot.logger.info('Processing search results')

# Find the closest matching known statement
for statement in statement_list:
confidence = self.compare_statements.compare_text(
input_statement.text, statement.in_response_to
input_statement.text,
comparison_text
)

if confidence > best_confidence_so_far:
best_confidence_so_far = confidence
statement.confidence = confidence
# Store the confidence in the statement object
candidate.confidence = confidence
results.append(candidate)

self.chatbot.logger.info('Similar text found: {} {}'.format(
statement.in_response_to, confidence
))
# Sort results by descending confidence
results.sort(key=lambda stmt: stmt.confidence, reverse=True)

yield statement
# Limit to top-N best results
return results[:self.max_results]


class TextSearch:
class IndexedTextSearch(BaseTextSearch):
"""
:param statement_comparison_function: A comparison class.
Defaults to ``LevenshteinDistance``.

:param search_page_size:
The maximum number of records to load into memory at a time when searching.
Defaults to 1000
Indexed search that restricts candidates to those where input matches part of 'in_response_to'.
"""

name = 'text_search'

def __init__(self, chatbot, **kwargs):
from chatterbot.comparisons import LevenshteinDistance

self.chatbot = chatbot

statement_comparison_function = kwargs.get(
'statement_comparison_function',
LevenshteinDistance
)

self.compare_statements = statement_comparison_function(
language=self.chatbot.tagger.language
)

self.search_page_size = kwargs.get(
'search_page_size', 1000
)
name = 'indexed_text_search'

def search(self, input_statement, **additional_parameters):
"""
Search for close matches to the input. Confidence scores for
subsequent results will be in order of increasing value.

:param input_statement: A statement.
:type input_statement: chatterbot.conversation.Statement

:param **additional_parameters: Additional parameters to be passed
to the ``filter`` method of the storage adapter when searching.

:rtype: Generator yielding one closest matching statement at a time.
"""
self.chatbot.logger.info('Beginning search for close text match')
self.chatbot.logger.info('Performing indexed text search...')

search_parameters = {
'search_in_response_to_contains': input_statement.search_text,
'persona_not_startswith': 'bot:',
'page_size': self.search_page_size
}

if additional_parameters:
search_parameters.update(additional_parameters)

statement_list = self.chatbot.storage.filter(**search_parameters)
return self._search_statements(input_statement, search_parameters)


best_confidence_so_far = 0
class TextSearch(BaseTextSearch):
"""
General search that compares input with all known responses.
"""

self.chatbot.logger.info('Processing search results')
name = 'text_search'

# Find the closest matching known statement
for statement in statement_list:
confidence = self.compare_statements.compare_text(
input_statement.text, statement.in_response_to
)
def search(self, input_statement, **additional_parameters):
self.chatbot.logger.info('Performing general text search...')

if confidence > best_confidence_so_far:
best_confidence_so_far = confidence
statement.confidence = confidence
search_parameters = {
'persona_not_startswith': 'bot:',
'page_size': self.search_page_size
}

self.chatbot.logger.info('Similar text found: {} {}'.format(
statement.text, confidence
))
if additional_parameters:
search_parameters.update(additional_parameters)

yield statement
return self._search_statements(input_statement, search_parameters)
Loading