telemeta.util.backend module
# -*- coding: utf-8 -*-
from haystack.backends.elasticsearch_backend import *
import string
import re

# Regex used to strip punctuation when indexing fields and
# when submitting a search query.
regex_space_punc = "[\\s" + re.escape(string.punctuation) + "]+"


class CustomElasticBackend(ElasticsearchSearchBackend):

    # This setup modifies the ES mapping in order to get better results
    # from the search engine. It adds two analyzers (for indexing and searching):
    #  - whitespace_asciifolding_analyzer: strips punctuation, lowercases
    #    all terms and folds accented characters to ASCII.
    #  - lowercase_analyzer: only lowercases the word (used by the code
    #    field in order to preserve the underscores in codes).
    def setup(self):
        DEFAULT_FIELD_MAPPING['analyzer'] = 'whitespace_asciifolding_analyzer'
        FIELD_MAPPINGS['keyword'] = {'type': 'string', 'analyzer': 'lowercase_analyzer'}
        eb = super(CustomElasticBackend, self)
        eb.DEFAULT_SETTINGS.get('settings').get('analysis').get('tokenizer')['esc_scape_tokenizer'] = \
            {"type": "pattern", "pattern": regex_space_punc}
        eb.DEFAULT_SETTINGS.get('settings').get('analysis').get('analyzer')['whitespace_asciifolding_analyzer'] = \
            {"type": "custom", "tokenizer": "esc_scape_tokenizer", "filter": ["lowercase", "asciifolding"]}
        eb.DEFAULT_SETTINGS.get('settings').get('analysis').get('analyzer')['lowercase_analyzer'] = \
            {"type": "custom", "tokenizer": "keyword", "filter": ["lowercase"]}
        eb.setup()


class CustomElasticSearchQuery(ElasticsearchSearchQuery):

    # Custom search query that strips all punctuation characters and
    # converts boolean field values to strings.
    # Applied when the query is entered.
    def build_query_fragment(self, field, filter_type, value):
        if isinstance(value, bool):
            value = str(value)
        if not isinstance(value, int) and field != 'code':
            value = re.sub(regex_space_punc, " ", value)
        return super(CustomElasticSearchQuery, self).build_query_fragment(field, filter_type, value)


# The custom engine that selects the backend and the search query.
class CustomElasticEngine(ElasticsearchSearchEngine):
    backend = CustomElasticBackend
    query = CustomElasticSearchQuery
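To enable this engine, point Haystack at it from the Django settings. A minimal sketch, assuming a local Elasticsearch node; the URL and index name are placeholders, not values defined by this module:

# settings.py (sketch)
HAYSTACK_CONNECTIONS = {
    'default': {
        'ENGINE': 'telemeta.util.backend.CustomElasticEngine',
        'URL': 'http://127.0.0.1:9200/',  # placeholder Elasticsearch URL
        'INDEX_NAME': 'haystack',         # placeholder index name
    },
}

The backend raises ImproperlyConfigured when URL or INDEX_NAME is missing (see __init__ below).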
Module variables
var DATETIME_REGEX
var DEFAULT_FIELD_MAPPING
var DEFAULT_OPERATOR
var DJANGO_CT
var DJANGO_ID
var FIELD_MAPPINGS
var ID
var regex_space_punc
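regex_space_punc matches any run of whitespace and/or punctuation. It serves both as the pattern of the custom tokenizer and as the scrubbing pattern in build_query_fragment. A quick illustration of its effect:

import re
import string

regex_space_punc = "[\\s" + re.escape(string.punctuation) + "]+"

# Runs of punctuation/whitespace collapse to a single space:
re.sub(regex_space_punc, " ", "jazz-blues, 1968")  # -> 'jazz blues 1968'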
Classes
class CustomElasticBackend
class CustomElasticBackend(ElasticsearchSearchBackend):

    # This setup modifies the ES mapping in order to get better results
    # from the search engine. It adds two analyzers (for indexing and searching):
    #  - whitespace_asciifolding_analyzer: strips punctuation, lowercases
    #    all terms and folds accented characters to ASCII.
    #  - lowercase_analyzer: only lowercases the word (used by the code
    #    field in order to preserve the underscores in codes).
    def setup(self):
        DEFAULT_FIELD_MAPPING['analyzer'] = 'whitespace_asciifolding_analyzer'
        FIELD_MAPPINGS['keyword'] = {'type': 'string', 'analyzer': 'lowercase_analyzer'}
        eb = super(CustomElasticBackend, self)
        eb.DEFAULT_SETTINGS.get('settings').get('analysis').get('tokenizer')['esc_scape_tokenizer'] = \
            {"type": "pattern", "pattern": regex_space_punc}
        eb.DEFAULT_SETTINGS.get('settings').get('analysis').get('analyzer')['whitespace_asciifolding_analyzer'] = \
            {"type": "custom", "tokenizer": "esc_scape_tokenizer", "filter": ["lowercase", "asciifolding"]}
        eb.DEFAULT_SETTINGS.get('settings').get('analysis').get('analyzer')['lowercase_analyzer'] = \
            {"type": "custom", "tokenizer": "keyword", "filter": ["lowercase"]}
        eb.setup()
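After setup() has run, the index settings carry the two custom analyzers. The analysis block below is reconstructed from the assignments above (Haystack's other default settings are omitted):

# Approximate shape of DEFAULT_SETTINGS['settings']['analysis'] after setup().
analysis = {
    'tokenizer': {
        'esc_scape_tokenizer': {
            'type': 'pattern',
            'pattern': regex_space_punc,  # split on whitespace/punctuation runs
        },
    },
    'analyzer': {
        'whitespace_asciifolding_analyzer': {
            'type': 'custom',
            'tokenizer': 'esc_scape_tokenizer',
            'filter': ['lowercase', 'asciifolding'],
        },
        'lowercase_analyzer': {
            'type': 'custom',
            'tokenizer': 'keyword',  # whole value is one token, so underscores survive
            'filter': ['lowercase'],
        },
    },
}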
Ancestors (in MRO)
- CustomElasticBackend
- haystack.backends.elasticsearch_backend.ElasticsearchSearchBackend
- haystack.backends.BaseSearchBackend
- __builtin__.object
Class variables
var DEFAULT_SETTINGS
var RESERVED_CHARACTERS
var RESERVED_WORDS
Methods
def __init__(
self, connection_alias, **connection_options)
def __init__(self, connection_alias, **connection_options):
    super(ElasticsearchSearchBackend, self).__init__(connection_alias, **connection_options)

    if not 'URL' in connection_options:
        raise ImproperlyConfigured("You must specify a 'URL' in your settings for connection '%s'." % connection_alias)

    if not 'INDEX_NAME' in connection_options:
        raise ImproperlyConfigured("You must specify a 'INDEX_NAME' in your settings for connection '%s'." % connection_alias)

    self.conn = elasticsearch.Elasticsearch(connection_options['URL'], timeout=self.timeout,
                                            **connection_options.get('KWARGS', {}))
    self.index_name = connection_options['INDEX_NAME']
    self.log = logging.getLogger('haystack')
    self.setup_complete = False
    self.existing_mapping = {}
def build_models_list(
self)
Builds a list of models for searching.

The search method should use this and the django_ct field to narrow the results (unless the user indicates not to). This helps ignore any results that are not currently handled models and ensures consistent caching.
def build_models_list(self):
    """
    Builds a list of models for searching.

    The ``search`` method should use this and the ``django_ct`` field to
    narrow the results (unless the user indicates not to). This helps ignore
    any results that are not currently handled models and ensures
    consistent caching.
    """
    from haystack import connections
    models = []

    for model in connections[self.connection_alias].get_unified_index().get_indexed_models():
        models.append(get_model_ct(model))

    return models
def build_schema(
self, fields)
def build_schema(self, fields):
    content_field_name = ''
    mapping = {
        DJANGO_CT: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False},
        DJANGO_ID: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False},
    }

    for field_name, field_class in fields.items():
        field_mapping = FIELD_MAPPINGS.get(field_class.field_type, DEFAULT_FIELD_MAPPING).copy()
        if field_class.boost != 1.0:
            field_mapping['boost'] = field_class.boost

        if field_class.document is True:
            content_field_name = field_class.index_fieldname

        # Do this last to override `text` fields.
        if field_mapping['type'] == 'string':
            if field_class.indexed is False or hasattr(field_class, 'facet_for'):
                field_mapping['index'] = 'not_analyzed'
                del field_mapping['analyzer']

        mapping[field_class.index_fieldname] = field_mapping

    return (content_field_name, mapping)
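With the custom setup() applied, the schema produced here references the new analyzers. A sketch of the resulting mapping for two illustrative fields, assuming the code field is declared with field_type 'keyword' in its search index:

# Excerpt of the mapping result; field names are examples, not from this module.
mapping_excerpt = {
    'title': {'type': 'string', 'analyzer': 'whitespace_asciifolding_analyzer'},
    'code': {'type': 'string', 'analyzer': 'lowercase_analyzer'},
}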
def build_search_kwargs(
self, query_string, sort_by=None, start_offset=0, end_offset=None, fields=u'', highlight=False, facets=None, date_facets=None, query_facets=None, narrow_queries=None, spelling_query=None, within=None, dwithin=None, distance_point=None, models=None, limit_to_registered_models=None, result_class=None)
def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                        fields='', highlight=False, facets=None,
                        date_facets=None, query_facets=None,
                        narrow_queries=None, spelling_query=None,
                        within=None, dwithin=None, distance_point=None,
                        models=None, limit_to_registered_models=None,
                        result_class=None):
    index = haystack.connections[self.connection_alias].get_unified_index()
    content_field = index.document_field

    if query_string == '*:*':
        kwargs = {
            'query': {
                "match_all": {}
            },
        }
    else:
        kwargs = {
            'query': {
                'query_string': {
                    'default_field': content_field,
                    'default_operator': DEFAULT_OPERATOR,
                    'query': query_string,
                    'analyze_wildcard': True,
                    'auto_generate_phrase_queries': True,
                },
            },
        }

    # so far, no filters
    filters = []

    if fields:
        if isinstance(fields, (list, set)):
            fields = " ".join(fields)

        kwargs['fields'] = fields

    if sort_by is not None:
        order_list = []
        for field, direction in sort_by:
            if field == 'distance' and distance_point:
                # Do the geo-enabled sort.
                lng, lat = distance_point['point'].get_coords()
                sort_kwargs = {
                    "_geo_distance": {
                        distance_point['field']: [lng, lat],
                        "order": direction,
                        "unit": "km"
                    }
                }
            else:
                if field == 'distance':
                    warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")

                # Regular sorting.
                sort_kwargs = {field: {'order': direction}}

            order_list.append(sort_kwargs)

        kwargs['sort'] = order_list

    # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
    # if start_offset is not None:
    #     kwargs['from'] = start_offset

    # if end_offset is not None:
    #     kwargs['size'] = end_offset - start_offset

    if highlight is True:
        kwargs['highlight'] = {
            'fields': {
                content_field: {'store': 'yes'},
            }
        }

    if self.include_spelling:
        kwargs['suggest'] = {
            'suggest': {
                'text': spelling_query or query_string,
                'term': {
                    # Using content_field here will result in suggestions of stemmed words.
                    'field': '_all',
                },
            },
        }

    if narrow_queries is None:
        narrow_queries = set()

    if facets is not None:
        kwargs.setdefault('facets', {})

        for facet_fieldname, extra_options in facets.items():
            facet_options = {
                'terms': {
                    'field': facet_fieldname,
                    'size': 100,
                },
            }
            # Special cases for options applied at the facet level (not the terms level).
            if extra_options.pop('global_scope', False):
                # Renamed "global_scope" since "global" is a python keyword.
                facet_options['global'] = True
            if 'facet_filter' in extra_options:
                facet_options['facet_filter'] = extra_options.pop('facet_filter')
            facet_options['terms'].update(extra_options)
            kwargs['facets'][facet_fieldname] = facet_options

    if date_facets is not None:
        kwargs.setdefault('facets', {})

        for facet_fieldname, value in date_facets.items():
            # Need to detect on gap_by & only add amount if it's more than one.
            interval = value.get('gap_by').lower()

            # Need to detect on amount (can't be applied on months or years).
            if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'):
                # Just the first character is valid for use.
                interval = "%s%s" % (value['gap_amount'], interval[:1])

            kwargs['facets'][facet_fieldname] = {
                'date_histogram': {
                    'field': facet_fieldname,
                    'interval': interval,
                },
                'facet_filter': {
                    "range": {
                        facet_fieldname: {
                            'from': self._from_python(value.get('start_date')),
                            'to': self._from_python(value.get('end_date')),
                        }
                    }
                }
            }

    if query_facets is not None:
        kwargs.setdefault('facets', {})

        for facet_fieldname, value in query_facets:
            kwargs['facets'][facet_fieldname] = {
                'query': {
                    'query_string': {
                        'query': value,
                    }
                },
            }

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

    if models and len(models):
        model_choices = sorted(get_model_ct(model) for model in models)
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    if len(model_choices) > 0:
        filters.append({"terms": {DJANGO_CT: model_choices}})

    for q in narrow_queries:
        filters.append({
            'fquery': {
                'query': {
                    'query_string': {
                        'query': q
                    },
                },
                '_cache': True,
            }
        })

    if within is not None:
        from haystack.utils.geo import generate_bounding_box

        ((south, west), (north, east)) = generate_bounding_box(within['point_1'], within['point_2'])
        within_filter = {
            "geo_bounding_box": {
                within['field']: {
                    "top_left": {
                        "lat": north,
                        "lon": west
                    },
                    "bottom_right": {
                        "lat": south,
                        "lon": east
                    }
                }
            },
        }
        filters.append(within_filter)

    if dwithin is not None:
        lng, lat = dwithin['point'].get_coords()

        # NB: the 1.0.0 release of elasticsearch introduced an
        #     incompatible change to the distance filter formatting
        if elasticsearch.VERSION >= (1, 0, 0):
            distance = "%(dist).6f%(unit)s" % {
                'dist': dwithin['distance'].km,
                'unit': "km"
            }
        else:
            distance = dwithin['distance'].km

        dwithin_filter = {
            "geo_distance": {
                "distance": distance,
                dwithin['field']: {
                    "lat": lat,
                    "lon": lng
                }
            }
        }
        filters.append(dwithin_filter)

    # if we want to filter, change the query type to filtered
    if filters:
        kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}}
        if len(filters) == 1:
            kwargs['query']['filtered']["filter"] = filters[0]
        else:
            kwargs['query']['filtered']["filter"] = {"bool": {"must": filters}}

    return kwargs
def clear(
self, models=None, commit=True)
def clear(self, models=None, commit=True):
    # We actually don't want to do this here, as mappings could be
    # very different.
    # if not self.setup_complete:
    #     self.setup()

    if models is not None:
        assert isinstance(models, (list, tuple))

    try:
        if models is None:
            self.conn.indices.delete(index=self.index_name, ignore=404)
            self.setup_complete = False
            self.existing_mapping = {}
        else:
            models_to_delete = []

            for model in models:
                models_to_delete.append("%s:%s" % (DJANGO_CT, get_model_ct(model)))

            # Delete by query in Elasticsearch assumes you're dealing with
            # a ``query`` root object. :/
            query = {'query': {'query_string': {'query': " OR ".join(models_to_delete)}}}
            self.conn.delete_by_query(index=self.index_name, doc_type='modelresult', body=query)
    except elasticsearch.TransportError as e:
        if not self.silently_fail:
            raise

        if models is not None:
            self.log.error("Failed to clear Elasticsearch index of models '%s': %s",
                           ','.join(models_to_delete), e, exc_info=True)
        else:
            self.log.error("Failed to clear Elasticsearch index: %s", e, exc_info=True)
def extract_file_contents(
self, file_obj)
Hook to allow backends which support rich-content types (such as PDF, Word, etc.) to process the provided file object and return its contents for indexing.

Returns None if metadata cannot be extracted; otherwise returns a dictionary containing at least two keys:

:contents: Extracted full-text content, if applicable
:metadata: key:value pairs of text strings
def extract_file_contents(self, file_obj):
    """
    Hook to allow backends which support rich-content types such as PDF,
    Word, etc. extraction to process the provided file object and return
    the contents for indexing.

    Returns None if metadata cannot be extracted; otherwise returns a
    dictionary containing at least two keys:

        :contents: Extracted full-text content, if applicable
        :metadata: key:value pairs of text strings
    """
    raise NotImplementedError("Subclasses must provide a way to extract metadata via the 'extract' method if supported by the backend.")
def more_like_this(
self, model_instance, additional_query_string=None, start_offset=0, end_offset=None, models=None, limit_to_registered_models=None, result_class=None, **kwargs)
def more_like_this(self, model_instance, additional_query_string=None,
                   start_offset=0, end_offset=None, models=None,
                   limit_to_registered_models=None, result_class=None, **kwargs):
    from haystack import connections

    if not self.setup_complete:
        self.setup()

    # Deferred models will have a different class ("RealClass_Deferred_fieldname")
    # which won't be in our registry:
    model_klass = model_instance._meta.concrete_model

    index = connections[self.connection_alias].get_unified_index().get_index(model_klass)
    field_name = index.get_content_field()
    params = {}

    if start_offset is not None:
        params['search_from'] = start_offset

    if end_offset is not None:
        params['search_size'] = end_offset - start_offset

    doc_id = get_identifier(model_instance)

    try:
        raw_results = self.conn.mlt(index=self.index_name, doc_type='modelresult',
                                    id=doc_id, mlt_fields=[field_name], **params)
    except elasticsearch.TransportError as e:
        if not self.silently_fail:
            raise

        self.log.error("Failed to fetch More Like This from Elasticsearch for document '%s': %s",
                       doc_id, e, exc_info=True)
        raw_results = {}

    return self._process_results(raw_results, result_class=result_class)
def prep_value(
self, value)
Hook to give the backend a chance to prep an attribute value before sending it to the search engine. By default, just force it to unicode.
def prep_value(self, value):
    """
    Hook to give the backend a chance to prep an attribute value before
    sending it to the search engine. By default, just force it to unicode.
    """
    return force_text(value)
def remove(
self, obj_or_string, commit=True)
def remove(self, obj_or_string, commit=True):
    doc_id = get_identifier(obj_or_string)

    if not self.setup_complete:
        try:
            self.setup()
        except elasticsearch.TransportError as e:
            if not self.silently_fail:
                raise

            self.log.error("Failed to remove document '%s' from Elasticsearch: %s",
                           doc_id, e, exc_info=True)
            return

    try:
        self.conn.delete(index=self.index_name, doc_type='modelresult', id=doc_id, ignore=404)

        if commit:
            self.conn.indices.refresh(index=self.index_name)
    except elasticsearch.TransportError as e:
        if not self.silently_fail:
            raise

        self.log.error("Failed to remove document '%s' from Elasticsearch: %s",
                       doc_id, e, exc_info=True)
def search(
obj, query_string, *args, **kwargs)
def wrapper(obj, query_string, *args, **kwargs):
    start = time()

    try:
        return func(obj, query_string, *args, **kwargs)
    finally:
        stop = time()

        if settings.DEBUG:
            from haystack import connections
            connections[obj.connection_alias].queries.append({
                'query_string': query_string,
                'additional_args': args,
                'additional_kwargs': kwargs,
                'time': "%.3f" % (stop - start),
                'start': start,
                'stop': stop,
            })
def setup(
self)
def setup(self):
    DEFAULT_FIELD_MAPPING['analyzer'] = 'whitespace_asciifolding_analyzer'
    FIELD_MAPPINGS['keyword'] = {'type': 'string', 'analyzer': 'lowercase_analyzer'}
    eb = super(CustomElasticBackend, self)
    eb.DEFAULT_SETTINGS.get('settings').get('analysis').get('tokenizer')['esc_scape_tokenizer'] = \
        {"type": "pattern", "pattern": regex_space_punc}
    eb.DEFAULT_SETTINGS.get('settings').get('analysis').get('analyzer')['whitespace_asciifolding_analyzer'] = \
        {"type": "custom", "tokenizer": "esc_scape_tokenizer", "filter": ["lowercase", "asciifolding"]}
    eb.DEFAULT_SETTINGS.get('settings').get('analysis').get('analyzer')['lowercase_analyzer'] = \
        {"type": "custom", "tokenizer": "keyword", "filter": ["lowercase"]}
    eb.setup()
def update(
self, index, iterable, commit=True)
def update(self, index, iterable, commit=True):
    if not self.setup_complete:
        try:
            self.setup()
        except elasticsearch.TransportError as e:
            if not self.silently_fail:
                raise

            self.log.error("Failed to add documents to Elasticsearch: %s", e, exc_info=True)
            return

    prepped_docs = []

    for obj in iterable:
        try:
            prepped_data = index.full_prepare(obj)
            final_data = {}

            # Convert the data to make sure it's happy.
            for key, value in prepped_data.items():
                final_data[key] = self._from_python(value)
            final_data['_id'] = final_data[ID]

            prepped_docs.append(final_data)
        except SkipDocument:
            self.log.debug(u"Indexing for object `%s` skipped", obj)
        except elasticsearch.TransportError as e:
            if not self.silently_fail:
                raise

            # We'll log the object identifier but won't include the actual object
            # to avoid the possibility of that generating encoding errors while
            # processing the log message:
            self.log.error(u"%s while preparing object for update" % e.__class__.__name__,
                           exc_info=True,
                           extra={"data": {"index": index, "object": get_identifier(obj)}})

    bulk(self.conn, prepped_docs, index=self.index_name, doc_type='modelresult')

    if commit:
        self.conn.indices.refresh(index=self.index_name)
class CustomElasticEngine
class CustomElasticEngine(ElasticsearchSearchEngine):
    backend = CustomElasticBackend
    query = CustomElasticSearchQuery
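Once the engine is configured, it is exercised through the standard Haystack query API; nothing changes for callers. A sketch (the field values are illustrative, not from this module):

from haystack.query import SearchQuerySet

# Free text passes through whitespace_asciifolding_analyzer, so accents
# and punctuation are neutralized at query time as well as index time.
results = SearchQuerySet().filter(content='Bateke')

# 'code' values skip punctuation stripping (see CustomElasticSearchQuery),
# so codes keep their underscores (illustrative value).
results = SearchQuerySet().filter(code='CNRSMH_I_1970_001')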
Ancestors (in MRO)
- CustomElasticEngine
- haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine
- haystack.backends.BaseEngine
- __builtin__.object
Class variables
var backend
var query
var unified_index
Methods
def __init__(
self, using=None)
def __init__(self, using=None):
    if using is None:
        using = DEFAULT_ALIAS

    self.using = using
    self.options = settings.HAYSTACK_CONNECTIONS.get(self.using, {})
    self.queries = []
    self._index = None
    self._backend = None
def get_backend(
self)
def get_backend(self):
    if self._backend is None:
        self._backend = self.backend(self.using, **self.options)
    return self._backend
def get_query(
self)
def get_query(self):
    return self.query(using=self.using)
def get_unified_index(
self)
def get_unified_index(self):
    if self._index is None:
        self._index = self.unified_index(self.options.get('EXCLUDED_INDEXES', []))
    return self._index
def reset_queries(
self)
def reset_queries(self):
    self.queries = []
class CustomElasticSearchQuery
class CustomElasticSearchQuery(ElasticsearchSearchQuery):

    # Custom search query that strips all punctuation characters and
    # converts boolean field values to strings.
    # Applied when the query is entered.
    def build_query_fragment(self, field, filter_type, value):
        if isinstance(value, bool):
            value = str(value)
        if not isinstance(value, int) and field != 'code':
            value = re.sub(regex_space_punc, " ", value)
        return super(CustomElasticSearchQuery, self).build_query_fragment(field, filter_type, value)
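The preprocessing applied to the query value can be replayed in isolation. A sketch that reproduces only the value handling of build_query_fragment, outside Haystack; the field names are illustrative:

import re
import string

regex_space_punc = "[\\s" + re.escape(string.punctuation) + "]+"

def preprocess(field, value):
    # Mirrors the value handling in build_query_fragment above.
    if isinstance(value, bool):
        value = str(value)  # True -> 'True'
    if not isinstance(value, int) and field != 'code':
        value = re.sub(regex_space_punc, " ", value)
    return value

preprocess('title', 'aksak-maqam')        # -> 'aksak maqam'
preprocess('code', 'CNRSMH_I_1970_001')   # -> unchanged, underscores kept (illustrative value)
preprocess('public_access', True)         # -> 'True' (hypothetical boolean field)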
Ancestors (in MRO)
- CustomElasticSearchQuery
- haystack.backends.elasticsearch_backend.ElasticsearchSearchQuery
- haystack.backends.BaseSearchQuery
- __builtin__.object
Methods
def __init__(
self, using=u'default')
def __init__(self, using=DEFAULT_ALIAS):
    self.query_filter = SearchNode()
    self.order_by = []
    self.models = set()
    self.boost = {}
    self.start_offset = 0
    self.end_offset = None
    self.highlight = False
    self.facets = {}
    self.date_facets = {}
    self.query_facets = []
    self.narrow_queries = set()
    #: If defined, fields should be a list of field names - no other values
    #: will be retrieved so the caller must be careful to include django_ct
    #: and django_id when using code which expects those to be included in
    #: the results
    self.fields = []
    # Geospatial-related information
    self.within = {}
    self.dwithin = {}
    self.distance_point = {}
    # Internal.
    self._raw_query = None
    self._raw_query_params = {}
    self._more_like_this = False
    self._mlt_instance = None
    self._results = None
    self._hit_count = None
    self._facet_counts = None
    self._stats = None
    self._spelling_suggestion = None
    self.result_class = SearchResult
    self.stats = {}

    from haystack import connections
    self._using = using
    self.backend = connections[self._using].get_backend()
def add_boost(
self, term, boost_value)
Adds a boosted term and the amount to boost it to the query.
def add_boost(self, term, boost_value):
    """Adds a boosted term and the amount to boost it to the query."""
    self.boost[term] = boost_value
def add_date_facet(
self, field, start_date, end_date, gap_by, gap_amount=1)
Adds a date-based facet on a field.
def add_date_facet(self, field, start_date, end_date, gap_by, gap_amount=1):
    """Adds a date-based facet on a field."""
    from haystack import connections
    if not gap_by in VALID_GAPS:
        raise FacetingError("The gap_by ('%s') must be one of the following: %s." % (gap_by, ', '.join(VALID_GAPS)))

    details = {
        'start_date': start_date,
        'end_date': end_date,
        'gap_by': gap_by,
        'gap_amount': gap_amount,
    }
    self.date_facets[connections[self._using].get_unified_index().get_facet_fieldname(field)] = details
def add_distance(
self, field, point)
Denotes that results should include distance measurements from the point passed in.
def add_distance(self, field, point):
    """
    Denotes that results should include distance measurements from the
    point passed in.
    """
    from haystack.utils.geo import ensure_point
    self.distance_point = {
        'field': field,
        'point': ensure_point(point),
    }
def add_dwithin(
self, field, point, distance)
Adds radius-based parameters to search query.
def add_dwithin(self, field, point, distance):
    """Adds radius-based parameters to search query."""
    from haystack.utils.geo import ensure_point, ensure_distance
    self.dwithin = {
        'field': field,
        'point': ensure_point(point),
        'distance': ensure_distance(distance),
    }
def add_field_facet(
self, field, **options)
Adds a regular facet on a field.
def add_field_facet(self, field, **options):
    """Adds a regular facet on a field."""
    from haystack import connections
    field_name = connections[self._using].get_unified_index().get_facet_fieldname(field)
    self.facets[field_name] = options.copy()
def add_filter(
self, query_filter, use_or=False)
Adds a SQ to the current query.
def add_filter(self, query_filter, use_or=False):
    """
    Adds a SQ to the current query.
    """
    if use_or:
        connector = SQ.OR
    else:
        connector = SQ.AND

    if self.query_filter and query_filter.connector != connector and len(query_filter) > 1:
        self.query_filter.start_subtree(connector)
        subtree = True
    else:
        subtree = False

    for child in query_filter.children:
        if isinstance(child, tree.Node):
            self.query_filter.start_subtree(connector)
            self.add_filter(child)
            self.query_filter.end_subtree()
        else:
            expression, value = child
            self.query_filter.add((expression, value), connector)

        connector = query_filter.connector

    if query_filter.negated:
        self.query_filter.negate()

    if subtree:
        self.query_filter.end_subtree()
def add_highlight(
self)
Adds highlighting to the search results.
def add_highlight(self):
    """Adds highlighting to the search results."""
    self.highlight = True
def add_model(
self, model)
Restricts the query requiring matches in the given model.
This builds upon previous additions, so you can limit to multiple models by chaining this method several times.
def add_model(self, model):
    """
    Restricts the query requiring matches in the given model.

    This builds upon previous additions, so you can limit to multiple models
    by chaining this method several times.
    """
    if not isinstance(model, ModelBase):
        raise AttributeError('The model being added to the query must derive from Model.')

    self.models.add(model)
def add_narrow_query(
self, query)
Narrows a search to a subset of all documents per the query.
Generally used in conjunction with faceting.
def add_narrow_query(self, query):
    """
    Narrows a search to a subset of all documents per the query.

    Generally used in conjunction with faceting.
    """
    self.narrow_queries.add(query)
def add_order_by(
self, field)
Orders the search result by a field.
def add_order_by(self, field):
    """Orders the search result by a field."""
    self.order_by.append(field)
def add_query_facet(
self, field, query)
Adds a query facet on a field.
def add_query_facet(self, field, query):
    """Adds a query facet on a field."""
    from haystack import connections
    self.query_facets.append((connections[self._using].get_unified_index().get_facet_fieldname(field), query))
def add_stats_query(
self, stats_field, stats_facets)
Adds stats and stats_facets queries for the Solr backend.
def add_stats_query(self, stats_field, stats_facets):
    """Adds stats and stats_facets queries for the Solr backend."""
    self.stats[stats_field] = stats_facets
def add_within(
self, field, point_1, point_2)
Adds bounding box parameters to search query.
def add_within(self, field, point_1, point_2):
    """Adds bounding box parameters to search query."""
    from haystack.utils.geo import ensure_point
    self.within = {
        'field': field,
        'point_1': ensure_point(point_1),
        'point_2': ensure_point(point_2),
    }
def boost_fragment(
self, boost_word, boost_value)
Generates query fragment for boosting a single word/value pair.
def boost_fragment(self, boost_word, boost_value):
    """Generates query fragment for boosting a single word/value pair."""
    return "%s^%s" % (boost_word, boost_value)
def build_alt_parser_query(
self, parser_name, query_string=u'', **kwargs)
def build_alt_parser_query(self, parser_name, query_string='', **kwargs):
    if query_string:
        kwargs['v'] = query_string

    kwarg_bits = []

    for key in sorted(kwargs.keys()):
        if isinstance(kwargs[key], six.string_types) and ' ' in kwargs[key]:
            kwarg_bits.append(u"%s='%s'" % (key, kwargs[key]))
        else:
            kwarg_bits.append(u"%s=%s" % (key, kwargs[key]))

    return u"{!%s %s}" % (parser_name, ' '.join(kwarg_bits))
def build_exact_query(
self, query_string)
def build_exact_query(self, query_string):
    return u'"%s"' % query_string
def build_not_query(
self, query_string)
def build_not_query(self, query_string):
    if ' ' in query_string:
        query_string = "(%s)" % query_string

    return u"NOT %s" % query_string
def build_params(
self, spelling_query=None, **kwargs)
def build_params(self, spelling_query=None, **kwargs):
    search_kwargs = {
        'start_offset': self.start_offset,
        'result_class': self.result_class
    }
    order_by_list = None

    if self.order_by:
        if order_by_list is None:
            order_by_list = []

        for field in self.order_by:
            direction = 'asc'
            if field.startswith('-'):
                direction = 'desc'
                field = field[1:]
            order_by_list.append((field, direction))

        search_kwargs['sort_by'] = order_by_list

    if self.date_facets:
        search_kwargs['date_facets'] = self.date_facets

    if self.distance_point:
        search_kwargs['distance_point'] = self.distance_point

    if self.dwithin:
        search_kwargs['dwithin'] = self.dwithin

    if self.end_offset is not None:
        search_kwargs['end_offset'] = self.end_offset

    if self.facets:
        search_kwargs['facets'] = self.facets

    if self.fields:
        search_kwargs['fields'] = self.fields

    if self.highlight:
        search_kwargs['highlight'] = self.highlight

    if self.models:
        search_kwargs['models'] = self.models

    if self.narrow_queries:
        search_kwargs['narrow_queries'] = self.narrow_queries

    if self.query_facets:
        search_kwargs['query_facets'] = self.query_facets

    if self.within:
        search_kwargs['within'] = self.within

    if spelling_query:
        search_kwargs['spelling_query'] = spelling_query

    return search_kwargs
def build_query(
self)
Interprets the collected query metadata and builds the final query to be sent to the backend.
def build_query(self):
    """
    Interprets the collected query metadata and builds the final query to
    be sent to the backend.
    """
    final_query = self.query_filter.as_query_string(self.build_query_fragment)

    if not final_query:
        # Match all.
        final_query = self.matching_all_fragment()

    if self.boost:
        boost_list = []

        for boost_word, boost_value in self.boost.items():
            boost_list.append(self.boost_fragment(boost_word, boost_value))

        final_query = "%s %s" % (final_query, " ".join(boost_list))

    return final_query
def build_query_fragment(
self, field, filter_type, value)
def build_query_fragment(self, field, filter_type, value):
    if isinstance(value, bool):
        value = str(value)
    if not isinstance(value, int) and field != 'code':
        value = re.sub(regex_space_punc, " ", value)
    return super(CustomElasticSearchQuery, self).build_query_fragment(field, filter_type, value)
def clean(
self, query_fragment)
Provides a mechanism for sanitizing user input before presenting the value to the backend.
A basic (override-able) implementation is provided.
def clean(self, query_fragment):
    """
    Provides a mechanism for sanitizing user input before presenting the
    value to the backend.

    A basic (override-able) implementation is provided.
    """
    if not isinstance(query_fragment, six.string_types):
        return query_fragment

    words = query_fragment.split()
    cleaned_words = []

    for word in words:
        if word in self.backend.RESERVED_WORDS:
            word = word.replace(word, word.lower())

        for char in self.backend.RESERVED_CHARACTERS:
            word = word.replace(char, '\\%s' % char)

        cleaned_words.append(word)

    return ' '.join(cleaned_words)
def clear_limits(
self)
Clears any existing limits.
def clear_limits(self):
    """Clears any existing limits."""
    self.start_offset, self.end_offset = 0, None
def clear_order_by(
self)
Clears out all ordering that has been already added, reverting the query to relevancy.
def clear_order_by(self):
    """
    Clears out all ordering that has been already added, reverting the
    query to relevancy.
    """
    self.order_by = []
def combine(
self, rhs, connector=u'AND')
def combine(self, rhs, connector=SQ.AND):
    if connector == SQ.AND:
        self.add_filter(rhs.query_filter)
    elif connector == SQ.OR:
        self.add_filter(rhs.query_filter, use_or=True)
def get_count(
self)
Returns the number of results the backend found for the query.
If the query has not been run, this will execute the query and store the results.
def get_count(self):
    """
    Returns the number of results the backend found for the query.
    If the query has not been run, this will execute the query and store
    the results.
    """
    if self._hit_count is None:
        # Limit the slice to 1 so we get a count without consuming
        # everything.
        if not self.end_offset:
            self.end_offset = 1

        if self._more_like_this:
            # Special case for MLT.
            self.run_mlt()
        elif self._raw_query:
            # Special case for raw queries.
            self.run_raw()
        else:
            self.run()

    return self._hit_count
def get_facet_counts(
self)
Returns the facet counts received from the backend.
If the query has not been run, this will execute the query and store the results.
def get_facet_counts(self):
    """
    Returns the facet counts received from the backend.
    If the query has not been run, this will execute the query and store
    the results.
    """
    if self._facet_counts is None:
        self.run()

    return self._facet_counts
def get_results(
self, **kwargs)
Returns the results received from the backend.
If the query has not been run, this will execute the query and store the results.
def get_results(self, **kwargs):
    """
    Returns the results received from the backend.
    If the query has not been run, this will execute the query and store
    the results.
    """
    if self._results is None:
        if self._more_like_this:
            # Special case for MLT.
            self.run_mlt(**kwargs)
        elif self._raw_query:
            # Special case for raw queries.
            self.run_raw(**kwargs)
        else:
            self.run(**kwargs)

    return self._results
def get_spelling_suggestion(
self, preferred_query=None)
Returns the spelling suggestion received from the backend.
If the query has not been run, this will execute the query and store the results.
def get_spelling_suggestion(self, preferred_query=None):
    """
    Returns the spelling suggestion received from the backend.
    If the query has not been run, this will execute the query and store
    the results.
    """
    if self._spelling_suggestion is None:
        self.run(spelling_query=preferred_query)

    return self._spelling_suggestion
def get_stats(
self)
Returns the stats received from the backend.
If the query has not been run, this will execute the query and store the results.
def get_stats(self):
    """
    Returns the stats received from the backend.
    If the query has not been run, this will execute the query and store
    the results.
    """
    if self._stats is None:
        self.run()

    return self._stats
def has_run(
self)
Indicates if any query has been run.
def has_run(self):
    """Indicates if any query has been run."""
    return None not in (self._results, self._hit_count)
def matching_all_fragment(
self)
def matching_all_fragment(self):
    return '*:*'
def more_like_this(
self, model_instance)
Allows backends with support for "More Like This" to return results similar to the provided instance.
def more_like_this(self, model_instance):
    """
    Allows backends with support for "More Like This" to return results
    similar to the provided instance.
    """
    self._more_like_this = True
    self._mlt_instance = model_instance
def post_process_facets(
self, results)
def post_process_facets(self, results):
    # Handle renaming the facet fields. Undecorate and all that.
    from haystack import connections
    revised_facets = {}
    field_data = connections[self._using].get_unified_index().all_searchfields()

    for facet_type, field_details in results.get('facets', {}).items():
        temp_facets = {}

        for field, field_facets in field_details.items():
            fieldname = field
            if field in field_data and hasattr(field_data[field], 'get_facet_for_name'):
                fieldname = field_data[field].get_facet_for_name()

            temp_facets[fieldname] = field_facets

        revised_facets[facet_type] = temp_facets

    return revised_facets
def raw_search(
self, query_string, **kwargs)
Runs a raw query (no parsing) against the backend.
This method causes the SearchQuery to ignore the standard query generating facilities, running only what was provided instead.
Note that any kwargs passed along will override anything provided to the rest of the SearchQuerySet.
def raw_search(self, query_string, **kwargs):
    """
    Runs a raw query (no parsing) against the backend.

    This method causes the SearchQuery to ignore the standard query
    generating facilities, running only what was provided instead.

    Note that any kwargs passed along will override anything provided
    to the rest of the ``SearchQuerySet``.
    """
    self._raw_query = query_string
    self._raw_query_params = kwargs
def run(
self, spelling_query=None, **kwargs)
Builds and executes the query. Returns a list of search results.
def run(self, spelling_query=None, **kwargs):
    """Builds and executes the query. Returns a list of search results."""
    final_query = self.build_query()
    search_kwargs = self.build_params(spelling_query, **kwargs)

    if kwargs:
        search_kwargs.update(kwargs)

    results = self.backend.search(final_query, **search_kwargs)
    self._results = results.get('results', [])
    self._hit_count = results.get('hits', 0)
    self._facet_counts = self.post_process_facets(results)
    self._spelling_suggestion = results.get('spelling_suggestion', None)
def run_mlt(
self, **kwargs)
Builds and executes the query. Returns a list of search results.
def run_mlt(self, **kwargs):
    """Builds and executes the query. Returns a list of search results."""
    if self._more_like_this is False or self._mlt_instance is None:
        raise MoreLikeThisError("No instance was provided to determine 'More Like This' results.")

    additional_query_string = self.build_query()
    search_kwargs = {
        'start_offset': self.start_offset,
        'result_class': self.result_class,
        'models': self.models
    }

    if self.end_offset is not None:
        search_kwargs['end_offset'] = self.end_offset - self.start_offset

    results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **search_kwargs)
    self._results = results.get('results', [])
    self._hit_count = results.get('hits', 0)
def run_raw(
self, **kwargs)
Executes a raw query. Returns a list of search results.
def run_raw(self, **kwargs):
    """Executes a raw query. Returns a list of search results."""
    search_kwargs = self.build_params()
    search_kwargs.update(self._raw_query_params)

    if kwargs:
        search_kwargs.update(kwargs)

    results = self.backend.search(self._raw_query, **search_kwargs)
    self._results = results.get('results', [])
    self._hit_count = results.get('hits', 0)
    self._facet_counts = results.get('facets', {})
    self._spelling_suggestion = results.get('spelling_suggestion', None)
def set_limits(
self, low=None, high=None)
Restricts the query by altering either the start, end or both offsets.
def set_limits(self, low=None, high=None):
    """Restricts the query by altering either the start, end or both offsets."""
    if low is not None:
        self.start_offset = int(low)

    if high is not None:
        self.end_offset = int(high)
def set_result_class(
self, klass)
Sets the result class to use for results.
Overrides any previous usages. If None is provided, Haystack will revert back to the default SearchResult object.
def set_result_class(self, klass):
    """
    Sets the result class to use for results.

    Overrides any previous usages. If ``None`` is provided, Haystack will
    revert back to the default ``SearchResult`` object.
    """
    if klass is None:
        klass = SearchResult

    self.result_class = klass
def using(
self, using=None)
Allows for overriding which connection should be used. This disables the use of routers when performing the query.
If None is provided, it has no effect on what backend is used.
def using(self, using=None):
    """
    Allows for overriding which connection should be used. This
    disables the use of routers when performing the query.

    If ``None`` is provided, it has no effect on what backend is used.
    """
    return self._clone(using=using)