搜索多个单词elasticsearch haystack

搜索多个单词elasticsearch haystack,第1张

搜索多个单词elasticsearch haystack

这个月我陷入了这个问题。

为了执行正确的查询,您需要重写(覆盖)一些 Haystack 对象。我发现这篇文章对扩展 Haystack 的 Elasticsearch 后端很有帮助。刚开始时看起来非常复杂,但一旦了解了它的工作原理,就简单了… :-)

博客文章介绍了如何实现elasticsearch的嵌套查询…好吧…我已经实现了基本的multi_match查询。

# -*- coding: utf-8 -*-
from __future__ import absolute_import

from django.conf import settings
from haystack.backends.elasticsearch_backend import (
    ElasticsearchSearchBackend,
    ElasticsearchSearchEngine,
    ElasticsearchSearchQuery,
)
from haystack.query import SearchQuerySet


class ElasticsearchEngineBackendCustom(ElasticsearchSearchBackend):
    """Elasticsearch backend with configurable settings and multi_match support.

    Reads two optional Django settings:

    * ``ELASTICSEARCH_INDEX_SETTINGS`` -- when truthy, replaces
      ``DEFAULT_SETTINGS`` on this instance.
    * ``ELASTICSEARCH_DEFAULT_ANALYZER`` -- when truthy, replaces
      ``DEFAULT_ANALYZER`` on this instance.
    """

    DEFAULT_ANALYZER = "snowball"

    def __init__(self, connection_alias, **connection_options):
        """Initialise the parent backend, then apply the optional overrides."""
        super(ElasticsearchEngineBackendCustom, self).__init__(
            connection_alias, **connection_options)

        user_settings = getattr(settings, 'ELASTICSEARCH_INDEX_SETTINGS', {})
        if user_settings:
            self.DEFAULT_SETTINGS = user_settings

        user_analyzer = getattr(settings, 'ELASTICSEARCH_DEFAULT_ANALYZER', '')
        if user_analyzer:
            self.DEFAULT_ANALYZER = user_analyzer

    def build_search_kwargs(self, query_string, sort_by=None, start_offset=0,
                            end_offset=None, fields='', highlight=False,
                            facets=None, date_facets=None, query_facets=None,
                            narrow_queries=None, spelling_query=None,
                            within=None, dwithin=None, distance_point=None,
                            models=None, limit_to_registered_models=None,
                            result_class=None, multi_match=None):
        """Build the search kwargs, optionally swapping in a multi_match query.

        All arguments except ``multi_match`` are forwarded positionally to the
        parent implementation. When ``multi_match`` is truthy it must be a
        dict with the keys ``query``, ``fields``, ``tie_breaker`` and
        ``minimum_should_match``; the ``query`` entry produced by the parent
        is then replaced by a ``multi_match`` clause built from those values.
        """
        out = super(ElasticsearchEngineBackendCustom, self).build_search_kwargs(
            query_string, sort_by, start_offset, end_offset,
            fields, highlight, facets,
            date_facets, query_facets,
            narrow_queries, spelling_query,
            within, dwithin, distance_point,
            models, limit_to_registered_models,
            result_class)

        if multi_match:
            out['query'] = {
                'multi_match': {
                    'query': multi_match['query'],
                    'fields': multi_match['fields'],
                    'tie_breaker': multi_match['tie_breaker'],
                    'minimum_should_match': multi_match['minimum_should_match'],
                }
            }

        return out

    def build_schema(self, fields):
        """Build the mapping and set an analyzer on indexed string fields.

        Returns the ``(content_field_name, mapping)`` pair from the parent
        implementation, with ``analyzer`` set on matching fields to the
        field's own ``analyzer`` attribute or, failing that,
        ``self.DEFAULT_ANALYZER``.
        """
        content_field_name, mapping = super(
            ElasticsearchEngineBackendCustom, self).build_schema(fields)

        for field_name, field_class in fields.items():
            field_mapping = mapping[field_class.index_fieldname]

            if field_mapping['type'] == 'string' and field_class.indexed:
                # NOTE(review): this applies the analyzer to non-facet fields
                # *or* (edge_)ngram fields -- confirm that including ngram
                # fields is intended.
                if (not hasattr(field_class, 'facet_for')
                        or field_class.field_type in ('ngram', 'edge_ngram')):
                    field_mapping['analyzer'] = getattr(
                        field_class, 'analyzer', self.DEFAULT_ANALYZER)

            mapping.update({field_class.index_fieldname: field_mapping})

        return content_field_name, mapping

    def multi_match_run(self, query, fields, minimum_should_match, tie_breaker):
        """Run a MultiMatch query via elasticsearch-dsl and process the result.

        The raw response is passed through ``self._process_results``.
        """
        # Imported lazily so elasticsearch-dsl is only needed when this
        # method is actually used.
        from elasticsearch_dsl import Search
        from elasticsearch_dsl.query import MultiMatch

        # NOTE(review): no index is specified on the Search object -- confirm
        # whether the query should be limited to this backend's index.
        raw = Search().using(self.conn).query(
            MultiMatch(
                query=u'{}'.format(query),
                fields=fields,
                minimum_should_match=minimum_should_match,
                tie_breaker=tie_breaker,
            )
        ).execute()

        return self._process_results(raw)


class ElasticsearchSearchQueryCustom(ElasticsearchSearchQuery):
    """Search query that can carry and execute a multi_match query."""

    # Class-level default so build_params() works for ordinary queries that
    # never call add_multi_match_query(); without it the attribute lookup in
    # build_params() raises AttributeError.
    multi_match_query = None

    def multi_match(self, query, fields, minimum_should_match, tie_breaker):
        """Execute the multi_match query now and cache results on this query."""
        results = self.backend.multi_match_run(
            query, fields, minimum_should_match, tie_breaker)
        self._results = results.get('results', [])
        self._hit_count = results.get('hits', 0)

    def add_multi_match_query(self, query, fields, minimum_should_match, tie_breaker):
        """Store the multi_match parameters for later use by build_params()."""
        self.multi_match_query = {
            'query': query,
            'fields': fields,
            'minimum_should_match': minimum_should_match,
            'tie_breaker': tie_breaker,
        }

    def build_params(self, spelling_query=None, **kwargs):
        """Return the parent's search params plus ``multi_match`` when set."""
        search_kwargs = super(ElasticsearchSearchQueryCustom, self).build_params(
            spelling_query, **kwargs)

        if self.multi_match_query:
            search_kwargs['multi_match'] = self.multi_match_query

        return search_kwargs


class ElasticsearchSearchQuerySetCustom(SearchQuerySet):
    """SearchQuerySet exposing a ``multi_match`` method."""

    def multi_match(self, query, fields, minimum_should_match="35%", tie_breaker=0.3):
        """Return a clone whose query has run a multi_match search.

        ``fields`` is passed through unchanged to the underlying query.
        The search is executed immediately on the clone's query object.
        """
        clone = self._clone()
        clone.query.add_multi_match_query(
            query, fields, minimum_should_match, tie_breaker)
        clone.query.multi_match(
            query, fields, minimum_should_match, tie_breaker)
        return clone


class ElasticsearchEngineCustom(ElasticsearchSearchEngine):
    """Engine wiring the custom backend and query classes together."""

    backend = ElasticsearchEngineBackendCustom
    query = ElasticsearchSearchQueryCustom

如您所见,我使用了

elasticsearch-dsl
来执行查询(MultiMatch)。下面这句话概括了那篇博客文章:
ElasticsearchSearchQuerySetCustom().multi_match(...)
调用依赖于
ElasticsearchSearchQueryCustom
,后者又依赖于
ElasticsearchEngineBackendCustom

然后在您的设置中放入elasticsearch配置,例如:

# Analyzer name used as the backend's default analyzer.
ELASTICSEARCH_DEFAULT_ANALYZER = 'italian'

# Index settings handed to the backend. When this value is truthy the backend
# uses it in place of its built-in DEFAULT_SETTINGS, so supply the complete
# settings you need.
ELASTICSEARCH_INDEX_SETTINGS = {
    "settings": {
        # [...] elided in the original article -- put your index/analysis
        # settings here.
    },
}

您可以从 Elasticsearch 文档的“语言分析器(Language Analyzers)”部分获取适用于您的语言的

ELASTICSEARCH_INDEX_SETTINGS
配置。

您还需要覆盖

SearchForm

# -*- coding: utf-8 -*-
from __future__ import absolute_import

from haystack.forms import SearchForm

from .backend import ElasticsearchSearchQuerySetCustom


class SearchFormCustom(SearchForm):
    """Search form that runs a multi_match query over ``title`` and ``text``."""

    def search(self):
        """Return the multi_match results for the submitted ``q`` value.

        Falls back to ``self.no_query_found()`` when the form is invalid or
        the query is empty (before or after cleaning).
        """
        # Validate first: ``cleaned_data`` only exists once the form has been
        # validated, so it must not be read before is_valid() is called.
        if not self.is_valid():
            return self.no_query_found()

        raw_query = self.cleaned_data.get('q')
        if not raw_query:
            return self.no_query_found()

        query = self.searchqueryset.query.clean(raw_query)
        if not query:
            return self.no_query_found()

        # The caret sets a per-field boost: title^8, text^0.5.
        sqs = ElasticsearchSearchQuerySetCustom().multi_match(
            query, ['title^8', 'text^0.5'])
        return sqs

字段

title
和
text
必须存在于索引中;脱字符号(^)用于为字段设置权重提升(boost)。

您需要覆盖 Haystack 的 URL 模式,才能使用自定义表单:

# Map the empty path of this URLconf to a search view built with the custom
# form class, registered under the name 'haystack-search'.
urlpatterns = patterns(
    'search.views',
    url(
        '^$',
        search_view_factory(form_class=SearchFormCustom),
        name='haystack-search',
    ),
)

就是这样,HTH :-)

注意
不要使用

result.object.something
,而应使用索引中的字段,例如
result.title
,因为
result.object.title
会访问数据库!参见 Haystack 最佳实践。



欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/zaji/4933559.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-11-12
下一篇 2022-11-13

发表评论

登录后才能评论

评论列表(0条)

保存