ElasticSearch


ElasticSearch 查询

match all 查询

  • 匹配所有文档,_score 为 1

    GET policy_info/_search
    {
        "query": {
            "match_all": {}
        }
    }
    {
      "took": 0,
      "timed_out": false,
      "_shards": {
        "total": 3,
        "successful": 3,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": {
          "value": 3556,
          "relation": "eq"
        },
        "max_score": 1,
        "hits": [
          {
            "_index": "policy_info",
            "_id": "5",
            "_score": 1,
            "_source": {
    ...
    ...
  • boost参数可改变_score的值

    GET policy_info/_search
    {
      "query": {
        "match_all": {"boost": 1.2 }
      }
    }
    {
          ...
        "max_score": 1.2,
        "hits": [
          {
            "_index": "policy_info",
            "_id": "5",
            "_score": 1.2,
            "_source": {
                ...
            }
          }
    ...
    }
  • match none查询,反转match all查询结果,一个文档也不匹配

    GET policy_info/_search
    {
      "query": {
        "match_none": {}
      }
    }
    {
      "took": 0,
      "timed_out": false,
      "_shards": {
        "total": 3,
        "successful": 3,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": {
          "value": 0,
          "relation": "eq"
        },
        "max_score": null,
        "hits": []
      }
    }

Term 查询

term查询

  • 返回和提供词严格匹配的文档,不要对text字段使用term查询,使用GET policy_info/_mapping查看索引结构;
  • 参数:

    GET xxx/_search
    {
      "query": {
        "term": {
          "field": {
            "value": 12,
            "boost": 2,
            "case_insensitive": true
          }
        }
      }
    }

    field[Required, string]:查询字段

    • value[Required, string]:提供的精准匹配的值,包括空格,大小写敏感;
    • boost[Optional, float]:用于减少或增加查询的相关分数;默认为1;
    • case_insensitive[Optional, bool]:是否忽略大小写(为true时按ASCII不区分大小写匹配),默认false;
GET policy_info/_search
{
  "query": {
    "term": {
      "id": {
        "value": 12,
        "boost": 2
      }
    }
  }
}
{
  "took": 5,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": 2,
    "hits": [
      {
        "_index": "policy_info",
        "_id": "12",
        "_score": 2,
        "_source": {
          "id": 12,
          "qdu_id": "bc58c81c7b2143819487053cb50f9acf",
          ...
          "create_time": "2024-09-06 16:15:41",
          "row_update_time": "2024-09-06 16:15:41"
        }
      }
    ]
  }
}

terms 查询

  • 返回和提供词严格匹配的文档,可提供多个值,类似SQL or
  • 参数:

    GET xxx/_search
    {
      "query": {
        "terms": {
          "field": ["xxx", "xxx"],
          "boost": 2
        }
      }
    }

    field[Required, List[string]]:查询字段;

    boost[Optional, float]:用于减少或增加查询的相关分数;默认为1;

GET policy_info/_search
{
  "query": {
    "terms": {
      "id": [12, 32],
      "boost": 2
    }
  }
}
{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 2,
      "relation": "eq"
    },
    "max_score": 2,
    "hits": [
      {
        "_index": "policy_info",
        "_id": "12",
        "_score": 2,
        "_source": {
          "id": 12,
          "create_time": "2024-09-06 16:15:41",
          "row_update_time": "2024-09-06 16:15:41"
        }
      },
      {
        "_index": "policy_info",
        "_id": "32",
        "_score": 2,
        "_source": {
          "id": 32,
          "create_time": "2024-09-06 16:15:47",
          "row_update_time": "2024-09-06 16:15:47"
        }
      }
    ]
  }
}

Range 查询

  • 查找指定范围内的文档;不能用来查text和keyword字段,查询日期时可用日期函数
  • 参数:

    GET xxx/_search
    {
      "query": {
        "range": {
          "field": {
            "gte": "2024-09-06",
            "lte": "2024-09-06",
            "format": "yyyy-MM-dd",
            "time_zone": "+01:00",
            "relation": "CONTAINS"
          }
        }
      }
    }

    field[Required, object]:查询字段

    • gt[Optional]:大于
    • gte[Optional]:大于等于
    • lt[Optional]:小于
    • lte[Optional]:小于等于
    • format[Optional, string]:在处理日期字段时,用来处理日期格式,不写默认使用mapping中的格式;
    • relation[Optional, string]:范围查询如何匹配范围字段的值

      • INTERSECTS[default]:只要查询的范围与文档的范围有任何重叠,文档就会被匹配;
      • CONTAINS:当文档的范围完全包含查询的范围时,文档才会被匹配;
      • WITHIN:当文档的范围完全被包含在查询的范围内时,文档才会被匹配;
    • time_zone[Optional, string]:国际协调时间(UTC)偏移量或者IANA时区;合法值例如 +01:00, -09:00, Asia/Shanghai
    • boost[Optional, float]:用于减少或增加查询的相关分数;默认为1;
GET policy_info/_search
{
  "query": {
    "range": {
      "create_time": {
        "gte": "2024-09-06",
        "lte": "2024-09-06",
        "format": "yyyy-MM-dd",
        "time_zone": "+01:00",
        "relation": "CONTAINS"
      }
    }
  }
}
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1407,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "policy_info",
        "_id": "5",
        "_score": 1,
        "_source": {
          "id": 5,
          ...
          "create_time": "2024-09-06 16:15:39",
          "row_update_time": "2024-09-06 16:15:39"
        }
      },

IDs 查询

  • 主键查询
  • 参数:

    GET xxx/_search
    {
      "query": {
        "ids" : {
          "values" : ["1", "4", "100"]
        }
      }
    }

    values[Required, List[string]]:主键列表;

GET policy_info/_search
{
  "query": {
    "ids": {
      "values": [1]
    }
  }
}
{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "policy_info",
        "_id": "1",
        "_score": 1,
        "_source": {
          "id": 1,
          ...
          "create_time": "2024-09-06 16:15:38",
          "row_update_time": "2024-09-06 16:15:38"
        }
      }
      ...

Exists 查询

  • exists 查询会匹配那些在指定字段上有值的文档;
  • 参数:

    GET xxx/_search
    {
      "query": {
        "exists": {
          "field": "xxx"
        }
      }
    }

    field[Required, string]:查询的字段

    • 如果json值是null或[],则认为不存在;
    • 空字符串,包含null和另一个值的数组,自定义空值,则认为存在;

Fuzzy 查询

  • 返回查询字段的值和提供值相似的文档,用莱文斯坦距离衡量;指两个字串之间,由一个转成另一个所需的最少编辑操作次数;允许的编辑操作包括:将一个字符替换成另一个字符;插入一个字符;删除一个字符;
  • 参数:

    GET xxx/_search
    {
      "query": {
        "fuzzy": {
          "field": {
            "value": "xxx",
            "fuzziness": 1,
            "max_expansions": 50,
            "prefix_length": 0,
            "transpositions": true
          }
        }
      }
    }

    field[Required, string]:查询字段

    • value[Required, string]:提供的匹配的值;
    • fuzziness[Optional, string]:最大可编辑距离;[Common options | Elasticsearch Guide [8.17] | Elastic](https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness)

      • 明确指定允许的编辑距离;只能是 0,1,2;
      • AUTO 是一种动态计算模糊度的方式,基于查询字符串的长度来确定允许的编辑距离;

        • 字符串长度为 0 到 2:不允许编辑(等同于 fuzziness: 0);
        • 字符串长度为 3 到 5:允许一个编辑(等同于 fuzziness: 1);
        • 字符串长度大于 5:允许两个编辑(等同于 fuzziness: 2);
    • max_expansions[Optional, integer]:模糊查询过程中生成的变体词语的最大数量;默认50;
    • prefix_length[Optional, integer]:前缀长度,表示在进行模糊匹配之前,词语的前缀部分必须完全匹配,默认0;
    • transpositions[Optional, Boolean]:是否允许字符换位,默认true;
    • rewrite[Optional, string]: rewrite parameter.
GET township_info_new/_search
{
  "query": {
    "fuzzy": {
      "township_code": {
        "value": "5404242070000",
        "fuzziness": 2,
        "max_expansions": 50,
        "prefix_length": 3,
        "transpositions": true
      }
    }
  }
}
{
  "took": 151,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 19,
      "relation": "eq"
    },
    "max_score": 8.307324,
    "hits": [
      {
        "_index": "township_info_new",
        "_id": "36004",
        "_score": 8.307324,
        "_source": {
        ...
        ...

Prefix 查询

  • 返回字段匹配指定前缀的文档;
  • 参数:

    GET xxx/_search
    {
      "query": {
        "prefix": {
          "field": {
            "value": "xxx",
            "rewrite": "constant_score",
            "case_insensitive": true
          }
        }
      }
    }

    field[Required, string]:查询字段

    • value[Required, string]:提供的匹配的值;
    • rewrite[Optional, string]: rewrite parameter.
    • case_insensitive[Optional, bool]:是否忽略大小写(为true时按ASCII不区分大小写匹配),默认false;
GET policy_info/_search
{
  "query": {
    "prefix": {
      "qdu_id": {
        "value": "552",
        "rewrite": "constant_score",
        "case_insensitive": true
      }
    }
  }
}
{
  "took": 62,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "policy_info",
        "_id": "5",
        "_score": 1,
        "_source": {
          "id": 5,
          "qdu_id": "552d2db621224195b78a8868bff8f330",
          ...
          "create_time": "2024-09-06 16:15:39",
          "row_update_time": "2024-09-06 16:15:39"
        }
      }
    ]
  }
}

Regexp 查询

  • 返回与提供正则相匹配的文档;
  • 参数:

    GET xxx/_search
    {
      "query": {
        "regexp": {
          "field": {
            "value": "5.*0",
            "flags": "ALL",
            "case_insensitive": true,
            "max_determinized_states": 10000,
            "rewrite": "constant_score"
          }
        }
      }
    }

    field[Required, string]:查询字段;

    • value[Required, string]:正则表达式
    • flags[Optional, string]:启用可选运算符;

      • ALL:启用所有可选运算符;
      • "":ALL 的别名;
      • COMPLEMENT:启用~运算符;
      • EMPTY:启用#运算符;#运算符不匹配任何字符串,甚至不匹配空字符串;
      • INTERVAL:启用<>运算符;使用<>来匹配数字范围;
      • INTERSECTION:启用&运算符;类似AND运算符;如果左侧和右侧的模式都匹配,则匹配将成功;
      • ANYSTRING:启用@运算符;匹配任何整个字符串
      • NONE:禁用所有可选运算符;
    • case_insensitive[Optional, bool]:是否忽略大小写(为true时按ASCII不区分大小写匹配),默认false;
    • max_determinized_states[Optional, integer]:用于限制正则表达式查询在确定化过程中可以生成的状态数,以防止过于复杂的正则表达式导致性能问题或内存溢出;
    • rewrite[Optional, string]: rewrite parameter
GET policy_info/_search
{
  "query": {
    "regexp": {
      "qdu_id": {
        "value": "5.*0",
        "flags": "ALL",
        "case_insensitive": true,
        "max_determinized_states": 10000
      }
    }
  }
}
{
  "took": 6,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 10,
      "relation": "eq"
    },

Wildcard 查询

  • 返回包含与通配符模式匹配的文档;
  • 参数:

    GET xxx/_search
    {
      "query": {
        "wildcard": {
          "field": {
            "value": "xxx",
            "boost": 1.0,
            "rewrite": "constant_score_blended"
          }
        }
      }
    }

    field[Required, string]:查询字段;

    • value[Required, string]:通配符表达式;此参数支持两个通配符运算符:?,匹配任何单个字符;*,可以匹配零个或多个字符,包括一个空字符;
    • boost[Optional, float]:用于减少或增加查询的相关分数;默认为1;
    • case_insensitive[Optional, bool]:是否忽略大小写(为true时按ASCII不区分大小写匹配),默认false;
    • rewrite[Optional, string]: rewrite parameter
    • wildcard[Optional, string]:value的别名,两个都提供,则使用最后一个;
GET policy_info/_search
{
  "query": {
    "wildcard": {
      "qdu_id": {
        "value": "*8888*",
        "boost": 1.0
      }
    }
  }
}
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 2,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
    ...
    

Compound 查询

Boolean 查询

  • 与其他查询的布尔组合匹配的文档匹配的查询;
  • 参数:

    get xxx/_search
    {
      "query":{
        "bool": {
          "must": [
            {
              "term": {
                "qds": {
                  "value": "0941017"
                }
              }
            }
          ],
          "should": [
            {
              "range": {
                "id": {
                  "gte": 1,
                  "lte": 200
                }
              }
            }
          ],
          "filter": [
            {
              "range": {
                "row_update_time": {
                  "gte": "2024-09-19",
                  "lte": "2024-09-20",
                  "format": "yyyy-MM-dd"
                }
              }
            }
          ],
          "must_not": [
            {
              "terms": {
                "qdu_id": [
                  "552d2db621224195b78a8868bff8f330",
                  "8807bbf8db7b434ca8287209861962f3"
                ]
              }
            }
          ]
        }
      }
    }

    must:查询必须出现在文档中,会贡献得分;

    filter:查询必须出现在文档中,分数会被忽略;

    should:查询应该出现在匹配的文档中,出现会贡献得分;如果布尔查询中只有 should 子句,则至少有一个 should 子句必须匹配;否则,should 子句的数量必须至少为 minimum_should_match 参数指定的数量;

    must_not:查询不能出现在文档中;

    minimum_should_match:[minimum_should_match parameter | Elasticsearch Guide [8.17] | Elastic](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-minimum-should-match.html)

    boost:用于减少或增加查询的相关分数;默认为1;

GET policy_info/_search
{
  "query":{
    "bool": {
      "must": [
        {
          "term": {
          "qds": "0932009"
          }
        }
      ], 
      "should": [
        {
         "range": {
            "id": {
              "lte": 5
            }
          }
        }
      ],
      "filter": [
        {
          "range": {
            "row_update_time": {
              "gte": "2024-09-01",
              "lte": "2024-09-20",
              "format": "yyyy-MM-dd"
            }
          }
        }
      ],
      "must_not": [
        {
          "range": {
            "id": {
              "gte": 100
            }
          }
        }
      ]
    }
  }
}
{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 65,
      "relation": "eq"
    },
    "max_score": 5.050299,
    "hits": [
      {
      ...
      }
      ...

Boosting 查询

  • 指定一个正向查询和一个负向查询,并通过降低负向查询匹配的文档的分数来影响最终的相关性评分;
  • 参数:

    GET xxx/_search
    {
      "query": {
        "boosting": {
          "positive": {},
          "negative": {},
          "negative_boost": 0.2
        }
      }
    }

    positive[Required, query]:想要运行的查询,返回的文档必须和此查询匹配;

    negative[Required, query]:从返回的文档中进行匹配,减少匹配到的文档相关度评分;计算方式如下:

    • 获取positive查询中返回的相关度评分;
    • 评分和negative_boost相乘,得到最终得分;

    negative_boost[Required, float]:0到1.0之间的值,用来减少相关度得分;

GET policy_info/_search
{
  "query": {
    "boosting": {
      "positive": {
        "term": {
          "qds": {
            "value": "0932009"
          }
        }
      },
      "negative": {
        "range": {
          "id": {
            "gte": 2,
            "lte": 65
          }
        }
      },
      "negative_boost": 0.2
    }
  }
}
{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 65,
      "relation": "eq"
    },
    "max_score": 3.9069033,
    "hits": [
      {
        "_index": "policy_info",
        "_id": "1",
        "_score": 3.9069033,
        "_source": {
          ...
        }
      },
      {
        "_index": "policy_info",
        "_id": "5",
        "_score": 0.81005985,
        "_source": {
          ...
        }
      },

Constant score 查询

  • 将查询的相关性评分设置为一个常量值;
  • 参数:

    GET xxx/_search
    {
      "query": {
        "constant_score": {
          "filter": {},
          "boost": 1.2
        }
      }
    }

    filter[Required, query]:返回匹配此查询的文档;

    boost:查询的相关分数;默认为1;

GET policy_info/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "range": {
          "id": {
            "gte": 1,
            "lte": 2
          }
        }
      },
      "boost": 1.2
    }
  }
}
{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 2,
      "relation": "eq"
    },
    "max_score": 1.2,
    "hits": []
    ...

Disjunction max 查询

  • 组合多个查询,并返回匹配文档中得分最高的那个查询的分数;
  • 参数:

    GET policy_info/_search
    {
      "query": {
        "dis_max": {
          "tie_breaker": 0.7,
          "boost": 1,
          "queries": [
              {},
              {}
          ]
        }
      }
    }

    tie_breaker[Optional, float]:$final\_score = max\_score + tie\_breaker \times \sum_{i \neq max} score_i$,即最高分加上其余匹配子句的得分之和乘以tie_breaker;由于默认值为0,所以Elasticsearch会默认只使用最高的相关性分数;

    queries[Required, List[query]]:包含一个或多个查询子句;返回的文档必须与这些查询中的一个或多个匹配;如果文档匹配多个查询,Elasticsearch将使用最高的相关性分数;

GET policy_info/_search
{
  "query": {
    "dis_max": {
      "tie_breaker": 0.7,
      "boost": 1,
      "queries": [
        {
          "range": {
            "id": {
              "gte": 1,
              "lte": 2
            }
          }
        },
        {
          "term": {
            "qdu_id": {
              "value": "b1e4ef6d8dba42b297c99c0b450eecab"
            }
          }
        }
      ]
    }
  }
}
{
  "took": 10,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": 6.6652584,
    "hits": [
    ...
    ]
    ...

Function score 查询  TODO

Full Text 查询

match 查询

  • 匹配搜索会比较搜索词和每个文档的相似度,只要搜索词能命中文档的分词就会被搜索到;
  • 参数:

    GET policy_info/_search
    {
      "query": {
        "match": {
          "field": {
            "query": "xxxxx",
            "analyzer": "standard",
            "auto_generate_synonyms_phrase_query": true,
            "boost": 1,
            "fuzziness": 1,
            "max_expansions": 10,
            "prefix_length": 2,
            "fuzzy_transpositions": true,
            "fuzzy_rewrite": "constant_score",
            "lenient": "true",
            "operator": "or",
            "minimum_should_match": 6,
            "zero_terms_query": "none"
          }
        }
      }
    }

    field[Required, object]:查询字段;

    • query[Required, string]:查询
    • analyzer[Optional, string]:分词器
    • auto_generate_synonyms_phrase_query[Optional, Boolean]:用于控制在处理同义词时是否自动生成短语查询;
    • boost[Optional, float]:用于减少或增加查询的相关分数;默认为1;
    • fuzziness[Optional, string]:最大可编辑距离;[Common options | Elasticsearch Guide [8.17] | Elastic](https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness)
    • max_expansions[Optional, integer]:模糊查询过程中生成的变体词语的最大数量;
    • prefix_length[Optional, integer]:前缀长度,表示在进行模糊匹配之前,词语的前缀部分必须完全匹配的长度
    • fuzzy_transpositions[Optional, Boolean]:是否允许字符换位
    • fuzzy_rewrite[Optional, string]:rewrite parameter
    • lenient[Optional, Boolean]:是否忽略格式错误
    • operator:指定查询词条之间的逻辑关系;

      • OR:词条之间的关系是“或”,即只要有一个词条匹配,文档就会被认为是相关的;
      • AND:词条之间的关系是“与”,即所有词条都必须匹配;
    • minimum_should_match:至少应该匹配的数量,[minimum_should_match parameter | Elasticsearch Guide [8.17] | Elastic](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-minimum-should-match.html)
    • zero_terms_query:analyzer 分析后如果没有词,例如停用词被删除:

      • none:默认,返回空;
      • all:全部返回;
GET policy_info/_search
{
  "query": {
    "match": {
      "standard_department": {
        "query": "南京,人民,政府",
        "analyzer": "standard",
        "auto_generate_synonyms_phrase_query": true,
        "boost": 1,
        "fuzziness": 1,
        "max_expansions": 10,
        "prefix_length": 2,
        "fuzzy_transpositions": true,
        "fuzzy_rewrite": "constant_score",
        "lenient": "true",
        "operator": "or",
        "minimum_should_match": 6,
        "zero_terms_query": "none"
      }
    }
  }
}
{
  "took": 18,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 65,
      "relation": "eq"
    },
    "max_score": 6,
    "hits": []

Match boolean prefix 查询

  • 对输入的内容进行分词,对每个分词使用term查询(除最后一个分词),对最后一个分词使用prefix查询,把所有子查询放到bool查询的should列表中;
  • 参数:

    GET xxx/_search
    {
      "query": {
        "match_bool_prefix":{
          "field": {
            "query": "xxxx xx",
            "analyzer": "standard",
            "operator": "OR",
            "minimum_should_match": 1,
            "fuzziness": "AUTO",
            "prefix_length": 2,
            "max_expansions": 50,
            "fuzzy_transpositions": true
          }
        }
      }
    }

    field[Required, object]:查询字段;

    • query[Required, string]:查询
    • analyzer[Optional, string]:分词器
    • operator[Optional, string]:指定查询词条之间的逻辑关系;

      • OR:词条之间的关系是“或”,即只要有一个词条匹配,文档就会被认为是相关的;
      • AND:词条之间的关系是“与”,即所有词条都必须匹配;
    • minimum_should_match[Optional, float]:至少应该匹配的数量,[minimum_should_match parameter | Elasticsearch Guide [8.17] | Elastic](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-minimum-should-match.html)
    • fuzziness, prefix_length, max_expansions, fuzzy_transpositions, fuzzy_rewrite参数会被放入所有term查询中;
GET policy_info/_search
{
  "query": {
    "match_bool_prefix":{
      "department_district": {
        "query": "320100 32020",
        "analyzer": "standard",
        "operator": "OR",
        "minimum_should_match": 1,
        "fuzziness": "AUTO",
        "prefix_length": 2,
        "max_expansions": 50,
        "fuzzy_transpositions": true
      }
    }
  }
}
{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1407,
      "relation": "eq"
    },
    "max_score": 3.4872897,
    "hits": []
    ...
...

Match phrase

  • match_phrase会将检索关键词分词;match_phrase的分词结果必须在被检索字段的分词中都包含,而且顺序必须相同,而且默认必须都是连续的;
  • 参数

    GET xxx/_search
    {
      "query": {
        "match_phrase": {
          "field": {
            "query": "xxx",
            "slop": 5,
            "analyzer": "standard", 
            "zero_terms_query": "none"
          }
        }
      }
    }

    field[Required, object]:查询字段;

    • query[Required, string]:查询;
    • analyzer[Optional, string]:分词器;
    • slop[Optional, float]:允许短语查询在词条之间插入slop个数量的其他词条;
    • zero_terms_query:analyzer 分析后如果没有词,例如停用词被删除:

      • none:默认,返回空;
      • all:全部返回;
GET policy_info/_search
{
  "query": {
    "match_phrase": {
      "title": {
        "query": "人民政府工作规则 通知",
        "slop": 2,
        "analyzer": "standard", 
        "zero_terms_query": "none"
      }
    }
  }
}
{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 6,
      "relation": "eq"
    },
    "max_score": 8.821167,
    "hits": [
      {
        "_index": "policy_info",
        "_id": "2884",
        "_score": 8.821167,
        "_source": {
          "title": "许昌市人民政府关于印发许昌市人民政府工作规则的通知",
        }
      },

Match phrase prefix 查询

  • 用于查找包含提供的文本词条的文档,这些词条的顺序与提供的顺序相同。提供的文本的最后一个词条被视为前缀,匹配任何以该词条开头的单词。
  • 参数:

    GET policy_info/_search
    {
      "query": {
        "match_phrase_prefix": {
          "field": {
            "query": "xxx xxx",
            "analyzer": "standard",
            "max_expansions": 10,
            "slop": 1,
            "zero_terms_query": "none"
          }
        }
      }
    }

    field[Required, object]:查询字段;

    • query[Required, string]:查询;
    • analyzer[Optional, string]:分词器;
    • max_expansions[Optional, integer]:查询过程中生成的变体词语的最大数量,默认50
    • slop[Optional, float]:允许短语查询在词条之间插入slop个数量的其他词条;
    • zero_terms_query:analyzer 分析后如果没有词,例如停用词被删除:

      • none:默认,返回空;
      • all:全部返回;
GET policy_info/_search
{
  "query": {
    "match_phrase_prefix": {
      "title": {
        "query": "市人民政府关于印发",
        "analyzer": "standard",
        "max_expansions": 10,
        "slop": 1,
        "zero_terms_query": "none"
      }
    }
  }
}
{
  "took": 13,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 295,
      "relation": "eq"
    },
    "max_score": 5.0614715,
    "hits": [
      {
        "_index": "policy_info",
        "_id": "1881",
        "_score": 5.0614715,
        "_source": {
          "id": 1881,
          "qdu_id": "0e286b12060f464eb893fb0a75dc66df",
          ...
          "title": "洛阳市人民政府关于印发洛阳动力谷发展规划(2011-2015..",
        }
      },
...

Query string 查询

  • 根据提供的查询字符串返回文档
  • 参数

    GET policy_info/_search
    {
      "query": {
        "query_string": {
          "default_field": "xxx",
          "query": "",
          "allow_leading_wildcard": true,
          "analyze_wildcard": false,
          "analyzer": "standard",
          "auto_generate_synonyms_phrase_query": false,
          "boost": 1,
          "default_operator": "OR",
          "enable_position_increments": true,
          "fields": ["*"],
          "fuzziness": 2,
          "fuzzy_max_expansions": 50,
          "fuzzy_prefix_length": 0,
          "fuzzy_transpositions": true,
          "lenient": false,
          "max_determinized_states": 10000,
          "minimum_should_match": 1,
          "quote_analyzer": "simple",
          "phrase_slop": 2,
          "quote_field_suffix": ".appName",
          "rewrite": "constant_score",
          "time_zone": "+00:00"
        }
      }
    }

    query[Required, string]:查询字符串,语法:

    • field name

      • status:active:在名为status的字段中查找active
      • title:(quick OR brown):在名为title的字段中查找quick或brown
      • author:"John Smith":在名为author的字段中查找精确短语John Smith
      • first\ name:Alice:在名为first name的字段中查找Alice
      • book.\*:(quick OR brown):在所有以 book. 开头的字段中查找quick或brown
      • _exists_:title:查title非空的数据;
    • wildcards

      • 用?代替单个字符,用*代替零个或多个字符;
      • 匹配符在开头可能导致性能问题。
    • Regular expression

      • 使用"/"包裹的字符串可被解析为正则表达式;例如:name:/joh?n(ath[oa]n)/
    • Fuzziness

    • Proximity

      • 短语查询要求单词是完全相同的顺序且距离为0,但临近查询可以设置最大可编辑距离;
      • "fox quick"~5:fox和quick间最多可有五个单词;
    • Ranges:[]闭区间,{}开区间;

      • date:[2012-01-01 TO 2012-12-31]:日期从2012-01-01到2012-12-31
      • count:[1 TO 5]:count 从1到5
      • tag:{alpha TO omega}:tag在alpha和omega之间,开区间;
      • count:[10 TO *]:count 大于等于10
      • date:{* TO 2012-01-01}:date小于2012-01-01
      • count:[1 TO 5}:从1到5,包括1,不包括5
      • age:>10;age:>=10;age:<10;age:<=10:这些也行;
    • Boosting

      • ^符号增加相关度,可用在单词,短语和组上;
      • 例如quick^2 fox:quick的权重为2,fox的权重为1
    • Boolean operators

      • +单词必须被匹配,-单词必须不被匹配;
      • 例如:quick brown +fox -news:fox必须被匹配,news必须不被匹配;quick和brown可不被匹配,匹配到会增加相关度;
    • Grouping

      • (quick OR brown) AND fox:匹配quick AND fox或者brown AND fox;
      • status:(active OR pending) title:(full text search)^2
    • Reserved characters

      • + - = && || > < ! ( ) { } [ ] ^ " ~ * ? : \ /这些字符用\转义;
    • Whitespaces and empty queries

      • 空格不被认为是运算符,如果查询字符串为空或者只包含空格,查询将返回空集;
    • Avoid using the query_string query for nested documents

      • 使用nested query查询nested documents,不要使用query_string
    • Search multiple fields

    default_field[Optional, string]:查询字符串中没有明确指定字段时,默认搜索的字段;可以是*,表示搜索所有的字段;不包括nested documents

    allow_leading_wildcard[Optional, Boolean]:如果为真,则通配符*和?可在查询字符串第一位;默认为true

    analyze_wildcard:是否对查询字符串中的通配符进行分析,默认为false, 这种情况下,Elasticsearch不会对这些通配符术语进行分析,这意味着它们会被直接用于匹配索引中的词项。

    analyzer:分词器;

    auto_generate_synonyms_phrase_query:控制是否自动生成同义词短语查询,默认是true。例如,如果你的同义词配置中包含 "quick, fast" 这样的同义词对,当用户搜索 "quick brown fox" 时,Elasticsearch 会自动生成一个短语查询来匹配"fast brown fox"

    boost[Optional, float]:用于减少或增加查询的相关分数;默认为1;介于0和1.0之间的提升值会降低相关性分数。大于1.0的值会增加相关性分数。

    default_operator[Optional, string]:指定查询词条之间的逻辑关系;

    • OR:词条之间的关系是“或”,即只要有一个词条匹配,文档就会被认为是相关的;
    • AND:词条之间的关系是“与”,即所有词条都必须匹配;

    enable_position_increments[Optional, Boolean]:启用位置增量。位置增量是指在分析过程中,词项之间的距离(即位置差异);如果你的分析器移除了停用词,那么词项之间的实际距离会被保留并在查询中考虑;

    fields[Optional, List[string]]:要搜索的field,以下两种查询等价。可以使用简单通配符;

    • GET /_search
      {
        "query": {
          "query_string": {
            "fields": [ "content", "name", "city.*"],
            "query": "this AND that"
          }
        }
      }
    • GET /_search
      {
        "query": {
          "query_string": {
            "query": "(content:this OR name:this OR city.\\*:this) AND (content:that OR name:that OR city.\\*:that)"
          }
        }
      }

    fuzziness[Optional, float]:最大可编辑距离;[Common options | Elasticsearch Guide [8.17] | Elastic](https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness)

    fuzzy_max_expansions[Optional, integer]:查询过程中生成的变体词语的最大数量,默认50

    fuzzy_prefix_length[Optional, integer]:前缀长度,表示在进行模糊匹配之前,词语的前缀部分必须完全匹配的长度;

    fuzzy_transpositions[Optional, Boolean]:是否允许字符换位,默认true;

    lenient[Optional, Boolean]:是否忽略格式错误;

    max_determinized_states[Optional, integer]:用于限制正则表达式查询在确定化过程中可以生成的状态数,以防止过于复杂的正则表达式导致性能问题或内存溢出;

    minimum_should_match[Optional, float]:至少应该匹配的数量,[minimum_should_match parameter | Elasticsearch Guide [8.17] | Elastic](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-minimum-should-match.html)

    quote_analyzer[Optional, string]:指定了用于将查询字符串中引用的文本(即用双引号括起来的文本)转换为令牌(tokens)的分析器;

    • {
        "query": {
          "query_string": {
            "query": "\"quick brown fox\" AND title:Elasticsearch",
            "fields": ["title", "content"],
            "quote_analyzer": "standard"
          }
        }
      }
    • quote_analyzer 设置为 standard,这意味着 Elasticsearch 会使用 standard 分析器来处理引用的文本 "quick brown fox";quote_analyzer 只影响引用的文本部分,不会影响查询字符串中其他部分的分析。

      phrase_slop[Optional, integer]:允许短语查询在词条之间插入slop个数量的其他词条;

      quote_field_suffix[Optional, string]: TODO

      rewrite[Optional, string]: rewrite parameter

      time_zone[Optional, string]:国际协调时间(UTC)偏移量或者IANA时区;合法值例如 +01:00, -09:00, Asia/Shanghai

      type[Optional, string]:多字段查询时可用:Query string query | Elasticsearch Guide 8.17 | Elastic

      GET policy_info/_search
      {
      "query": {
      "query_string": {
        "default_field": "title",
        "query": "+\"洛阳市\"*",
        "allow_leading_wildcard": true,
        "analyze_wildcard": false,
        "analyzer": "standard",
        "auto_generate_synonyms_phrase_query": false,
        "boost": 1,
        "default_operator": "OR",
        "enable_position_increments": true,
        // "fields": ["*"],
        "fuzziness": 2,
        "fuzzy_max_expansions": 50,
        "fuzzy_prefix_length": 0,
        "fuzzy_transpositions": true,
        "lenient": false,
        "max_determinized_states": 10000,
        "minimum_should_match": 1,
        "quote_analyzer": "standard",
        "phrase_slop": 2,
        "quote_field_suffix": ".appName",
        "rewrite": "constant_score",
        "time_zone": "+00:00"
      }
      }
      }
      {
      "took": 25,
      "timed_out": false,
      "_shards": {
      "total": 3,
      "successful": 3,
      "skipped": 0,
      "failed": 0
      },
      "hits": {
      "total": {
        "value": 722,
        "relation": "eq"
      },
      "max_score": 5.318619,
      "hits": [
        {
          "_index": "policy_info",
          "_id": "2099",
          "_score": 5.318619,
            "title": "洛阳市人民政府办公室转发洛阳市司法局洛阳市卫生局洛阳市财政局..",
        }
      ...

声明:Hello World|版权所有,违者必究|如未注明,均为原创|本网站采用BY-NC-SA协议进行授权

转载:转载请注明原文链接 - ElasticSearch


我的朋友,理论是灰色的,而生活之树是常青的!