参考文章:
- ES傻瓜式教程
- Es基础语法整理
ElasticSearch:智能搜索,分布式的搜索引擎,是ELK的一个组成部分
ELK代表的是:E就是ElasticSearch,L就是Logstash,K就是Kibana
E:ElasticSearch 搜索和分析的功能
L:Logstash 搜集数据的功能,是日志收集系统
K:Kibana 数据可视化(分析),可以用图表的方式来去展示,是数据可视化平台
ES倒排索引
ES 的 API 请求使用的是 RESTful 风格
PUT 类似于SQL中的增
DELETE 类似于SQL中的删
POST 类似于SQL中的改
GET 类似于SQL中的查
一些额外参数说明
# 创建索引
PUT testindex
# 响应结果 注意:创建索引库的分片数默认 1 片,在 7.0.0 之前的 Elasticsearch 版本中,默认 5 片
{
# 响应结果:true 表示操作成功
"acknowledged": true,
# 分片结果:true 表示分片操作成功
"shards_acknowledged": true,
# 索引名称
"index": "testindex"
}
# 查看所有索引 `_cat` 表示查看的意思
GET /_cat/indices?v
# 结果
health status index uuid pri rep docs.count docs.deleted store.size pri.store.size
green open .geoip_databases 7ZecJqsSRumQxB3qBkYRXA 1 1 44 0 82.9mb 41.4mb
green open testindex I49li6FqTFGKEbPhS4eBlQ 1 1 0 0 452b 226b
green open .apm-custom-link uhs4yAzCS-CBXAtqAvs--w 1 1 0 0 452b 226b
green open .apm-agent-configuration nw-8UCn1ReK29sCenMsTtg 1 1 0 0 452b 226b
green open .async-search 9jcSECDnQKm1CChh1oS-jQ 1 1 2 0 23.9kb 6.7kb
green open .kibana_task_manager_7.17.1_001 9sGLoaxDQwWBxULbIw2wVA 1 1 17 678 1.1mb 628.3kb
green open .kibana_7.17.1_001 MRGtr9rrTySlnGveDM_7_A 1 1 282 41 9.6mb 4.8mb
green open nginx-access-20220324 zVd5XD2wQxqOuN1HEiOWPQ 1 1 56 0 114.5kb 66.3kb
green open .tasks XAb4kQNZR_qfLMRerxcDTQ 1 1 10 0 106.6kb 56.3kb
# 查询单个索引
GET testindex
# 结果
{
"testindex" : {
"aliases" : { },
"mappings" : { },
"settings" : {
"index" : {
"creation_date" : "1640052649065",
"number_of_shards" : "1",
"number_of_replicas" : "1",
"uuid" : "qNOOFYeOSGWHyQlxWAnpiA",
"version" : {
"created" : "7030099"
},
"provided_name" : "testindex"
}
}
}
}
# 说明
{
"testindex"【索引名】: {
"aliases"【别名】: {},
"mappings"【映射】: {},
"settings"【设置】: {
"index"【设置 - 索引】: {
"creation_date"【设置 - 索引 - 创建时间】: "1614265373911",
"number_of_shards"【设置 - 索引 - 主分片数量】: "1",
"number_of_replicas"【设置 - 索引 - 副分片数量】: "1",
"uuid"【设置 - 索引 - 唯一标识】: "eI5wemRERTumxGCc1bAk2A",
"version"【设置 - 索引 - 版本】: {
"created": "7080099"
},
"provided_name"【设置 - 索引 - 名称】: "testindex"
}
}
}
}
# 删除索引
DELETE testindex
{
"acknowledged" : true
}
# 创建文档
POST testindex/_doc
{
"title":"华为P50 Pro",
"content":"原色双影像单元,搭载HarmonyOS2,万象双环设计,欢迎选购!华为手机热销爆款,限量抢购",
"price":5988
}
# 结果
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "glbS2n0BgOyOsl0n-wMM",
"_version" : 1,
"result" : "created",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 0,
"_primary_term" : 1
}
# 结果解释
{
"_index"【索引】: "testindex",
"_type"【类型-文档】: "_doc",
"_id"【唯一标识】: "Xhsa2ncBlvF_7lxyCE9G", #可以类比为 MySQL 中的主键,随机生成
"_version"【版本】: 1,
"result"【结果】: "created", #这里的 created 表示创建成功
"_shards"【分片】: {
"total"【分片 - 总数】: 2,
"successful"【分片 - 成功】: 1,
"failed"【分片 - 失败】: 0
},
"_seq_no": 0,
"_primary_term": 1
}
# 使用自定义的ID
PUT testindex/_doc/1
{
"title":"华为P40 5G",
"content":"新一代智慧旗舰,多主摄融合计算摄影,全新一代骁龙8,智慧隐私通话,荣耀Magic..",
"price":6899
}
# 结果
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "2",
"_version" : 1,
"result" : "created",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 2,
"_primary_term" : 2
}
# 结果解释同上
# 根据 _id 查询单个文档
GET testindex/_doc/1
# 结果
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "1",
"_version" : 1,
"_seq_no" : 1,
"_primary_term" : 2,
"found" : true,
"_source" : {
"title" : "华为P40 5G",
"content" : "新一代智慧旗舰,多主摄融合计算摄影,全新一代骁龙8,智慧隐私通话,荣耀Magic..",
"price" : 6899
}
}
# 结果解释
{
"_index"【索引】: "testindex",
"_type"【文档类型】: "_doc",
"_id": "1",
"_version": 2,
"_seq_no": 2,
"_primary_term": 2,
"found"【查询结果】: true, # true 表示查找到,false 表示未查找到
"_source"【文档源信息】: {
"title": "华为P40 5G",
"content": "新一代智慧旗舰,多主摄融合计算摄影,全新一代骁龙8,智慧隐私通话,荣耀Magic..",
"price": 6899
}
}
# 查询全部文档
GET testindex/_search
# 结果
{
"took" : 4,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "dVcI0X8BNJdkuKp0aXrb",
"_score" : 1.0,
"_source" : {
"title" : "华为P50 Pro",
"content" : "原色双影像单元,搭载HarmonyOS2,万象双环设计,欢迎选购!华为手机热销爆款,限量抢购",
"price" : 5988
}
},
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"title" : "华为P40 5G",
"content" : "新一代智慧旗舰,多主摄融合计算摄影,全新一代骁龙8,智慧隐私通话,荣耀Magic..",
"price" : 6899
}
}
]
}
}
# 全量修改
POST testindex/_doc/1
{
"title":"华为111",
"content":"1111111",
"price":666
}
# 结果
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "1",
"_version" : 2,
"result" : "updated",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 3,
"_primary_term" : 2
}
# 结果解释
{
"_index": "testindex",
"_type": "_doc",
"_id": "1",
"_version"【版本】: 2,
"result"【结果】: "updated", # updated 表示数据被更新
"_shards": {
"total": 2,
"successful": 1,
"failed": 0
},
"_seq_no": 2,
"_primary_term": 2
}
# 修改部分字段
POST testindex/_update/1
{
"doc":{
"content":"22222"
}
}
# 结果
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "1",
"_version" : 5,
"result" : "updated",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 6,
"_primary_term" : 2
}
# 删除文档(根据_id删除)
DELETE testindex/_doc/1
# 结果
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "1",
"_version" : 6,
"result" : "deleted",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 7,
"_primary_term" : 2
}
# 结果解释
{
"_index": "testindex",
"_type": "_doc",
"_id": "1",
"_version"【版本】: 4, #对数据的操作,都会更新版本
"result"【结果】: "deleted", # deleted 表示数据被标记为删除
"_shards": {
"total": 2,
"successful": 1,
"failed": 0
},
"_seq_no": 4,
"_primary_term": 2
}
# 条件更新
POST testindex/_update_by_query
{
"query": {
"bool": {
"must": [
{
"term": {
"title.keyword": "huawei"
}
},
{
"term": {
"price": 6899
}
}
]
}
},
"script": {
"source": "ctx._source.price=\"6999\";ctx._source.title=\"华为手机\""
}
}
# 删除文档(条件删除)
POST testindex/_delete_by_query
{
"query":{
"match":{
"title.keyword":"华为"
}
}
}
# 查询映射
GET testindex/_mapping
# 结果
{
"testindex" : {
"mappings" : {
"properties" : {
"content" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"title" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
# 创建映射
PUT testindex/_mapping
{
"properties": {
"title": {
"type": "text",
"index": true
},
"content": {
"type": "text",
"index": true
},
"name": {
"type": "text",
"index": true
}
}
}
# 映射数据说明:
#字段名:任意填写,下面指定许多属性,例如:title、subtitle、images、price
#type:类型,Elasticsearch 中支持的数据类型非常丰富,说几个关键的:
# String 类型,又分两种:
# text:可分词
# keyword:不可分词,数据会作为完整字段进行匹配
# Numerical:数值类型,分两类
# 基本数据类型:long、integer、short、byte、double、float、half_float
# 浮点数的高精度类型:scaled_float
# Date:日期类型
# Array:数组类型
# Object:对象
# index:是否索引,默认为 true,也就是说你不进行任何配置,所有字段都会被索引。
# true:字段会被索引,则可以用来进行搜索
# false:字段不会被索引,不能用来搜索
# store:是否将数据进行独立存储,默认为 false
# 原始的文本会存储在 _source 里面,默认情况下其他提取出来的字段都不是独立存储的,
# 而是从 _source 里面提取出来的。当然你也可以独立地存储某个字段,只要设置
# "store": true 即可。获取独立存储的字段要比从 _source 中解析快得多,但是也会占用
# 更多的空间,所以要根据实际业务需求来设置。
# analyzer:分词器,ik_max_word 即使用 ik 分词器
# 高级查询 =====
# 基础数据
PUT student
POST /student/_doc/1001
{
"name":"zhangsan",
"nickname":"zhangsan",
"sex":"男",
"age":30
}
POST /student/_doc/1002
{
"name":"lisi",
"nickname":"lisi",
"sex":"男",
"age":20
}
POST /student/_doc/1003
{
"name":"wangwu",
"nickname":"wangwu",
"sex":"女",
"age":40
}
POST /student/_doc/1004
{
"name":"zhangsan1",
"nickname":"zhangsan1",
"sex":"女",
"age":50
}
POST /student/_doc/1005
{
"name":"zhangsan2",
"nickname":"zhangsan2",
"sex":"女",
"age":30
}
# 查询所有文档
GET student/_search
{
"query": {
"match_all": {}
}
}
# "query":这里的 query 代表一个查询对象,里面可以有不同的查询属性
# "match_all":查询类型,例如:match_all(代表查询所有), match,term , range 等等
# {查询条件}:查询条件会根据类型的不同,写法也有差异
# 结果
{
"took" : 8,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 5,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1001",
"_score" : 1.0,
"_source" : {
"name" : "zhangsan",
"nickname" : "zhangsan",
"sex" : "男",
"age" : 30
}
},
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1002",
"_score" : 1.0,
"_source" : {
"name" : "lisi",
"nickname" : "lisi",
"sex" : "男",
"age" : 20
}
},
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1003",
"_score" : 1.0,
"_source" : {
"name" : "wangwu",
"nickname" : "wangwu",
"sex" : "女",
"age" : 40
}
},
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1004",
"_score" : 1.0,
"_source" : {
"name" : "zhangsan1",
"nickname" : "zhangsan1",
"sex" : "女",
"age" : 50
}
},
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1005",
"_score" : 1.0,
"_source" : {
"name" : "zhangsan2",
"nickname" : "zhangsan2",
"sex" : "女",
"age" : 30
}
}
]
}
}
# 结果解释:
{
"took【查询花费时间,单位毫秒】" : 1116,
"timed_out【是否超时】" : false,
"_shards【分片信息】" : {
"total【总数】" : 1,
"successful【成功】" : 1,
"skipped【忽略】" : 0,
"failed【失败】" : 0
},
"hits【搜索命中结果】" : {
"total"【搜索条件匹配的文档总数】: {
"value"【总命中计数的值】: 3,
"relation"【计数规则】: "eq" # eq 表示计数准确, gte 表示实际命中数大于等于 value(value 只是下限,计数不精确)
},
"max_score【匹配度分值】" : 1.0,
"hits【命中结果集合】" : [
{......}
]
}
}
# 匹配查询
GET student/_search
{
"query": {
"match": {
"name": "zhangsan"
}
}
}
# match 匹配类型查询,会把查询条件进行分词,然后进行查询,多个词条之间是 or 的关系
# 结果
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.3862944,
"hits" : [
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1001",
"_score" : 1.3862944,
"_source" : {
"name" : "zhangsan",
"nickname" : "zhangsan",
"sex" : "男",
"age" : 30
}
}
]
}
}
# 多字段匹配查询
GET student/_search
{
"query": {
"multi_match": {
"query": "zhangsan",
"fields": [
"name",
"nickname"
]
}
}
}
# multi_match 与 match 类似,不同的是它可以在多个字段中查询
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.3862944,
"hits" : [
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1001",
"_score" : 1.3862944,
"_source" : {
"name" : "zhangsan",
"nickname" : "zhangsan",
"sex" : "男",
"age" : 30
}
}
]
}
}
# 关键字精确查询
GET student/_search
{
"query": {
"term": {
"name": {
"value": "zhangsan"
}
}
}
}
# term 查询,精确的关键词匹配查询,不对查询条件进行分词
# 结果
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.3862944,
"hits" : [
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1001",
"_score" : 1.3862944,
"_source" : {
"name" : "zhangsan",
"nickname" : "zhangsan",
"sex" : "男",
"age" : 30
}
}
]
}
}
# 多关键字精确查询
GET student/_search
{
"query": {
"terms": {
"name": ["zhangsan","lisi"]
}
}
}
# terms 查询和 term 查询一样,但它允许你指定多值进行匹配。
# 如果这个字段包含了指定值中的任何一个值,那么这个文档满足条件,类似于 mysql 的 in
# 查询结果
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1001",
"_score" : 1.0,
"_source" : {
"name" : "zhangsan",
"nickname" : "zhangsan",
"sex" : "男",
"age" : 30
}
},
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1002",
"_score" : 1.0,
"_source" : {
"name" : "lisi",
"nickname" : "lisi",
"sex" : "男",
"age" : 20
}
}
]
}
}
# 指定查询字段
GET student/_search
{
"_source": [
"name",
"nickname"
],
"query": {
"terms": {
"nickname": [
"zhangsan"
]
}
}
}
# 默认情况下,Elasticsearch 在搜索的结果中,会把文档中保存在_source 的所有字段都返回。
# 如果我们只想获取其中的部分字段,我们可以添加_source 的过滤
# 查询结果
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1001",
"_score" : 1.0,
"_source" : {
"name" : "zhangsan",
"nickname" : "zhangsan"
}
}
]
}
}
# 过滤字段
# includes:来指定想要显示的字段
# excludes:来指定不想要显示的字段
GET student/_search
{
"_source": {
"includes": [
"name",
"nickname"
]
},
"query": {
"terms": {
"nickname": [
"zhangsan"
]
}
}
}
# 结果
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1001",
"_score" : 1.0,
"_source" : {
"name" : "zhangsan",
"nickname" : "zhangsan"
}
}
]
}
}
# 组合查询
GET student/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"name": "zhangsan"
}
}
],
"must_not": [
{
"match": {
"age": "40"
}
}
],
"should": [
{
"match": {
"sex": "男"
}
},
{
"match": {
"sex": "女"
}
}
]
}
}
}
# `bool`把各种其它查询通过
# `must`(必须 )
# `must_not`(必须不)
# `should`(应该)(多个满足一个即可)的方式进行组合
# 结果
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 2.261763,
"hits" : [
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1001",
"_score" : 2.261763,
"_source" : {
"name" : "zhangsan",
"nickname" : "zhangsan",
"sex" : "男",
"age" : 30
}
}
]
}
}
# 范围查询
# range 查询找出那些落在指定区间内的数字或者时间。
# range 查询允许使用以下操作符:gt(大于)、gte(大于等于)、lt(小于)、lte(小于等于)
GET student/_search
{
"query": {
"range": {
"age": {
"gte": 30,
"lte": 35
}
}
}
}
# 结果
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1001",
"_score" : 1.0,
"_source" : {
"name" : "zhangsan",
"nickname" : "zhangsan",
"sex" : "男",
"age" : 30
}
},
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1005",
"_score" : 1.0,
"_source" : {
"name" : "zhangsan2",
"nickname" : "zhangsan2",
"sex" : "女",
"age" : 30
}
}
]
}
}
# 模糊查询
# 返回包含与搜索字词相似的字词的文档。
# 编辑距离是将一个术语转换为另一个术语所需的单字符更改次数。这些更改可以包括:
# 更改字符(box → fox)
# 删除字符(black → lack)
# 插入字符(sic → sick)
# 转置两个相邻字符(act → cat)
# 为了找到相似的术语,fuzzy 查询会在指定的编辑距离内创建一组搜索词的所有可能的变体或扩展,
# 然后查询返回每个扩展的完全匹配。
# 通过 fuzziness 修改编辑距离。一般使用默认值 AUTO,根据术语的长度生成编辑距离。
GET student/_search
{
"query": {
"fuzzy": {
"name": {
"value": "zhangsa1n",
"fuzziness": 2
}
}
}
}
# 结果
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.2322617,
"hits" : [
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1004",
"_score" : 1.2322617,
"_source" : {
"name" : "zhangsan1",
"nickname" : "zhangsan1",
"sex" : "女",
"age" : 50
}
},
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1001",
"_score" : 1.2130076,
"_source" : {
"name" : "zhangsan",
"nickname" : "zhangsan",
"sex" : "男",
"age" : 30
}
},
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1005",
"_score" : 1.078229,
"_source" : {
"name" : "zhangsan2",
"nickname" : "zhangsan2",
"sex" : "女",
"age" : 30
}
}
]
}
}
# 排序
# sort 可以让我们按照不同的字段进行排序,并且通过 order 指定排序的方式:desc 降序,asc 升序。
# 单字段
GET student/_search
{
"query": {
"match": {
"name":"zhangsan"
}
},
"sort": [{
"age": {
"order":"desc"
}
}]
}
# 多字段
GET student/_search
{
"query": {
"match_all": {}
},
"sort": [
{
"age": {
"order": "desc"
}
},
{
"_id": {
"order": "desc"
}
}
]
}
# 结果(对应上面的单字段排序查询:仅 1 条命中,sort 中只有 age 一个排序值)
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1001",
"_score" : null,
"_source" : {
"name" : "zhangsan",
"nickname" : "zhangsan",
"sex" : "男",
"age" : 30
},
"sort" : [
30
]
}
]
}
}
#高亮查询
# 在进行关键字搜索时,搜索出的内容中的关键字会显示不同的颜色,称之为高亮
# Elasticsearch 可以对查询内容中的关键字部分,进行标签和样式(高亮)的设置。
# 在使用 match 查询的同时,加上一个 highlight 属性:
# pre_tags:前置标签
# post_tags:后置标签
# fields:需要高亮的字段
# name:这里声明 name 字段需要高亮,后面可以为这个字段设置特有配置,也可以为空
GET student/_search
{
"query": {
"match": {
"name": "zhangsan"
}
},
"highlight": {
"pre_tags": "<font color='red'>",
"post_tags": "</font>",
"fields": {
"name": {}
}
}
}
# 结果
{
"took" : 62,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.3862944,
"hits" : [
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1001",
"_score" : 1.3862944,
"_source" : {
"name" : "zhangsan",
"nickname" : "zhangsan",
"sex" : "男",
"age" : 30
},
"highlight" : {
"name" : [
"<font color='red'>zhangsan</font>"
]
}
}
]
}
}
# 分页查询
# from:当前页的起始索引,默认从 0 开始。from = (pageNum - 1) * size
# size:每页显示多少条
GET student/_search
{
"query": {
"match_all": {}
},
"sort": [
{
"age": {
"order": "desc"
}
}
],
"from": 0,
"size": 2
}
# 结果
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 5,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1004",
"_score" : null,
"_source" : {
"name" : "zhangsan1",
"nickname" : "zhangsan1",
"sex" : "女",
"age" : 50
},
"sort" : [
50
]
},
{
"_index" : "student",
"_type" : "_doc",
"_id" : "1003",
"_score" : null,
"_source" : {
"name" : "wangwu",
"nickname" : "wangwu",
"sex" : "女",
"age" : 40
},
"sort" : [
40
]
}
]
}
}
# 聚合查询
# 聚合允许使用者对 es 文档进行统计分析,类似于关系型数据库中的 group by,当然还有很多其他的聚合,例如取最大值、平均值等等。
# 最大值
GET student/_search
{
"aggs":{
"max_age":{
"max":{"field":"age"}
}
},
"size":0
}
# 最小值
GET student/_search
{
"aggs":{
"min_age":{
"min":{"field":"age"}
}
},
"size":0
}
# 求和
GET student/_search
{
"aggs":{
"sum_age":{
"sum":{"field":"age"}
}
},
"size":0
}
# 平均值
GET student/_search
{
"aggs":{
"avg_age":{
"avg":{"field":"age"}
}
},
"size":0
}
# 对某个字段的值进行去重之后再取总数
GET student/_search
{
"aggs":{
"distinct_age":{
"cardinality":{"field":"age"}
}
},
"size":0
}
# stats 聚合:对某个字段一次性返回 count、max、min、avg 和 sum 五个指标
GET student/_search
{
"aggs":{
"stats_age":{
"stats":{"field":"age"}
}
},
"size":0
}
# 桶聚合查询
# 桶聚合相当于 sql 中的 group by 语句
# terms 聚合,分组统计
GET student/_search
{
"aggs":{
"age_groupby":{
"terms":{"field":"age"}
}
},
"size":0
}
# 在 terms 分组下再进行聚合
GET student/_search
{
"aggs":{
"age_groupby":{
"terms":{
"field":"age"
},
"aggs":{
"sum_age":{
"sum":{
"field":"age"
}
}
}
}
},
"size":0
}
# 批量查询 mget
# 单条查询 GET /test_index/_doc/1,如果查询多个id的文档一条一条查询,网络开销太大。
GET /_mget
{
"docs" : [
{
"_index" : "test_index",
"_type" : "_doc",
"_id" : 1
},
{
"_index" : "test_index",
"_type" : "_doc",
"_id" : 7
}
]
}
# 去掉type
GET /_mget
{
"docs" : [
{
"_index" : "test_index",
"_id" : 2
},
{
"_index" : "test_index",
"_id" : 3
}
]
}
# 同一索引下批量查询
GET /test_index/_mget
{
"docs" : [
{
"_id" : 2
},
{
"_id" : 3
}
]
}
# 第三种写法:使用 ids 查询
POST /test_index/_search
{
"query": {
"ids" : {
"values" : ["1", "7"]
}
}
}
# 批量增删改 bulk =====
# Bulk 操作解释:将文档的增删改查一系列操作,通过一次请求全都做完,减少网络传输次数。
POST /_bulk
{"action": {"metadata"}}
{"data"}
# 如下操作,删除5,新增14,修改2。
POST /_bulk
{ "delete": { "_index": "test_index", "_id": "5" }}
{ "create": { "_index": "test_index", "_id": "14" }}
{ "test_field": "test14" }
{ "update": { "_index": "test_index", "_id": "2"} }
{ "doc" : {"test_field" : "bulk test"} }
# 1功能:
# delete:删除一个文档,只要1个json串就可以了
# create:相当于强制创建 PUT /index/_create/id(7.x 之前带 type 的写法为 PUT /index/type/id/_create)
# index:普通的put操作,可以是创建文档,也可以是全量替换文档
# update:执行的是局部更新partial update操作
# 2格式:每个json不能换行,相邻json必须换行。
# 3隔离:每个操作互不影响,操作失败的行会返回其失败信息。
# 4实际用法:bulk请求一次不要太大,否则一下积压到内存中,性能会下降。
# 所以,一次请求几千个操作、大小在几M正好。
日志分析系统的用处
以上主要是es在搜索方面的应用,接下来我们来看一下es结合logstash和kibana做日志分析
假如一个分布式系统有 1000 台机器,系统出现故障时,就要看下日志,这样得一台一台登录上去查看,非常麻烦
但是如果日志接入了 ELK 系统就不一样。
比如系统运行过程中,突然出现了异常,在日志中就能及时反馈,日志进入 ELK 系统中,我们直接在 Kibana 就能看到日志情况。
Logstash 是一个数据处理引擎,可以处理每秒几万条的日志;它支持动态地从各种数据源搜集数据,并对数据进行过滤、分析、丰富、统一格式等操作,然后存储到 ES
Logstash 是怎么工作的
Logstash 做的事情分三个阶段依次执行:输入(input)——> 处理 filter(不是必须)——> 输出(output)
使用管道方式进行日志的搜集处理和输出:有点类似*NIX系统的管道命令 xxx | ccc | ddd,xxx执行完了会执行ccc,然后执行ddd
input{
#从文件中输入
file{
path => "/var/log/nginx/access.log" #文件路径
start_position => "beginning" #开始收集的位置
type => "nginx_access_log" # 类型(标识)
}
#通过网络将系统日志消息读取为事件
syslog{
port =>"514" # port 指定监听端口(同时建立TCP/UDP的514端口的监听)
type => "syslog"
}
#从syslogs读取需要实现配置rsyslog:
# cat /etc/rsyslog.conf 加入一行
# *.* @172.17.128.200:514 #指定日志输入到这个端口,然后logstash监听这个端口,如果有新日志输入则读取
# service rsyslog restart #重启日志服务
#从Elastic beats接收事件
beats {
port => 5044 #要监听的端口
}
#将 kafka topic 中的数据读取为事件
kafka{
bootstrap_servers=> "kafka01:9092,kafka02:9092,kafka03:9092" # 用于建立群集初始连接的Kafka实例的URL列表。
topics => ["access_log"] #要订阅的主题列表,kafka topics
group_id => "logstash-file" #消费者所属组的标识符,默认为logstash。kafka中一个主题的消息将通过相同的方式分发到Logstash的group_id
codec => "json" #解码为json
}
#从redis收集数据
redis {
batch_count => 1 #EVAL命令返回的事件数目
data_type => "list" #logstash redis插件工作方式
key => "logstash-test-list" #监听的键值
host => "127.0.0.1" #redis地址
port => 6379 #redis端口号
password => "123qwe" #如果有安全认证,此项为密码
db => 0 #redis数据库的编号
threads => 1 #启用线程数量
}
#从mysql收集数据
jdbc {
# mysql相关jdbc配置
jdbc_connection_string => "jdbc:mysql://host:3306/bcm_test?useUnicode=true&characterEncoding=utf-8&useSSL=false"
jdbc_user => "root"
jdbc_password => "root"
# jdbc连接mysql驱动的文件目录,可去官网下载:https://dev.mysql.com/downloads/connector/j/
jdbc_driver_library => "./config/mysql-connector-java-5.1.46.jar"
# the name of the driver class for mysql
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_paging_enabled => true
jdbc_page_size => "50000"
jdbc_default_timezone =>"Asia/Shanghai"
# mysql文件, 也可以直接写SQL语句在此处,如下:
# statement => "select * from t_order where update_time >= :sql_last_value;"
statement_filepath => "./config/jdbc.sql"
# 这里类似crontab,可以定制定时操作,比如每分钟执行一次同步(分 时 日 月 周)
schedule => "* * * * *"
#type => "jdbc"
# 是否记录上次执行结果, 如果为真,将会把上次执行到的 tracking_column 字段的值记录下来,保存到 last_run_metadata_path 指定的文件中
#record_last_run => true
# 是否需要记录某个column 的值,如果record_last_run为真,可以自定义我们需要 track 的 column 名称,此时该参数就要为 true. 否则默认 track 的是 timestamp 的值.
use_column_value => true
# 如果 use_column_value 为真,需配置此参数. track 的数据库 column 名,该 column 必须是递增的. 一般是mysql主键
tracking_column => "update_time"
tracking_column_type => "timestamp"
last_run_metadata_path => "./logstash_capital_bill_last_id"
# 是否清除 last_run_metadata_path 的记录,如果为真那么每次都相当于从头开始查询所有的数据库记录
clean_run => false
#是否将 字段(column) 名称转小写
lowercase_column_names => false
}
}
filter{
#需要安装插件
#解析任意文本并将其结构化:把非结构化日志数据通过正则解析成结构化、可查询的数据
#注意这里grok 可以有多个match匹配规则,如果前面的匹配失败可以使用后面的继续匹配
#grok 语法:%{SYNTAX:SEMANTIC} 即 %{正则:自定义字段名}
grok{
match => {"message" => "%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] \"%{WORD:verb} %{DATA:request} HTTP/%{NUMBER:httpversion}\" %{NUMBER:response:int} (?:-|%{NUMBER:bytes:int}) \"(?:-|%{DATA:referrer})\" \"%{DATA:user_agent}\" (?:%{IP:proxy}|-) %{DATA:upstream_addr} %{NUMBER:upstream_request_time:float} %{NUMBER:upstream_response_time:float}"}
match => {"message" => "%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] \"%{WORD:verb} %{DATA:request} HTTP/%{NUMBER:httpversion}\" %{NUMBER:response:int} (?:-|%{NUMBER:bytes:int}) \"%{DATA:referrer}\" \"%{DATA:user_agent}\" \"%{DATA:proxy}\""}
}
if [request] {
#用于解码被编码的字段,可以解决URL中 中文乱码的问题
urldecode {
field => "request"
}
#ruby插件可以执行任意Ruby代码
ruby {
init => "@kname = ['url_path','url_arg']"
code => "
new_event = LogStash::Event.new(Hash[@kname.zip(event.get('request').split('?'))])
event.append(new_event)"
}
if [url_arg] {
ruby {
init => "@kname = ['key', 'value']"
code => "event.set('url_args', event.get('url_arg').split('&').collect {|i| Hash[@kname.zip(i.split('='))]})"
}
}
}
# 根据来自Maxmind GeoLite2数据库的数据添加有关IP地址的地理位置的信息
geoip{
source => "clientip"
}
# 添加有关用户代理(如系列,操作系统,版本和设备)的信息
useragent{
source => "user_agent"
target => "ua"
remove_field => "user_agent"
}
#日期解析 解析字段中的日期,然后转存到@timestamp
date {
match => ["timestamp","dd/MMM/YYYY:HH:mm:ss Z"]
locale => "en"
}
#对字段做处理 重命名、删除、替换和修改字段。
# convert:类型转换; split:使用分隔符把字符串分割成数组; merge:合并字段(数组和字符串、字符串和字符串); rename:对字段重命名; remove_field:移除字段; join:用分隔符连接数组,如果不是数组则不做处理
mutate{
remove_field => ["message","timestamp","request","url_arg"]
}
}
output{
# 输出到文件
file {
path => "/data/logstash/%{host}/%{application}"
codec => line { format => "%{message}"}
}
# 输出到es
elasticsearch {
hosts => "localhost:9200"
index => "nginx-access-log-%{+YYYY.MM.dd}"
}
# 输出到控制台
stdout {
codec => rubydebug
}
# 将事件发送到kafka
kafka{
bootstrap_servers => "localhost:9092"
topic_id => "test_topic" #必需的设置。生成消息的主题
}
}
kibana的使用
kibana本次实验主要用于查看日志和开发工具使用
篇幅过大,为防止小伙伴们产生厌烦心理,集群搭建和负载均衡,我将在下一篇中完成。
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)