文章目录世界上并没有完美的程序,但是我们并不因此而沮丧,因为写程序就是一个不断追求完美的过程。
- normalize
- 场景
- 作用
- 配置项
- 使用
- 索引
- method : rescale_0_1
- method : rescale_0_100
- method : percent_of_sum
- method : mean
- method : softmax
- 父管道聚合
- bucket值的规范化
-
buckets_path
- 要规范化的bucket的路径
-
method
-
规范化方法
-
rescale_0_1
- (x - min) / (max - min)
-
rescale_0_100
- 100 * (x - min) / (max - min)
-
percent_of_sum
- x / sum
-
mean
- (x - mean) / (max - min)
-
z-score(注意方法名带连字符;写成 zscore 会被判为 invalid)
- (x - mean) / stdev
- stdev是标准差
-
softmax
- e^x / sum_e_x
- sum_e_x是原始值的指数和
-
-
format
- 返回值的格式(DecimalFormat 模式),格式化后的结果放在返回的 value_as_string 字段中
- 如:00.00%、00.00
DELETE /normalize_test

PUT /normalize_test
{ "mappings" : { "properties" : { "type" : {"type" : "integer"}, "num" : {"type" : "integer"}, "date" : {"type" : "date"} } } }

POST /normalize_test/_bulk
{"index" : {"_id" : 1}}
{"type" : 1, "num" : 400, "date" : "2001-01-10"}
{"index" : {"_id" : 2}}
{"type" : 2, "num" : 450, "date" : "2001-01-20"}
{"index" : {"_id" : 3}}
{"type" : 1, "num" : 580, "date" : "2001-02-10"}
{"index" : {"_id" : 4}}
{"type" : 2, "num" : 990, "date" : "2001-03-20"}
{"index" : {"_id" : 5}}
{"type" : 1, "num" : 660, "date" : "2001-04-21"}
{"index" : {"_id" : 6}}
{"type" : 1, "num" : 680, "date" : "2001-05-21"}

method : rescale_0_1
GET /normalize_test/_search { "size" : 0, "aggs" : { "month_aggs" : { "date_histogram": { "field" : "date", "calendar_interval" : "month" }, "aggs" : { "sum_aggs" : { "sum": { "field": "num" } }, "normalize_aggs" : { "normalize" : { "buckets_path" : "sum_aggs", "method" : "rescale_0_1", "format" : "00.00" } } } } } }
- 返回结果
{ "took" : 2, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 6, "relation" : "eq" }, "max_score" : null, "hits" : [ ] }, "aggregations" : { "month_aggs" : { "buckets" : [ { "key_as_string" : "2001-01-01T00:00:00.000Z", "key" : 978307200000, "doc_count" : 2, "sum_aggs" : { "value" : 850.0 }, "normalize_aggs" : { "value" : 0.6585365853658537, "value_as_string" : "00.66" } }, { "key_as_string" : "2001-02-01T00:00:00.000Z", "key" : 980985600000, "doc_count" : 1, "sum_aggs" : { "value" : 580.0 }, "normalize_aggs" : { "value" : 0.0, "value_as_string" : "00.00" } }, { "key_as_string" : "2001-03-01T00:00:00.000Z", "key" : 983404800000, "doc_count" : 1, "sum_aggs" : { "value" : 990.0 }, "normalize_aggs" : { "value" : 1.0, "value_as_string" : "01.00" } }, { "key_as_string" : "2001-04-01T00:00:00.000Z", "key" : 986083200000, "doc_count" : 1, "sum_aggs" : { "value" : 660.0 }, "normalize_aggs" : { "value" : 0.1951219512195122, "value_as_string" : "00.20" } }, { "key_as_string" : "2001-05-01T00:00:00.000Z", "key" : 988675200000, "doc_count" : 1, "sum_aggs" : { "value" : 680.0 }, "normalize_aggs" : { "value" : 0.24390243902439024, "value_as_string" : "00.24" } } ] } } }

method : rescale_0_100
GET /normalize_test/_search { "size" : 0, "aggs" : { "month_aggs" : { "date_histogram": { "field" : "date", "calendar_interval" : "month" }, "aggs" : { "sum_aggs" : { "sum": { "field": "num" } }, "normalize_aggs" : { "normalize" : { "buckets_path" : "sum_aggs", "method" : "rescale_0_100", "format" : "00" } } } } } }
- 返回结果
{ "took" : 34, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 6, "relation" : "eq" }, "max_score" : null, "hits" : [ ] }, "aggregations" : { "month_aggs" : { "buckets" : [ { "key_as_string" : "2001-01-01T00:00:00.000Z", "key" : 978307200000, "doc_count" : 2, "sum_aggs" : { "value" : 850.0 }, "normalize_aggs" : { "value" : 65.85365853658537, "value_as_string" : "66" } }, { "key_as_string" : "2001-02-01T00:00:00.000Z", "key" : 980985600000, "doc_count" : 1, "sum_aggs" : { "value" : 580.0 }, "normalize_aggs" : { "value" : 0.0, "value_as_string" : "00" } }, { "key_as_string" : "2001-03-01T00:00:00.000Z", "key" : 983404800000, "doc_count" : 1, "sum_aggs" : { "value" : 990.0 }, "normalize_aggs" : { "value" : 100.0, "value_as_string" : "100" } }, { "key_as_string" : "2001-04-01T00:00:00.000Z", "key" : 986083200000, "doc_count" : 1, "sum_aggs" : { "value" : 660.0 }, "normalize_aggs" : { "value" : 19.51219512195122, "value_as_string" : "20" } }, { "key_as_string" : "2001-05-01T00:00:00.000Z", "key" : 988675200000, "doc_count" : 1, "sum_aggs" : { "value" : 680.0 }, "normalize_aggs" : { "value" : 24.390243902439025, "value_as_string" : "24" } } ] } } }

method : percent_of_sum
GET /normalize_test/_search { "size" : 0, "aggs" : { "month_aggs" : { "date_histogram": { "field" : "date", "calendar_interval" : "month" }, "aggs" : { "sum_aggs" : { "sum": { "field": "num" } }, "normalize_aggs" : { "normalize" : { "buckets_path" : "sum_aggs", "method" : "percent_of_sum", "format" : "00.00%" } } } } } }
- 返回结果
{ "took" : 3, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 6, "relation" : "eq" }, "max_score" : null, "hits" : [ ] }, "aggregations" : { "month_aggs" : { "buckets" : [ { "key_as_string" : "2001-01-01T00:00:00.000Z", "key" : 978307200000, "doc_count" : 2, "sum_aggs" : { "value" : 850.0 }, "normalize_aggs" : { "value" : 0.22606382978723405, "value_as_string" : "22.61%" } }, { "key_as_string" : "2001-02-01T00:00:00.000Z", "key" : 980985600000, "doc_count" : 1, "sum_aggs" : { "value" : 580.0 }, "normalize_aggs" : { "value" : 0.15425531914893617, "value_as_string" : "15.43%" } }, { "key_as_string" : "2001-03-01T00:00:00.000Z", "key" : 983404800000, "doc_count" : 1, "sum_aggs" : { "value" : 990.0 }, "normalize_aggs" : { "value" : 0.2632978723404255, "value_as_string" : "26.33%" } }, { "key_as_string" : "2001-04-01T00:00:00.000Z", "key" : 986083200000, "doc_count" : 1, "sum_aggs" : { "value" : 660.0 }, "normalize_aggs" : { "value" : 0.17553191489361702, "value_as_string" : "17.55%" } }, { "key_as_string" : "2001-05-01T00:00:00.000Z", "key" : 988675200000, "doc_count" : 1, "sum_aggs" : { "value" : 680.0 }, "normalize_aggs" : { "value" : 0.18085106382978725, "value_as_string" : "18.09%" } } ] } } }

method : mean
GET /normalize_test/_search { "size" : 0, "aggs" : { "month_aggs" : { "date_histogram": { "field" : "date", "calendar_interval" : "month" }, "aggs" : { "sum_aggs" : { "sum": { "field": "num" } }, "normalize_aggs" : { "normalize" : { "buckets_path" : "sum_aggs", "method" : "mean", "format" : "0.00" } } } } } }
- 返回结果
{ "took" : 5, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 6, "relation" : "eq" }, "max_score" : null, "hits" : [ ] }, "aggregations" : { "month_aggs" : { "buckets" : [ { "key_as_string" : "2001-01-01T00:00:00.000Z", "key" : 978307200000, "doc_count" : 2, "sum_aggs" : { "value" : 850.0 }, "normalize_aggs" : { "value" : 0.23902439024390243, "value_as_string" : "0.24" } }, { "key_as_string" : "2001-02-01T00:00:00.000Z", "key" : 980985600000, "doc_count" : 1, "sum_aggs" : { "value" : 580.0 }, "normalize_aggs" : { "value" : -0.4195121951219512, "value_as_string" : "-0.42" } }, { "key_as_string" : "2001-03-01T00:00:00.000Z", "key" : 983404800000, "doc_count" : 1, "sum_aggs" : { "value" : 990.0 }, "normalize_aggs" : { "value" : 0.5804878048780487, "value_as_string" : "0.58" } }, { "key_as_string" : "2001-04-01T00:00:00.000Z", "key" : 986083200000, "doc_count" : 1, "sum_aggs" : { "value" : 660.0 }, "normalize_aggs" : { "value" : -0.22439024390243903, "value_as_string" : "-0.22" } }, { "key_as_string" : "2001-05-01T00:00:00.000Z", "key" : 988675200000, "doc_count" : 1, "sum_aggs" : { "value" : 680.0 }, "normalize_aggs" : { "value" : -0.17560975609756097, "value_as_string" : "-0.18" } } ] } } }

method : softmax
- 由于 softmax 需要计算 e^x,原始值过大(如 990)时 e^x 会超出双精度浮点数的范围,因此测试本方法时将索引中的 num 都去掉末尾的一个 0(即除以 10,如 400 → 40)
GET /normalize_test/_search { "size" : 0, "aggs" : { "month_aggs" : { "date_histogram": { "field" : "date", "calendar_interval" : "month" }, "aggs" : { "sum_aggs" : { "sum": { "field": "num" } }, "normalize_aggs" : { "normalize" : { "buckets_path" : "sum_aggs", "method" : "softmax" } } } } } }
- 返回结果
{ "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 6, "relation" : "eq" }, "max_score" : null, "hits" : [ ] }, "aggregations" : { "month_aggs" : { "buckets" : [ { "key_as_string" : "2001-01-01T00:00:00.000Z", "key" : 978307200000, "doc_count" : 2, "sum_aggs" : { "value" : 85.0 }, "normalize_aggs" : { "value" : 8.315280276640997E-7 } }, { "key_as_string" : "2001-02-01T00:00:00.000Z", "key" : 980985600000, "doc_count" : 1, "sum_aggs" : { "value" : 58.0 }, "normalize_aggs" : { "value" : 1.5628808897545835E-18 } }, { "key_as_string" : "2001-03-01T00:00:00.000Z", "key" : 983404800000, "doc_count" : 1, "sum_aggs" : { "value" : 99.0 }, "normalize_aggs" : { "value" : 0.9999991684719333 } }, { "key_as_string" : "2001-04-01T00:00:00.000Z", "key" : 986083200000, "doc_count" : 1, "sum_aggs" : { "value" : 66.0 }, "normalize_aggs" : { "value" : 4.658882271108809E-15 } }, { "key_as_string" : "2001-05-01T00:00:00.000Z", "key" : 988675200000, "doc_count" : 1, "sum_aggs" : { "value" : 68.0 }, "normalize_aggs" : { "value" : 3.442474245953642E-14 } } ] } } }
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)