关于使用datax同步数据最重要的是——作业json的编写
参考案例:datax-elasticsearch/elasticsearchreader.md at master · mario1oreo/datax-elasticsearch · GitHub
但是实际操作还是存在一些差异
es的基本语法:
参考:Elasticsearch基本语法 - 半岛弥情 - 博客园
工作上的实践成功案例:es版本号5.4.1
es_to_tdh
json:
{
"job":{
"setting":{
"speed":{
"channel":20
}
},
"content":[
{
"reader":{
"name": "elasticsearchreader",
"parameter": {
"endpoint": "http://172.253.32.35:9200",
"accessId": "",
"accessKey": "",
"index": "kbs.corp_prof_announcement_search_es",
"type": "default_type_",
"column": [
"ins_num",
"ins_fn",
"short_name",
"event_time",
"data_source",
"title",
"content",
"content_link",
"type_code",
"type_name",
"stk_code",
"keyw_name",
"is_valid",
"dm_src_info",
"dm_created_time",
"dm_created_by",
"dm_updated_time",
"dm_updated_by"
],
"search":{
"match_all": {}
},
"sort": {},
"retryCount": 3,
"scroll": "3m",
"retrySleepTime": 1000,
"connTimeOut": 1000,
"readTimeOut": 3000
}
},
"writer":{
"name":"hdfswriter",
"parameter":{
"defaultFS":"hdfs://nameservice1",
"path":"/user/data/src/corp_prof_announcement_search_ext",
"fileName":"corp_prof_announcement_search",
"fileType":"text",
"writeMode":"truncate",
"fieldDelimiter":"\u0001",
"haveKerberos":"true",
"kerberosKeytabFilePath":"/home/TDH-Client/TDH-Client/conf/inceptor/dmadmin.keytab",
"kerberosPrincipal":"dmadmin@TDH",
"hadoopConfig":{
"dfs.data.transfer.protection":"authentication",
"hadoop.rpc.protection":"authentication",
"dfs.nameservices":"nameservice1",
"dfs.ha.namenodes.nameservice1":"nn1,nn2",
"dfs.namenode.rpc-address.nameservice1.nn1":"zszq-tdh1:8020",
"dfs.namenode.rpc-address.nameservice1.nn2":"zszq-tdh5:8020",
"dfs.client.use.datanode.hostname":"true",
"dfs.client.failover.proxy.provider.nameservice1":"org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
},
"column":[
{
"name":"ins_num",
"type":"string"
},
{
"name":"ins_fn",
"type":"string"
},
{
"name":"short_name",
"type":"string"
},
{
"name":"event_time",
"type":"string"
},
{
"name":"data_source",
"type":"string"
},
{
"name":"title",
"type":"string"
},
{
"name":"content",
"type":"string"
},
{
"name":"content_link",
"type":"string"
},
{
"name":"type_code",
"type":"string"
},
{
"name":"type_name",
"type":"string"
},
{
"name":"stk_code",
"type":"string"
},
{
"name":"keyw_name",
"type":"string"
},
{
"name":"is_valid",
"type":"string"
},
{
"name":"dm_src_info",
"type":"string"
},
{
"name":"dm_created_time",
"type":"string"
},
{
"name":"dm_created_by",
"type":"string"
},
{
"name":"dm_updated_time",
"type":"string"
},
{
"name":"dm_updated_by",
"type":"string"
}
]
}
}
}
]
}
}
注:index是es表的索引名,不能用别名
type是es表的类型
search是查询语句,
"match_all": {}是相当于sql语言中的 select * from 表名;
如果是加条件的话:
"search":{
"term":{
"ins_fn":{
"value":"值1"
}
}
}
相当于sql中的 select * from 表名 where ins_fn = '值1';
第一篇博客,写得并不是很好,但确实是工作中遇到的探索性问题,主要是对自己遇到的问题做一下笔记,也希望能帮助到大家,谢谢!!!
感谢我的老大:Softmax
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)