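<properties>
    <java.version>1.8</java.version>
    <elasticsearch.version>7.6.1</elasticsearch.version>
</properties>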
导入elasticsearch
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
提前导入fastjson、lombok
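<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.70</version>
</dependency>
<dependency>
    <groupId>org.projectlombok</groupId>
    <artifactId>lombok</artifactId>
    <optional>true</optional>
</dependency>
1.2创建并编写配置类–>连上ES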
@Configuration public class ElasticSearchConfig { // 注册 rest高级客户端 @Bean public RestHighLevelClient restHighLevelClient(){ RestHighLevelClient client = new RestHighLevelClient( RestClient.builder( new HttpHost("127.0.0.1",9200,"http") ) ); return client; } }1.3测试索引的 *** 作 1、索引的创建
@Test void testCreateIndex() throws IOException { //1、创建索引请求 CreateIndexRequest request = new CreateIndexRequest("jd_goods"); //2、执行创建请求 CreateIndexResponse createIndexResponse = restHighLevelClient.indices().create(request, RequestOptions.DEFAULT); System.out.println("执行创建请求===>"+createIndexResponse); }2、测试获取索引
/**
 * Checks whether the {@code kuang_index} index exists and prints the boolean result.
 *
 * @throws IOException if the request to the cluster fails
 */
@Test
void testExistIndex() throws IOException {
    GetIndexRequest lookup = new GetIndexRequest("kuang_index");
    boolean found = restHighLevelClient.indices().exists(lookup, RequestOptions.DEFAULT);
    System.out.println("测试获取索引===>" + found);
}
3、测试删除索引
/**
 * Deletes the {@code kuang_index} index and prints whether the deletion was acknowledged.
 *
 * @throws IOException if the request to the cluster fails
 */
@Test
void testDeleteIndex() throws IOException {
    DeleteIndexRequest request = new DeleteIndexRequest("kuang_index");
    AcknowledgedResponse delete =
            restHighLevelClient.indices().delete(request, RequestOptions.DEFAULT);
    // Print the acknowledged flag rather than the response object, so the output
    // actually answers the "deleted successfully?" question in the message.
    System.out.println("是否删除成功===>" + delete.isAcknowledged());
}
4、测试添加文档
@Test void testAdddocument() throws IOException { //创建对象 User user = new User("狂神说",3); //创建请求 IndexRequest request = new IndexRequest("kuang_index"); //规则 request.id("1"); request.timeout(Timevalue.timevalueSeconds(1)); request.timeout("1s"); //将我们的数据放入请求 json request.source(JSON.toJSONString(user), XContentType.JSON); //客户端发送请求 获取响应的结果 IndexResponse indexResponse = restHighLevelClient.index(request, RequestOptions.DEFAULT); System.out.println(indexResponse.toString()); System.out.println(indexResponse.status());//对应我们命令返回的状态 CREATED }5、获取文档 判断是否存在
@Test void testIsExists() throws IOException { GetRequest getRequest = new GetRequest("kuang_index","1"); //不获取返回的 _source的上下文 getRequest.fetchSourceContext(new FetchSourceContext(false)); getRequest.storedFields("_none_"); boolean exists = restHighLevelClient.exists(getRequest, RequestOptions.DEFAULT); System.out.println(exists); }6、获取文档的信息
/**
 * Fetches document id 1 from {@code kuang_index} and prints both the request and the response.
 *
 * @throws IOException if the request to the cluster fails
 */
@Test
void testGetdocument() throws IOException {
    GetRequest lookup = new GetRequest("kuang_index", "1");
    GetResponse response = restHighLevelClient.get(lookup, RequestOptions.DEFAULT);
    System.out.println(lookup);
    System.out.println(response);
}
7、更新文档的信息
/**
 * Partially updates document id 1 in {@code kuang_index} with a new {@code User} and prints
 * the response status.
 *
 * @throws IOException if the request to the cluster fails
 */
@Test
void testUpdateRequest() throws IOException {
    // Target the same index the other examples write document 1 to; the original used
    // "test", which none of the surrounding examples create or populate.
    UpdateRequest updateRequest = new UpdateRequest("kuang_index", "1");
    updateRequest.timeout("1s");
    User user = new User("狂神说java", 18);
    // Supply the changed fields as a JSON partial document.
    updateRequest.doc(JSON.toJSONString(user), XContentType.JSON);
    UpdateResponse updateResponse =
            restHighLevelClient.update(updateRequest, RequestOptions.DEFAULT);
    System.out.println(updateResponse.status());
}
8、删除文档记录
/**
 * Deletes document id 2 from {@code kuang_index} and prints the response status.
 *
 * @throws IOException if the request to the cluster fails
 */
@Test
void testDeleteRequest() throws IOException {
    DeleteRequest removal = new DeleteRequest("kuang_index", "2");
    removal.timeout("1s");
    DeleteResponse outcome = restHighLevelClient.delete(removal, RequestOptions.DEFAULT);
    System.out.println(outcome.status());
}
9、批量插入数据
@Test void testBulkRequest() throws IOException { BulkRequest bulkRequest = new BulkRequest(); bulkRequest.timeout("10s"); ArrayList10、查询userList = new ArrayList<>(); userList.add(new User("kuangshen1",3)); userList.add(new User("kuangshen2",4)); userList.add(new User("kuangshen3",5)); userList.add(new User("kuangshen4",6)); userList.add(new User("kuangshen5",13)); userList.add(new User("kuangshen6",23)); userList.add(new User("kuangshen7",33)); // 批处理请求 for (int i = 0; i < userList.size(); i++) { bulkRequest.add(new IndexRequest("kuang_index") .id(""+(i+1)) .source(JSON.toJSONString(userList.get(i)),XContentType.JSON)); } BulkResponse bulkResponse = restHighLevelClient.bulk(bulkRequest,RequestOptions.DEFAULT); System.out.println(bulkResponse.hasFailures());//是否失败 返回false 代表成功 }
@Test void testSearch() throws IOException { SearchRequest searchRequest = new SearchRequest(ESconst.ES_INDEX); //构建搜索条件 SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); //高亮 sourceBuilder.highlighter(); //查询条件 我们可以使用 QueryBuilders 工具来实现 //QueryBuilders.termQuery() 精确 //QueryBuilders.matchAllQuery() 匹配所有 TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("name", "qinjiang1"); MatchAllQueryBuilder allQueryBuilder = QueryBuilders.matchAllQuery(); System.out.println("allQueryBuilder===>>"+allQueryBuilder); sourceBuilder.query(termQueryBuilder); sourceBuilder.timeout(new Timevalue(60, TimeUnit.SECONDS)); //放入请求 searchRequest.source(sourceBuilder); SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT); System.out.println(JSON.toJSONString(searchResponse.getHits())); System.out.println("====================================="); for (SearchHit documentFields : searchResponse.getHits().getHits()) { System.out.println(documentFields.getSourceAsMap()); } }二、ElasticSearch实战 2.1导入依赖
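<properties>
    <java.version>1.8</java.version>
    <elasticsearch.version>7.6.1</elasticsearch.version>
</properties>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.70</version>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-devtools</artifactId>
    <scope>runtime</scope>
    <optional>true</optional>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-configuration-processor</artifactId>
    <optional>true</optional>
</dependency>
<dependency>
    <groupId>org.projectlombok</groupId>
    <artifactId>lombok</artifactId>
    <optional>true</optional>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-test</artifactId>
    <scope>test</scope>
</dependency>
2.2编写 application.properties配置文件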
# 更改端口,防止冲突
server.port=9999
# 关闭thymeleaf缓存
spring.thymeleaf.cache=false
2.3测试controller和view
/**
 * MVC controller that maps the root and {@code index} paths to the {@code index} view.
 */
@Controller
public class IndexController {

    /** Logical view name returned for the landing page. */
    private static final String INDEX_VIEW = "index";

    /**
     * Handles GET requests for {@code /} and {@code index}.
     *
     * @return the view name {@code "index"}
     */
    @GetMapping({"/","index"})
    public String index() {
        return INDEX_VIEW;
    }
}
三、爬虫京东的数据到ES 3.1编写Config
/**
 * Spring configuration class that provides the Elasticsearch high-level REST client bean.
 */
@Configuration
public class ElasticSearchClientConfig {

    /**
     * Creates a client that talks HTTP to the node at 127.0.0.1:9200.
     *
     * @return the configured client
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        RestClientBuilder builder = RestClient.builder(new HttpHost("127.0.0.1", 9200, "http"));
        return new RestHighLevelClient(builder);
    }
}
3.2编写service
因为是爬取的数据,那么就不走Dao,以下编写都不会编写接口,开发中必须严格要求编写
ContentService
// 1、解析数据放入 es 索引中 public Boolean parseContents(String keyword) throws Exception { // 获取内容 Listcontents = new HtmlParseUtil().params(keyword); // 内容放入 es 中 BulkRequest bulkRequest = new BulkRequest(); bulkRequest.timeout("2m"); // 可更具实际业务是指 for (int i = 0; i < contents.size(); i++) { bulkRequest.add( new IndexRequest("jd_goods") .id(""+(i+1)) .source(JSON.toJSONString(contents.get(i)), XContentType.JSON) ); } BulkResponse bulk = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT); restHighLevelClient.close(); return !bulk.hasFailures(); } // 2、根据keywords分页查询结果 public List
编写controller
@Autowired
private ContentService contentService;

/**
 * Crawls the items for the given keyword and stores them in Elasticsearch.
 *
 * @param keywords keyword taken from the URL path
 * @return the result of {@code ContentService.parseContents}
 * @throws Exception if crawling or indexing fails
 */
@GetMapping("/parse/{keywords}")
@ResponseBody
public Boolean parses(@PathVariable("keywords") String keywords) throws Exception {
    return contentService.parseContents(keywords);
}

/**
 * Searches the indexed items for the given keyword, one page at a time.
 *
 * @param keywords keyword taken from the URL path
 * @param pageNo   page number taken from the URL path
 * @param pageSize page size taken from the URL path
 * @return the result of {@code ContentService.highlightBuilder}
 * @throws IOException if the search request fails
 */
@ResponseBody
@GetMapping("/search/{keywords}/{pageNo}/{pageSize}")
public List<Map<String, Object>> search(@PathVariable("keywords") String keywords,
                                        @PathVariable("pageNo") int pageNo,
                                        @PathVariable("pageSize") int pageSize) throws IOException {
    // Forward the requested paging values; the original always passed 1 and 10,
    // so the pageNo/pageSize path variables had no effect.
    return contentService.highlightBuilder(keywords, pageNo, pageSize);
}
3.2爬虫工具类
public class HtmlParseUtil { public static void main(String[] args) throws Exception { new HtmlParseUtil().params("码出高效").forEach(System.out::println); } public List3.3测试效果 待解决问题: 1、只能半模糊查询,不可以精确查询; 2、分词时 自定义分词 需手动建.dic 添加; 3、轮询时间如何设置为秒;params(String keywords) throws Exception { //获取请求 https://search.jd.com/Search?keyword=java //前提 需要联网 String url = "https://search.jd.com/Search?keyword="+keywords+"&enc=utf-8"; //解析网页 document document = Jsoup.parse(new URL(url), 30000); //所有你在js中可以使用的方法 这里都能用 Element element = document.getElementById("J_goodsList"); //获取所有的li元素 Elements elements = element.getElementsByTag("li"); ArrayList goodList = new ArrayList<>(); for (Element el : elements) { //关于图片特别多的网站 所有图片都是延迟加载的data-lazy-img String img = el.getElementsByTag("img").eq(0).attr("data-lazy-img"); String price = el.getElementsByClass("p-price").eq(0).text(); String title = el.getElementsByClass("p-name").eq(0).text(); Content content = new Content(); content.setImg(img); content.setTitle(title); content.setPrice(price); goodList.add(content); } return goodList; } }
ElasticSearch入门学习笔记(一)概念篇
ElasticSearch入门学习笔记(二)软件安装篇
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)