ELK实现模糊搜索并高亮


ELK实现全文模糊搜索高亮

相关项目:后端:gqd000/OurBlog: 多人博客系统 (github.com)

​ 前端:1664635775/BlogDemo (github.com)

  1. 后端代码部分:

    • 以搜索文章内容和标题为例,先创建用于展示搜索结果的实体类ArticleSearchDTO:

      package com.digging.model.dto;
      
      import lombok.AllArgsConstructor;
      import lombok.Builder;
      import lombok.Data;
      import lombok.NoArgsConstructor;
      import org.springframework.data.annotation.Id;
      import org.springframework.data.elasticsearch.annotations.Document;
      import org.springframework.data.elasticsearch.annotations.Field;
      import org.springframework.data.elasticsearch.annotations.FieldType;
      
      import java.time.LocalDateTime;
      
      /**
       * Article search result, mapped to the Elasticsearch index {@code article}.
       *
       * <p>Lombok generates the accessors, the builder and both constructors, so the
       * field declaration order below determines the all-args constructor signature.
       *
       * @author 11921
       */
      @Data
      @Builder
      @AllArgsConstructor
      @NoArgsConstructor
      @Document(indexName = "article")
      public class ArticleSearchDTO {
      
          /**
           * Article id (the document identifier).
           */
          @Id
          private Long id;
      
          /**
           * Article title. Indexed with the {@code ik_max_word} analyzer and searched
           * with the {@code ik_smart} analyzer.
           */
          @Field(type = FieldType.Text, analyzer = "ik_max_word",searchAnalyzer = "ik_smart")
          private String name;
      
          /**
           * Article content. Indexed with the {@code ik_max_word} analyzer and searched
           * with the {@code ik_smart} analyzer.
           */
          @Field(type = FieldType.Text, analyzer = "ik_max_word",searchAnalyzer = "ik_smart")
          private String content;
      
          /**
           * Article status, stored as an integer.
           */
          @Field(type = FieldType.Integer)
          private Integer available;
      
      
          // Username of the article's author
          private String username;
          // Id of the article's author
          private Long userId;
          // Time the article was created.
          // NOTE(review): neither LocalDateTime field declares a @Field date mapping; Spring Data
          // Elasticsearch documents that temporal properties need FieldType.Date or a custom
          // converter - confirm these values deserialize correctly from what Logstash writes.
          private LocalDateTime createTime;
          // Time the article was last modified
          private LocalDateTime updateTime;
      
      }
    • ArticleService中加入新方法:

      /**
           * Searches articles by keyword.
           *
           * @param keywords search text supplied by the caller
           * @return the matching articles
           */
          List<ArticleSearchDTO> listArticlesBySearch(String keywords);
    • ArticleServiceImpl加入新方法:

      @Override
          public List<ArticleSearchDTO> listArticlesBySearch(String keywords) {
              // Build the Elasticsearch query first, then run it with highlighting applied.
              NativeSearchQueryBuilder queryBuilder = buildQuery(keywords);
              return searchArticle(queryBuilder);
          }
      
      
      
      /**
           * Builds the Elasticsearch query for an article search.
           *
           * <p>When {@code keywords} is non-null, an article matches if its {@code name}
           * (title) or its {@code content} matches the keywords. The {@code available == 0}
           * restriction is applied in every case. Previously it was only added together
           * with the keyword clause, so a request without keywords produced an empty bool
           * query that matched every document regardless of status.
           * NOTE(review): this assumes status 0 marks the articles that may be shown in
           * search results - confirm against the article table.
           *
           * @param keywords search text; may be {@code null}, in which case only the
           *                 status restriction is applied
           * @return a query builder carrying the assembled bool query
           */
          private NativeSearchQueryBuilder buildQuery(String keywords) {
              BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
              // Match the keywords against the title or the content. The nested bool
              // query holds only "should" clauses, so at least one of them has to match.
              if (Objects.nonNull(keywords)) {
                  boolQueryBuilder.must(QueryBuilders.boolQuery()
                          .should(QueryBuilders.matchQuery("name", keywords))
                          .should(QueryBuilders.matchQuery("content", keywords)));
              }
              // Always restrict the result to status 0, with or without keywords.
              boolQueryBuilder.must(QueryBuilders.termQuery("available", 0));

              NativeSearchQueryBuilder nativeSearchQueryBuilder = new NativeSearchQueryBuilder();
              nativeSearchQueryBuilder.withQuery(boolQueryBuilder);
              return nativeSearchQueryBuilder;
          }
      
      
          /**
           * 文章搜索结果高亮
           *
           * @param nativeSearchQueryBuilder es条件构造器
           * @return 搜索结果
           */
          private List<ArticleSearchDTO> searchArticle(NativeSearchQueryBuilder nativeSearchQueryBuilder) {
              // 添加文章标题高亮
              HighlightBuilder.Field titleField = new HighlightBuilder.Field("name");
              titleField.preTags("<span style='color:#f47466'>");
              titleField.postTags("</span>");
              // 添加文章内容高亮
              HighlightBuilder.Field contentField = new HighlightBuilder.Field("content");
              contentField.preTags("<span style='color:#f47466'>");
              contentField.postTags("</span>");
              //显示文章全部内容
              contentField.numOfFragments(0);
              //显示除html标签外 200个字符
      //        contentField.fragmentSize(200);
              nativeSearchQueryBuilder.withHighlightFields(titleField, contentField);
              // 搜索
              SearchHits<ArticleSearchDTO> search = elasticsearchRestTemplate.search(nativeSearchQueryBuilder.build(), ArticleSearchDTO.class);
              return search.getSearchHits().stream().map(hit -> {
                  ArticleSearchDTO article = hit.getContent();
                  // 获取文章标题高亮数据
                  List<String> titleHighLightList = hit.getHighlightFields().get("name");
                  if (CollectionUtils.isNotEmpty(titleHighLightList)) {
                      // 替换标题数据
                      article.setName(titleHighLightList.get(0));
                  }
                  // 获取文章内容高亮数据
                  List<String> contentHighLightList = hit.getHighlightFields().get("content");
                  if (CollectionUtils.isNotEmpty(contentHighLightList)) {
                      // 替换内容数据
                      article.setContent(contentHighLightList.get(0));
                  }
                  return article;
              }).collect(Collectors.toList());
          }
    • ArticleController中编写接口:

      // Search articles by keyword
          @GetMapping("/search")
          public Result<List<ArticleSearchDTO>> searchBlog(String keywords) {
              List<ArticleSearchDTO> matches = articleService.listArticlesBySearch(keywords);

              // No hits: answer with a message only.
              if (matches.isEmpty()) {
                  return Result.success("未搜索到相关结果!");
              }
              return Result.success(matches, "搜索成功!");
          }
  2. Logstash配置数据库数据与引擎增量同步

    • 需要对数据库字段更名时,用filter配置:

      filter {
          mutate {
              # Rename the snake_case database columns to the camelCase field names
              # used in the index, e.g. "user_id" becomes "userId".
              rename => {
                  "user_id" => "userId"
                  "create_time" => "createTime"
                  "update_time" => "updateTime"
              }
          }
      }
    • 新建article.conf文件:

      input {
        jdbc {
          # Path to the mysql-connector-java jar; keep its version consistent
          # (presumably with the MySQL server in use - confirm)
          jdbc_driver_library => "/home/www/OurBlog/Elastic/Logstash/mysql-connector-java-8.0.24.jar"
          jdbc_driver_class => "com.mysql.cj.jdbc.Driver"
          jdbc_connection_string => "jdbc:mysql://HOST:3306/database"
          jdbc_user => "username"
          jdbc_password => "password"
          # Sync schedule in cron syntax. Fields, left to right: minute, hour,
          # day of month, month, day of week. All "*" means run every minute;
          # "*/5 * * * *" would run every 5 minutes.
          schedule => "* * * * *"
          # Select everything: full re-sync on every run
          # statement => "SELECT * FROM article"
          # Incremental sync: only rows whose update_time is at or after the value
          # recorded by the previous run. Rows equal to that value are fetched again.
          statement => "SELECT * FROM article WHERE update_time >= :sql_last_value"
          # Track the update_time column (a timestamp) as :sql_last_value and persist
          # it in the file named by last_run_metadata_path between runs.
          use_column_value => true
          tracking_column_type => "timestamp"
          tracking_column => "update_time"
          last_run_metadata_path => "syncpoint_table"
        }
      }
      
      # Rename the database columns before the event is written to the index
      filter {
          mutate {
              rename => {
                  "user_id" => "userId"
                  "create_time" => "createTime"
                  "update_time" => "updateTime"
              }
          }
      }
      
      output {
          elasticsearch {
              # Elasticsearch host and port
              hosts => ["localhost:9200"]
              # Target index name; can be customised
              index => "article"
              # Use the row's id column as the document id, so a re-synced row
              # overwrites its existing document. The table must have an id column.
              document_id => "%{id}"
              # document_type is deliberately not set: the option is deprecated because
              # mapping types were removed from Elasticsearch, so the plugin default is used.
          }
          stdout {
              # Print each event as one line of JSON
              codec => json_lines
          }
      }
    • 在logstash目录下带着配置文件运行:nohup logstash-7.12.0/bin/logstash -f logstash-7.12.0/config/article.conf &,使用tail -f nohup.out可查看启动日志。


文章作者: 一袖南烟顾
版权声明: 本博客所有文章除特別声明外,均采用 CC BY 4.0 许可协议。转载请注明来源 一袖南烟顾 !
评论
  目录