lucene全文检索(搜索)

硅谷探秘者 1146 0 0

lucene全文检索

依赖jar
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>5.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>5.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>5.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>5.3.1</version>
        </dependency>

 

基础类
import lombok.Data;
/**
 * Plain data holder for an article that is written to and read from the Lucene index.
 *
 * <p>Lombok's {@code @Data} generates the accessors (for example {@code getId()} and
 * {@code setTitle(String)}); no explicit methods are declared here.
 *
 * @author Jiajiajia
 * @version V1.0
 **/
@Data
public class Article {
    // Identifier of the article.
    private String id;
    // Title text of the article.
    private String title;
    // Body text of the article.
    private String content;
}
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.nio.file.Paths;

/**
 * Holds the Lucene {@link Directory} and {@link Analyzer} shared by the indexing and
 * search code.
 *
 * <p>Both are created once, when this class is first loaded. The index lives in the
 * file-system directory {@code d:/test/indexDir} and text is analyzed with a
 * {@link StandardAnalyzer}.
 *
 * @author Jiajiajia
 * @version V1.0
 **/
public class LuceneUtils {
    /** Index directory shared by all readers and writers. */
    public static Directory directory;
    /** Analyzer used both for indexing and for parsing queries. */
    public static Analyzer analyzer;
    static{
        try{
            directory = FSDirectory.open(Paths.get("d:/test/indexDir"));
            analyzer = new StandardAnalyzer();
        }catch(Exception e){
            // Fail fast: swallowing the error would leave the fields null and surface
            // later as an unrelated NullPointerException on the first index access.
            throw new ExceptionInInitializerError(e);
        }
    }
}
import com.example.demo.entity.Article;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.*;

import java.io.IOException;


/**
 * Converts between {@link Article} beans and Lucene {@link Document}s.
 *
 * @author Jiajiajia
 * @version V1.0
 **/
public class DocumentUtils {

    /**
     * Builds the Lucene document for an article.
     *
     * <p>All three fields are stored. {@code id} is indexed without analysis so it can be
     * matched exactly; {@code title} and {@code content} are analyzed for full-text search.
     *
     * @param article the article to convert
     * @return a document with the fields {@code id}, {@code title} and {@code content}
     */
    public static Document article2Document(Article article){
        Document document = new Document();
        Field idField = new Field("id",article.getId(), Field.Store.YES, Field.Index.NOT_ANALYZED);
        Field titleField = new Field("title",article.getTitle(), Field.Store.YES, Field.Index.ANALYZED);
        Field contentField = new Field("content",article.getContent(), Field.Store.YES, Field.Index.ANALYZED);
        document.add(idField);
        document.add(titleField);
        document.add(contentField);
        return document;
    }

    /**
     * Rebuilds an article from the stored fields of a document, without highlighting.
     *
     * @param document the document read from the index
     * @return an article populated from the stored {@code id}, {@code title} and {@code content}
     */
    public static Article document2Article(Document document){
        Article article = new Article();
        article.setId(document.get("id"));
        article.setTitle(document.get("title"));
        article.setContent(document.get("content"));
        return article;
    }

    /**
     * Rebuilds an article from a search hit, highlighting the query terms in
     * {@code title} and {@code content}.
     *
     * <p>When a field contains no query term, or highlighting it fails, the stored text of
     * that field is returned unchanged instead of {@code null}.
     *
     * @param document      the stored document of the hit
     * @param highlighter   highlighter built for the current query; its fragmenter is replaced here
     * @param queryScorer   scorer of the current query, used to build the fragmenter
     * @param indexSearcher searcher that produced the hit
     * @param scoreDoc      the hit being converted
     * @return the article with highlighted title and content where a match exists
     */
    public static Article document2Article(Document document, Highlighter highlighter, QueryScorer queryScorer,
                                           IndexSearcher indexSearcher, ScoreDoc scoreDoc){
        Fragmenter fragment=new SimpleSpanFragmenter(queryScorer);
        highlighter.setTextFragmenter(fragment);
        Article article = new Article();
        article.setId(document.get("id"));
        article.setTitle(highlightOrRaw(document, "title", highlighter, indexSearcher, scoreDoc));
        article.setContent(highlightOrRaw(document, "content", highlighter, indexSearcher, scoreDoc));
        return article;
    }

    /** Returns the best highlighted fragment of one stored field, or the stored text when there is none. */
    private static String highlightOrRaw(Document document, String fieldName, Highlighter highlighter,
                                         IndexSearcher indexSearcher, ScoreDoc scoreDoc){
        String raw = document.get(fieldName);
        if (raw == null) {
            return null;
        }
        try {
            TokenStream tokenStream = TokenSources.getAnyTokenStream(indexSearcher.getIndexReader(), scoreDoc.doc, fieldName, LuceneUtils.analyzer);
            String fragmentText = highlighter.getBestFragment(tokenStream, raw);
            // getBestFragment yields null when the field holds no query term; keep the plain text then.
            return fragmentText != null ? fragmentText : raw;
        } catch (IOException | InvalidTokenOffsetsException e) {
            e.printStackTrace();
            return raw;
        }
    }
}
搜索测试
import com.example.demo.entity.Article;
import com.example.demo.server.DocumentUtils;
import com.example.demo.server.LuceneUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * REST endpoints that create, search, delete and update articles in the Lucene index
 * configured by {@link LuceneUtils}.
 *
 * <p>Each request opens its own reader or writer in a try-with-resources statement, so the
 * resource is closed even when the operation fails. Failures are printed and reported as
 * {@code null} by the search endpoints and as {@code false} by delete and update.
 *
 * @author Jiajiajia
 * @version V1.0
 **/
@RestController
public class TestController {

    /** Formatter that wraps each highlighted term in a red {@code <span>}. */
    SimpleHTMLFormatter fors=new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>");

    /**
     * Adds an article to the index.
     *
     * @param article the article to index
     * @return the article that was passed in
     */
    @GetMapping("create")
    public Article create(Article article){
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LuceneUtils.analyzer);
        // Closing in all cases matters: a writer left open after a failed add keeps the index locked.
        try (IndexWriter indexWriter = new IndexWriter(LuceneUtils.directory,indexWriterConfig)) {
            indexWriter.addDocument(DocumentUtils.article2Document(article));
        } catch (IOException e) {
            e.printStackTrace();
        }
        return article;
    }

    /**
     * Looks up a single article by its exact id.
     *
     * @param id the id to look for
     * @return the matching article, or {@code null} when {@code id} is {@code null},
     *         nothing matches, or the index cannot be read
     */
    @GetMapping("searchById")
    public Article searchById(String id){
        if(id==null){
            return null;
        }
        try (DirectoryReader reader = DirectoryReader.open(LuceneUtils.directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            Query query = new TermQuery(new Term("id",id));
            TopDocs topDocs = indexSearcher.search(query,1);
            if(topDocs.scoreDocs.length>0){
                Document document =  indexSearcher.doc(topDocs.scoreDocs[0].doc);
                return DocumentUtils.document2Article(document);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Full-text search over {@code title} and {@code content}, returning at most 20 hits.
     *
     * @param search the query string, parsed with the classic query parser
     * @param id     optional id of an article to exclude from the result; ignored when null or empty
     * @return the matching articles, or {@code null} when {@code search} is {@code null},
     *         cannot be parsed, or the index cannot be read
     */
    @GetMapping("search/basic")
    public List<Article> searchBasic(String search,String id){
        if(search==null){
            return null;
        }
        try (DirectoryReader reader = DirectoryReader.open(LuceneUtils.directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            BooleanQuery booleanQuery = new BooleanQuery();
            String[]  fields=new String[]{"title","content"};
            QueryParser queryParser = new MultiFieldQueryParser(fields, LuceneUtils.analyzer);
            Query query = queryParser.parse(search);
            booleanQuery.add(query,BooleanClause.Occur.SHOULD);
            if(id!=null&&!"".equals(id)){
                Query query2 = new TermQuery(new Term("id",id));
                booleanQuery.add(query2,BooleanClause.Occur.MUST_NOT);
            }
            TopDocs topDocs = indexSearcher.search(booleanQuery,20);
            System.out.println("count:"+topDocs.totalHits);
            return toArticles(indexSearcher, topDocs.scoreDocs);
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Paged full-text search over {@code title} and {@code content} with highlighted matches.
     *
     * @param search    the query string, parsed with the classic query parser
     * @param pageIndex 1-based page number; values below 1 are treated as the first page
     * @param pageSize  number of hits per page; a non-positive size yields an empty list
     * @return the articles of the requested page (empty when the page lies past the last hit),
     *         or {@code null} when {@code search} is {@code null}, cannot be parsed,
     *         or the index cannot be read
     */
    @GetMapping("search")
    public List<Article> search(String search,int pageIndex,int pageSize){
        if(search==null){
            return null;
        }
        if(pageSize<=0){
            return new ArrayList<>();
        }
        try (DirectoryReader reader = DirectoryReader.open(LuceneUtils.directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            String[]  fields=new String[]{"title","content"};
            QueryParser queryParser = new MultiFieldQueryParser(fields, LuceneUtils.analyzer);
            Query query = queryParser.parse(search);
            ScoreDoc lastScoreDoc = getLastScoreDoc(pageIndex, pageSize, query, indexSearcher);

            QueryScorer scorer=new QueryScorer(query);
            Highlighter highlighter=new Highlighter(fors,scorer);

            TopDocs topDocs = indexSearcher.searchAfter(lastScoreDoc,query,pageSize);
            System.out.println("count:"+topDocs.totalHits);
            List<Article> articleList = new ArrayList<>();
            for(ScoreDoc scoreDoc:topDocs.scoreDocs){
                Document document =  indexSearcher.doc(scoreDoc.doc);
                articleList.add(DocumentUtils.document2Article(document,highlighter,scorer,indexSearcher,scoreDoc));
            }
            return articleList;
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Finds the last hit of the page preceding {@code pageIndex}, for use with
     * {@code IndexSearcher.searchAfter}.
     *
     * @return {@code null} for the first page or when the query has no hits; otherwise the
     *         last hit before the requested page. When fewer hits exist than the pages
     *         before it would hold, the very last hit is returned, so the follow-up
     *         search comes back empty instead of failing.
     */
    private ScoreDoc getLastScoreDoc(int pageIndex,int pageSize,Query query,IndexSearcher indexSearcher) throws IOException{
        if(pageIndex<=1){
            return null;
        }
        int num = pageSize*(pageIndex-1);
        if(num<=0){
            return null;
        }
        TopDocs tds = indexSearcher.search(query, num);
        int available = tds.scoreDocs.length;
        if(available==0){
            return null;
        }
        // Clamp to the hits that actually exist; the original indexed [num-1] unconditionally.
        return tds.scoreDocs[Math.min(num, available)-1];
    }

    /**
     * Wildcard search on the {@code title} field using the pattern {@code *word*},
     * returning at most 100 hits.
     *
     * @param word the text to look for inside title terms
     * @return the matching articles, or {@code null} when {@code word} is {@code null}
     *         or the search fails
     */
    @GetMapping("wordSearch")
    public  List<Article> wordSearch(String word){
        if(word==null){
            // Without this guard the pattern would become the literal "*null*".
            return null;
        }
        try (DirectoryReader reader = DirectoryReader.open(LuceneUtils.directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            Query query=new WildcardQuery(new Term("title","*"+word+"*"));
            TopDocs topDocs = indexSearcher.search(query,100);
            return toArticles(indexSearcher, topDocs.scoreDocs);
        }catch (Exception e){
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Deletes every document whose {@code id} equals the given value.
     *
     * @param id the id of the article to delete
     * @return {@code true} when the delete was applied, {@code false} on an I/O failure
     */
    @GetMapping("delete")
    public boolean delete(String id){
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LuceneUtils.analyzer);
        try (IndexWriter indexWriter = new IndexWriter(LuceneUtils.directory,indexWriterConfig)) {
            indexWriter.deleteDocuments(new Term("id",id));
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Replaces the indexed document that has the article's id with the given article.
     *
     * @param article the new state of the article; its id selects the document to replace
     * @return {@code true} when the update was applied, {@code false} on an I/O failure
     */
    @GetMapping("update")
    public boolean update(Article article){
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LuceneUtils.analyzer);
        try (IndexWriter indexWriter = new IndexWriter(LuceneUtils.directory,indexWriterConfig)) {
            indexWriter.updateDocument(new Term("id",article.getId()),DocumentUtils.article2Document(article));
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /** Loads the stored document of every hit and converts it to an article without highlighting. */
    private static List<Article> toArticles(IndexSearcher indexSearcher, ScoreDoc[] hits) throws IOException{
        List<Article> articleList = new ArrayList<>();
        for(ScoreDoc scoreDoc:hits){
            Document document =  indexSearcher.doc(scoreDoc.doc);
            articleList.add(DocumentUtils.document2Article(document));
        }
        return articleList;
    }

}

 


评论区
请写下您的评论...
暂无评论...
猜你喜欢
工具 1816 流程分析图索引库中是如何存储的?数据库的一行数据会存储为一个document对象,一条记录的一列会存储为一个field,不会将数据库的所有数据都存储到索引库。索引是如何创建过程流程图1.分析
框架 1499 :2.3.1.RELEASE,elasticsearch版本:7.6.0demo下载参考:springboot+elasticsearchdemo开始前请阅读:elasticsearch官方文档项目pom
其他 1834 一、表格 省 简称 省市 河北 冀A 河北石家庄 河北 冀B 河北唐山 河北 冀C 河北秦皇岛 河北 冀D 河北邯郸 河北 冀E 河北邢台 河北 冀F 河北保定 河北 冀G 河北张家口 河北 冀H 河北承德 河北 冀J 河北沧州 河北 冀R 河北廊坊 河北 冀T 河北衡水 山西 晋A 山西太原 山西 晋B 山西大同 山西 晋C 山西
java框架 1378 springboot整合elasticsearch框架实现引demo配置说明参考:http://www.jiajiajia.club/blog/artical/Ja4t7X/378
official 875 leetcode第79题(中等)原链接:https://leetcode-cn.com/problems/word-search/问题描述给定一个二维网格和一个单词,找出该单词是否存在于网格中。单词必须按照字母顺序,通过相邻的单元格内的字母构成,其中“相邻”单元格是那些水平相邻或垂直相邻的单元格。同一个单元格内的字母不允许被重复使用。示例board=[['A','B','C','E'],['S',
weblog 2812 vue过滤和排序!DOCTYPEhtmlhtml head metacharset="UTF-8" title/title scriptsrc="js/vue.min.js"/script
weblog 6422 a*算法动态演示分析请参考http://photo.jiajiajia.club/item/a-star.html什么是a*算法A*寻算法,俗称A星算法,作为启发式算法中的一种,这是一
weblog 6702 前言了解a*算法的原理请参考:http://www.jiajiajia.club/official/weblog/32a*算法动态演示分析,及代码,请参考:http
归档
2018-11  12 2018-12  33 2019-01  28 2019-02  28 2019-03  32 2019-04  27 2019-05  33 2019-06  6 2019-07  12 2019-08  12 2019-09  21 2019-10  8 2019-11  15 2019-12  25 2020-01  9 2020-02  5 2020-03  16 2020-04  4 2020-06  1 2020-07  7 2020-08  13 2020-09  9 2020-10  5 2020-12  3 2021-01  1 2021-02  5 2021-03  7 2021-04  4 2021-05  4 2021-06  1 2021-07  7 2021-08  2 2021-09  8 2021-10  9 2021-11  16 2021-12  14 2022-01  7 2022-05  1 2022-08  3 2022-09  2 2022-10  2 2022-12  5 2023-01  3 2023-02  1 2023-03  4 2023-04  2 2023-06  3 2023-07  4 2023-08  1 2023-10  1 2024-02  1 2024-03  1 2024-04  1 2024-08  1
标签
算法基础 linux 前端 c++ 数据结构 框架 数据库 计算机基础 储备知识 java基础 ASM 其他 深入理解java虚拟机 nginx git 消息中间件 搜索 maven redis docker dubbo vue 导入导出 软件使用 idea插件 协议 无聊的知识 jenkins springboot mqtt协议 keepalived minio mysql ensp 网络基础 xxl-job rabbitmq haproxy srs 音视频 webrtc javascript 加密算法
目录
没有一个冬天不可逾越,没有一个春天不会来临。最慢的步伐不是跬步,而是徘徊,最快的脚步不是冲刺,而是坚持。