lucene全文检索(搜索)

2019
0 73

lucene全文检索

依赖jar
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>5.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>5.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>5.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>5.3.1</version>
        </dependency>

 

基础类
import lombok.Data;
/**
 * Plain data holder for an article: an identifier, a title and a body text.
 *
 * <p>The class declares no methods itself; the accessors used by the other
 * classes in this tutorial (for example {@code getId()} and {@code setTitle(...)})
 * come from Lombok's {@code @Data} annotation.
 *
 * @author Jiajiajia
 * @version V1.0
 */
@Data
public class Article {
    private String id;
    private String title;
    private String content;
}
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.nio.file.Paths;

/**
 * Holds the Lucene {@link Directory} and {@link Analyzer} that the rest of the
 * demo reads through the public static fields {@link #directory} and
 * {@link #analyzer}.
 *
 * <p>Both fields are assigned once, in the static initializer. If the index
 * directory cannot be opened, class initialization fails with an
 * {@link ExceptionInInitializerError} carrying the original cause, instead of
 * leaving the fields {@code null} and failing later with an unrelated
 * {@code NullPointerException}.
 *
 * @author Jiajiajia
 * @version V1.0
 */
public class LuceneUtils {
    /** File-system directory that stores the index (opened at {@code d:/test/indexDir}). */
    public static Directory directory;
    /** Analyzer used both for indexing and for parsing queries. */
    public static Analyzer analyzer;
    static{
        try{
            directory = FSDirectory.open(Paths.get("d:/test/indexDir"));
            analyzer = new StandardAnalyzer();
        }catch(Exception e){
            // Fail fast: without a directory and an analyzer nothing in this demo can work.
            throw new ExceptionInInitializerError(e);
        }
    }
}
import com.example.demo.entity.Article;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.*;

import java.io.IOException;


/**
 * Converts between {@link Article} objects and Lucene {@link Document}s, with an
 * optional variant that highlights query matches in the title and content.
 *
 * @author Jiajiajia
 * @version V1.0
 */
public class DocumentUtils {
    /**
     * Builds the Lucene document that is indexed for an article.
     *
     * <p>The id is stored as a single, un-tokenized term so that it can be looked up
     * with an exact {@code TermQuery}; title and content are stored and analyzed for
     * full-text search. {@link StringField} and {@link TextField} replace the legacy
     * {@code Field(..., Field.Index.*)} constructor, which is a deprecated
     * transition API.
     *
     * @param article the article to index; its id, title and content are passed
     *                straight to the field constructors
     * @return a document with the stored fields {@code id}, {@code title} and {@code content}
     */
    public static Document article2Document(Article article){
        Document document = new Document();
        document.add(new StringField("id", article.getId(), Field.Store.YES));
        document.add(new TextField("title", article.getTitle(), Field.Store.YES));
        document.add(new TextField("content", article.getContent(), Field.Store.YES));
        return document;
    }

    /**
     * Rebuilds an article from the stored fields of a document, without highlighting.
     *
     * @param document a document read from the index
     * @return an article whose id, title and content are the stored field values
     *         ({@code null} for a field the document does not store)
     */
    public static Article document2Article(Document document){
        Article article = new Article();
        article.setId(document.get("id"));
        article.setTitle(document.get("title"));
        article.setContent(document.get("content"));
        return article;
    }

    /**
     * Rebuilds an article from a document and highlights the query matches in its
     * title and content.
     *
     * <p>When a field contains no query term, or highlighting fails, the stored text
     * is returned unchanged instead of {@code null}, so a hit that matched only in
     * the content still has a title (and vice versa).
     *
     * @param document      the stored document of the hit
     * @param highlighter   highlighter to use; its text fragmenter is replaced by a
     *                      {@link SimpleSpanFragmenter} built from {@code queryScorer}
     * @param queryScorer   scorer of the executed query
     * @param indexSearcher not consulted any more (the stored text is re-analyzed with
     *                      {@code LuceneUtils.analyzer}); kept so existing callers compile
     * @param scoreDoc      not consulted any more; kept so existing callers compile
     * @return the article with highlighted title and content
     */
    public static Article document2Article(Document document, Highlighter highlighter, QueryScorer queryScorer,
                                           IndexSearcher indexSearcher, ScoreDoc scoreDoc){
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));
        Article article = new Article();
        article.setId(document.get("id"));
        article.setTitle(highlightOrOriginal(highlighter, "title", document.get("title")));
        article.setContent(highlightOrOriginal(highlighter, "content", document.get("content")));
        return article;
    }

    /** Returns the best highlighted fragment of {@code text}, or {@code text} itself when there is none. */
    private static String highlightOrOriginal(Highlighter highlighter, String fieldName, String text){
        if (text == null) {
            return null;
        }
        try {
            // getBestFragment yields null when the text contains no query term.
            String fragment = highlighter.getBestFragment(LuceneUtils.analyzer, fieldName, text);
            return fragment != null ? fragment : text;
        } catch (IOException | InvalidTokenOffsetsException e) {
            e.printStackTrace();
            return text;
        }
    }
}
搜索测试
import com.example.demo.entity.Article;
import com.example.demo.server.DocumentUtils;
import com.example.demo.server.LuceneUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Demo REST controller that creates, searches, deletes and updates articles in the
 * Lucene index configured by {@link LuceneUtils}.
 *
 * <p>Every request opens its own {@link IndexWriter} or {@link DirectoryReader} in a
 * try-with-resources statement, so the writer (and its write lock) or the reader is
 * released even when the operation throws. Failures are printed and reported through
 * the return value, as before: {@code null} for the search endpoints, {@code false}
 * for delete and update.
 *
 * @author Jiajiajia
 * @version V1.0
 */
@RestController
public class TestController {

    /** Fields queried by the full-text endpoints. */
    private static final String[] SEARCH_FIELDS = {"title", "content"};

    /** Formatter that wraps every highlighted term in a red {@code <span>}. */
    SimpleHTMLFormatter fors=new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>");

    /**
     * Adds an article to the index.
     *
     * @param article the article to index
     * @return the same article instance
     */
    @GetMapping("create")
    public Article create(Article article){
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LuceneUtils.analyzer);
        try (IndexWriter indexWriter = new IndexWriter(LuceneUtils.directory, indexWriterConfig)) {
            indexWriter.addDocument(DocumentUtils.article2Document(article));
        } catch (IOException e) {
            e.printStackTrace();
        }
        return article;
    }

    /**
     * Looks up a single article by its exact id.
     *
     * @param id the article id
     * @return the article, or {@code null} when {@code id} is {@code null}, nothing
     *         matches, or the index cannot be read
     */
    @GetMapping("searchById")
    public Article searchById(String id){
        if (id == null) {
            return null;
        }
        try (DirectoryReader reader = DirectoryReader.open(LuceneUtils.directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            TopDocs topDocs = indexSearcher.search(new TermQuery(new Term("id", id)), 1);
            if (topDocs.scoreDocs.length > 0) {
                return DocumentUtils.document2Article(indexSearcher.doc(topDocs.scoreDocs[0].doc));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Full-text search over title and content, returning at most 20 hits.
     *
     * @param search the query string, in Lucene classic query syntax
     * @param id     optional id of an article to exclude from the result
     * @return the matching articles; an empty list for a blank query; {@code null}
     *         when the query cannot be parsed or the index cannot be read
     */
    @GetMapping("search/basic")
    public List<Article> searchBasic(String search,String id){
        if (isBlank(search)) {
            return new ArrayList<>();
        }
        try (DirectoryReader reader = DirectoryReader.open(LuceneUtils.directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            QueryParser queryParser = new MultiFieldQueryParser(SEARCH_FIELDS, LuceneUtils.analyzer);
            BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
            booleanQuery.add(queryParser.parse(search), BooleanClause.Occur.SHOULD);
            if (id != null && !id.isEmpty()) {
                booleanQuery.add(new TermQuery(new Term("id", id)), BooleanClause.Occur.MUST_NOT);
            }
            TopDocs topDocs = indexSearcher.search(booleanQuery.build(), 20);
            System.out.println("count:"+topDocs.totalHits);
            return toArticles(indexSearcher, topDocs.scoreDocs);
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Paged full-text search over title and content with highlighted matches.
     *
     * @param search    the query string, in Lucene classic query syntax
     * @param pageIndex 1-based page number; values below 1 are treated as the first page
     * @param pageSize  number of hits per page
     * @return the highlighted articles of the requested page; an empty list for a
     *         blank query, a {@code pageSize} below 1, or a page past the last hit;
     *         {@code null} when the query cannot be parsed or the index cannot be read
     */
    @GetMapping("search")
    public List<Article> search(String search,int pageIndex,int pageSize){
        if (isBlank(search) || pageSize < 1) {
            return new ArrayList<>();
        }
        try (DirectoryReader reader = DirectoryReader.open(LuceneUtils.directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            QueryParser queryParser = new MultiFieldQueryParser(SEARCH_FIELDS, LuceneUtils.analyzer);
            Query query = queryParser.parse(search);

            ScoreDoc lastScoreDoc = getLastScoreDoc(pageIndex, pageSize, query, indexSearcher);
            if (pageIndex > 1 && lastScoreDoc == null) {
                // Fewer hits than the requested page skips: the page is empty.
                return new ArrayList<>();
            }

            QueryScorer scorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(fors, scorer);

            TopDocs topDocs = indexSearcher.searchAfter(lastScoreDoc, query, pageSize);
            System.out.println("count:"+topDocs.totalHits);
            List<Article> articleList = new ArrayList<>();
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                articleList.add(DocumentUtils.document2Article(document, highlighter, scorer, indexSearcher, scoreDoc));
            }
            return articleList;
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Finds the last hit of the page preceding {@code pageIndex}, to be used as the
     * anchor of {@code IndexSearcher.searchAfter}.
     *
     * @return the anchor hit, or {@code null} when {@code pageIndex} is the first page
     *         or the query has fewer hits than the preceding pages would contain
     */
    private ScoreDoc getLastScoreDoc(int pageIndex,int pageSize,Query query,IndexSearcher indexSearcher) throws IOException{
        if (pageIndex <= 1) {
            return null;
        }
        // Computed as long so that large page numbers cannot overflow into a negative count.
        long skipped = (long) pageSize * (pageIndex - 1);
        if (skipped < 1 || skipped > Integer.MAX_VALUE) {
            return null;
        }
        int num = (int) skipped;
        TopDocs tds = indexSearcher.search(query, num);
        return tds.scoreDocs.length < num ? null : tds.scoreDocs[num - 1];
    }

    /**
     * Substring search on the title terms using a wildcard query, returning at most 100 hits.
     *
     * @param word the text that a title term must contain
     * @return the matching articles; an empty list when {@code word} is {@code null};
     *         {@code null} when the search fails
     */
    @GetMapping("wordSearch")
    public  List<Article> wordSearch(String word){
        if (word == null) {
            return new ArrayList<>();
        }
        try (DirectoryReader reader = DirectoryReader.open(LuceneUtils.directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            // NOTE(review): wildcard terms are not analyzed; if the title analyzer lower-cases
            // its tokens, an upper-case word will presumably never match - confirm and normalize.
            Query query = new WildcardQuery(new Term("title", "*"+word+"*"));
            TopDocs topDocs = indexSearcher.search(query, 100);
            return toArticles(indexSearcher, topDocs.scoreDocs);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Deletes every document whose id equals the given value.
     *
     * @param id the article id
     * @return {@code true} when the delete was written; {@code false} when {@code id}
     *         is {@code null} or the index cannot be written
     */
    @GetMapping("delete")
    public boolean delete(String id){
        if (id == null) {
            return false;
        }
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LuceneUtils.analyzer);
        try (IndexWriter indexWriter = new IndexWriter(LuceneUtils.directory, indexWriterConfig)) {
            indexWriter.deleteDocuments(new Term("id", id));
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Replaces the indexed document that has the article's id with the given article.
     *
     * @param article the new article state; its id selects the document to replace
     * @return {@code true} when the update was written; {@code false} when the article
     *         or its id is {@code null} or the index cannot be written
     */
    @GetMapping("update")
    public boolean update(Article article){
        if (article == null || article.getId() == null) {
            return false;
        }
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(LuceneUtils.analyzer);
        try (IndexWriter indexWriter = new IndexWriter(LuceneUtils.directory, indexWriterConfig)) {
            indexWriter.updateDocument(new Term("id", article.getId()), DocumentUtils.article2Document(article));
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /** Loads the stored document of every hit and converts it to an article, in hit order. */
    private static List<Article> toArticles(IndexSearcher indexSearcher, ScoreDoc[] scoreDocs) throws IOException{
        List<Article> articleList = new ArrayList<>(scoreDocs.length);
        for (ScoreDoc scoreDoc : scoreDocs) {
            articleList.add(DocumentUtils.document2Article(indexSearcher.doc(scoreDoc.doc)));
        }
        return articleList;
    }

    /** Returns whether the value is {@code null} or contains only whitespace. */
    private static boolean isBlank(String value){
        return value == null || value.trim().isEmpty();
    }

}

 

留言(0)
加载更多
猜你喜欢
  • blog 数据结构+算法-堆排序

    堆排序(英语:Heapsort)是指利用堆这种数据结构所设计的一种排序算法。堆是一个近似完全二叉树的结构,并同时满足堆积的性质:即子结点的键值或索引总是小于(或者大于)它的父节点。以最小堆为例,下沉操作:对于一个非叶子节点的下沉操作指的是:如果
  • blog 在阿里云申请SSL证书实现网站https访问

    首先你有一个网站和一个域名。然后在阿里云中搜索ssl,点击SSL证书进入控制台。在控制台购买完成后点击证书申请,会让你填写认证信息。提交完成后会有一个CA审核的状态,不过审核过程应该很快。审核完成后点击下载证书,选择应用的类型,下载以tomcat
  • blog linux命令大全

    Linux命令一、Linux下常用命令:文件与目录操作basename:从文件名中去掉路径和扩展名cd:切换当前工作目录到指定目录chgrp:改变文件所属组chmod:改变文件的权限chown:改变文件的所有者和组cp:复制文件或目录dd:
  • blog linux centos7安装mysql57

    linux centos7安装mysql57在安装之前先查一下之前有没有安装过,如果之前安装过,先卸载完再重新安装1.将mysql的包传送到 /usr/local/下 用#tar -zxvf命令解压文件#tar -zxvf mys
  • blog linux查看进程的命令

    linux下查看进程的命令1、ps 命令用于查看当前正在运行的进程。grep 是搜索,例如: ps -ef | grep java表示查看所有进程里 CMD 是 java 的进程信息2、ps -aux | grep java-aux 显示所有
  • blog 迷宫问题-寻找最短路径

    迷宫问题-寻找最短路径算法:广度优先数据结构:队列,链表代码实现:<!DOCTYPE html><html> <head> <meta charset='UTF-8'> <title></title> <script> var
  • blog SQL语句case when then用法

    两种用法:简单case函数case sex when '1' then '男' when '2' then '女' else '其他' endcase函数case when sex = '1' then '男' when
  • blog 算法-没有bug的二分查找

    科普: 第一篇二分搜索论文是 1946 年发表,然而第一个没有 bug 的二分查找法却是在 1962 年才出现,中间用了 16 年的时间。定义 在计算机科学中,二分查找(英语:binary search),也称折
  • blog linux系统安装elasticsearch服务器

    软件下载地址:https://pan.baidu.com/s/13gX0ParcOMO_XYcdfSPlzg   提取码:9y0e 安装的时候注意jdk和elasticsearch版本问题,本次测试版本:java 14
  • blog 算法-迷宫问题-广度优先-队列

    问题描述思路: 典型的广度优先算法,根据字典序大小,可以确定遍历的顺序, 因为字典序D<L<R<U, 所以对于每一个节点优先先往下走,然后向左走,然后向右走,然后向上走。则最后首先到达出口的一条路径就是符合题意的最短路径。