Lucene 全文检索(搜索)
Lucene 全文检索
依赖jar
<!-- Highlighting support: provides org.apache.lucene.search.highlight.* (Highlighter, QueryScorer, ...). -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>5.3.1</version>
</dependency>
<!-- Core index/search/store APIs (IndexWriter, IndexSearcher, Directory, ...). -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>5.3.1</version>
</dependency>
<!-- Classic query parser: QueryParser, MultiFieldQueryParser, ParseException. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>5.3.1</version>
</dependency>
<!-- Common analyzers; NOTE(review): StandardAnalyzer is expected to come from this artifact in 5.x - confirm. -->
<!-- All four Lucene artifacts are pinned to the same version (5.3.1); keep them in sync when upgrading. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>5.3.1</version>
</dependency>
基础类
import lombok.Data;
/**
 * Plain data holder for one article: an identifier, a title and a body text.
 * <p>
 * The class is annotated with Lombok's {@code @Data}; accessors such as
 * {@code getId()} / {@code setTitle(...)} used elsewhere are expected to be
 * generated by that annotation rather than written by hand.
 *
 * @author Jiajiajia
 * @version V1.0
 */
@Data
public class Article {
// Identifier of the article.
private String id;
// Title text of the article.
private String title;
// Body text of the article.
private String content;
}
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import java.nio.file.Paths;
/**
 * Holds the Lucene resources shared by the whole application: the on-disk
 * index {@link Directory} and the {@link Analyzer} used for indexing and for
 * query parsing.
 * <p>
 * Both fields are initialised once, when this class is first loaded. If the
 * index directory cannot be opened the class fails to initialise (the cause
 * is attached), instead of silently leaving the fields {@code null} and
 * letting the first request fail later with an unrelated
 * {@code NullPointerException}.
 *
 * @author Jiajiajia
 * @version V1.0
 */
public class LuceneUtils {
    /** File-system backed index location, opened at class initialisation. */
    public static Directory directory;
    /** Analyzer shared by index writers, query parsers and the highlighter. */
    public static Analyzer analyzer;

    static {
        try {
            directory = FSDirectory.open(Paths.get("d:/test/indexDir"));
            analyzer = new StandardAnalyzer();
        } catch (Exception e) {
            // Fail fast: nothing in the application can work without the index.
            throw new IllegalStateException("Unable to open Lucene index directory d:/test/indexDir", e);
        }
    }
}
import com.example.demo.entity.Article;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.*;
import java.io.IOException;
/**
 * Conversion helpers between the {@link Article} entity and Lucene
 * {@link Document}s, with an optional variant that highlights query matches.
 *
 * @author Jiajiajia
 * @version V1.0
 */
public class DocumentUtils {

    /**
     * Builds the Lucene document that represents an article.
     * <p>
     * All three values are stored. {@code id} is indexed as a single,
     * un-analyzed term; {@code title} and {@code content} are analyzed.
     *
     * @param article the article to convert
     * @return a new document with the fields {@code id}, {@code title} and {@code content}
     */
    public static Document article2Document(Article article) {
        Document document = new Document();
        document.add(new Field("id", article.getId(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        document.add(new Field("title", article.getTitle(), Field.Store.YES, Field.Index.ANALYZED));
        document.add(new Field("content", article.getContent(), Field.Store.YES, Field.Index.ANALYZED));
        return document;
    }

    /**
     * Rebuilds an article from the stored fields of a document.
     *
     * @param document a document read from the index
     * @return an article populated from the stored {@code id}, {@code title} and {@code content}
     */
    public static Article document2Article(Document document) {
        Article article = new Article();
        article.setId(document.get("id"));
        article.setTitle(document.get("title"));
        article.setContent(document.get("content"));
        return article;
    }

    /**
     * Rebuilds an article from a search hit, replacing title and content with
     * their best highlighted fragment.
     * <p>
     * When a field contains no highlightable match (the highlighter yields
     * {@code null}) or highlighting fails, the stored, un-highlighted text of
     * that field is used instead, so a hit that matched only in the content
     * still carries its title. Each field is processed independently: a
     * failure on one does not affect the other.
     *
     * @param document      the stored document of the hit
     * @param highlighter   highlighter to apply; its text fragmenter is replaced by this call
     * @param queryScorer   scorer the fragmenter is built from
     * @param indexSearcher searcher that produced the hit; its reader is used to obtain token streams
     * @param scoreDoc      the hit, identifying the document inside the reader
     * @return an article whose title and content are highlighted where possible
     */
    public static Article document2Article(Document document, Highlighter highlighter, QueryScorer queryScorer,
            IndexSearcher indexSearcher, ScoreDoc scoreDoc) {
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));
        Article article = new Article();
        article.setId(document.get("id"));
        article.setTitle(highlightOrStored(document, "title", highlighter, indexSearcher, scoreDoc));
        article.setContent(highlightOrStored(document, "content", highlighter, indexSearcher, scoreDoc));
        return article;
    }

    /** Returns the best highlighted fragment of one field, or its stored text when none is available. */
    private static String highlightOrStored(Document document, String fieldName, Highlighter highlighter,
            IndexSearcher indexSearcher, ScoreDoc scoreDoc) {
        String stored = document.get(fieldName);
        if (stored == null) {
            // Nothing stored for this field, so there is nothing to highlight.
            return null;
        }
        try {
            TokenStream tokenStream = TokenSources.getAnyTokenStream(
                    indexSearcher.getIndexReader(), scoreDoc.doc, fieldName, LuceneUtils.analyzer);
            String fragment = highlighter.getBestFragment(tokenStream, stored);
            // No query term in this field: keep the original text instead of null.
            return fragment != null ? fragment : stored;
        } catch (IOException | InvalidTokenOffsetsException e) {
            e.printStackTrace();
            return stored;
        }
    }
}
搜索测试
import com.example.demo.entity.Article;
import com.example.demo.server.DocumentUtils;
import com.example.demo.server.LuceneUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * REST endpoints demonstrating create / read / update / delete and several
 * kinds of search against the Lucene index configured in {@link LuceneUtils}.
 * <p>
 * Every request opens its own {@link DirectoryReader} or {@link IndexWriter}
 * and closes it before returning, including when the operation fails.
 * I/O and parse errors are printed and reported to the caller as
 * {@code null} / {@code false}, as documented on each method.
 *
 * @author Jiajiajia
 * @version V1.0
 */
@RestController
public class TestController {

    /** Wraps highlighted terms in a red {@code <span>} for the paged search. */
    SimpleHTMLFormatter fors = new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>");

    /**
     * Adds an article to the index.
     *
     * @param article the article to index
     * @return the same article, whether or not indexing succeeded (I/O errors are only printed)
     */
    @GetMapping("create")
    public Article create(Article article) {
        // try-with-resources releases the index write lock even if addDocument throws.
        try (IndexWriter indexWriter = openWriter()) {
            indexWriter.addDocument(DocumentUtils.article2Document(article));
        } catch (IOException e) {
            e.printStackTrace();
        }
        return article;
    }

    /**
     * Looks up a single article by its exact id.
     *
     * @param id the id term to match
     * @return the matching article, or {@code null} if none matches or an I/O error occurs
     */
    @GetMapping("searchById")
    public Article searchById(String id) {
        try (DirectoryReader reader = DirectoryReader.open(LuceneUtils.directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            TopDocs topDocs = indexSearcher.search(new TermQuery(new Term("id", id)), 1);
            if (topDocs.scoreDocs.length > 0) {
                return DocumentUtils.document2Article(indexSearcher.doc(topDocs.scoreDocs[0].doc));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Searches title and content with the classic query syntax, optionally
     * excluding one article by id. At most 20 hits are returned.
     *
     * @param search the query string, parsed against the fields {@code title} and {@code content}
     * @param id     id of an article to exclude from the result; ignored when {@code null} or empty
     * @return the matching articles, or {@code null} if the query cannot be parsed or an I/O error occurs
     */
    @GetMapping("search/basic")
    public List<Article> searchBasic(String search, String id) {
        try (DirectoryReader reader = DirectoryReader.open(LuceneUtils.directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            QueryParser queryParser = new MultiFieldQueryParser(new String[]{"title", "content"}, LuceneUtils.analyzer);
            BooleanQuery.Builder combined = new BooleanQuery.Builder();
            combined.add(queryParser.parse(search), BooleanClause.Occur.SHOULD);
            if (id != null && !"".equals(id)) {
                combined.add(new TermQuery(new Term("id", id)), BooleanClause.Occur.MUST_NOT);
            }
            TopDocs topDocs = indexSearcher.search(combined.build(), 20);
            System.out.println("count:" + topDocs.totalHits);
            return toArticles(indexSearcher, topDocs.scoreDocs);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Paged search over title and content with highlighted matches.
     * <p>
     * A {@code pageIndex} below 1 is treated as the first page. A
     * {@code pageSize} below 1, or a page that lies beyond the last hit,
     * yields an empty list.
     *
     * @param search    the query string, parsed against the fields {@code title} and {@code content}
     * @param pageIndex 1-based page number
     * @param pageSize  number of hits per page
     * @return the highlighted articles of the requested page, or {@code null} if the query
     *         cannot be parsed or an I/O error occurs
     */
    @GetMapping("search")
    public List<Article> search(String search, int pageIndex, int pageSize) {
        try (DirectoryReader reader = DirectoryReader.open(LuceneUtils.directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            QueryParser queryParser = new MultiFieldQueryParser(new String[]{"title", "content"}, LuceneUtils.analyzer);
            Query query = queryParser.parse(search);

            List<Article> articleList = new ArrayList<>();
            if (pageSize < 1) {
                return articleList;
            }
            // Number of hits that precede the requested page; long avoids int overflow.
            long skipped = (long) pageSize * (Math.max(pageIndex, 1) - 1);
            if (skipped > Integer.MAX_VALUE) {
                return articleList;
            }
            ScoreDoc lastScoreDoc = null;
            if (skipped > 0) {
                lastScoreDoc = getLastScoreDoc((int) skipped, query, indexSearcher);
                if (lastScoreDoc == null) {
                    // Fewer hits than the page offset: the requested page does not exist.
                    return articleList;
                }
            }

            QueryScorer scorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(fors, scorer);
            TopDocs topDocs = indexSearcher.searchAfter(lastScoreDoc, query, pageSize);
            System.out.println("count:" + topDocs.totalHits);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                articleList.add(DocumentUtils.document2Article(document, highlighter, scorer, indexSearcher, scoreDoc));
            }
            return articleList;
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Substring search on the title using a wildcard query ({@code *word*}).
     * At most 100 hits are returned.
     *
     * @param word the text to look for inside title terms
     * @return the matching articles, or {@code null} if any error occurs
     */
    @GetMapping("wordSearch")
    public List<Article> wordSearch(String word) {
        try (DirectoryReader reader = DirectoryReader.open(LuceneUtils.directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            Query query = new WildcardQuery(new Term("title", "*" + word + "*"));
            TopDocs topDocs = indexSearcher.search(query, 100);
            return toArticles(indexSearcher, topDocs.scoreDocs);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Deletes every document whose id equals the given value.
     *
     * @param id the id term of the documents to delete
     * @return {@code true} if the deletion was written, {@code false} if an I/O error occurred
     */
    @GetMapping("delete")
    public boolean delete(String id) {
        try (IndexWriter indexWriter = openWriter()) {
            indexWriter.deleteDocuments(new Term("id", id));
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Replaces the indexed document that has the article's id with the given article.
     *
     * @param article the new state of the article; its id selects the document to replace
     * @return {@code true} if the update was written, {@code false} if an I/O error occurred
     */
    @GetMapping("update")
    public boolean update(Article article) {
        try (IndexWriter indexWriter = openWriter()) {
            indexWriter.updateDocument(new Term("id", article.getId()), DocumentUtils.article2Document(article));
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /** Opens a writer on the shared index directory using the shared analyzer. */
    private IndexWriter openWriter() throws IOException {
        return new IndexWriter(LuceneUtils.directory, new IndexWriterConfig(LuceneUtils.analyzer));
    }

    /** Loads the stored document of each hit and converts it to an article. */
    private List<Article> toArticles(IndexSearcher indexSearcher, ScoreDoc[] hits) throws IOException {
        List<Article> articleList = new ArrayList<>();
        for (ScoreDoc scoreDoc : hits) {
            articleList.add(DocumentUtils.document2Article(indexSearcher.doc(scoreDoc.doc)));
        }
        return articleList;
    }

    /** Returns the hit at 1-based position {@code precedingHits}, or null when the query has fewer hits. */
    private ScoreDoc getLastScoreDoc(int precedingHits, Query query, IndexSearcher indexSearcher) throws IOException {
        TopDocs preceding = indexSearcher.search(query, precedingHits);
        if (preceding.scoreDocs.length < precedingHits) {
            return null;
        }
        return preceding.scoreDocs[precedingHits - 1];
    }
}
评论区
请写下您的评论...
猜你喜欢
blog
全文检索笔记
工具
1816
全文检索流程分析图索引库中是如何存储的?数据库的一行数据会存储为一个document对象,一条记录的一列会存储为一个field,不会将数据库的所有数据都存储到索引库。索引是如何创建过程流程图1.分析
框架
1499
:2.3.1.RELEASE,elasticsearch版本:7.6.0demo下载参考:springboot+elasticsearchdemo开始前请阅读:elasticsearch官方文档项目pom
blog
全国车辆归属地检索表
其他
1834
一、表格
省
简称
省市
河北
冀A
河北石家庄
河北
冀B
河北唐山
河北
冀C
河北秦皇岛
河北
冀D
河北邯郸
河北
冀E
河北邢台
河北
冀F
河北保定
河北
冀G
河北张家口
河北
冀H
河北承德
河北
冀J
河北沧州
河北
冀R
河北廊坊
河北
冀T
河北衡水
山西
晋A
山西太原
山西
晋B
山西大同
山西
晋C
山西
java框架
1378
springboot整合elasticsearch框架实现全文索引demo配置说明参考:http://www.jiajiajia.club/blog/artical/Ja4t7X/378
ofc
单词搜索
official
875
leetcode第79题(中等)原链接:https://leetcode-cn.com/problems/word-search/问题描述给定一个二维网格和一个单词,找出该单词是否存在于网格中。单词必须按照字母顺序,通过相邻的单元格内的字母构成,其中“相邻”单元格是那些水平相邻或垂直相邻的单元格。同一个单元格内的字母不允许被重复使用。示例board=[['A','B','C','E'],['S',
ofc
vue搜索过滤和排序
weblog
2812
vue搜索过滤和排序!DOCTYPEhtmlhtml head metacharset="UTF-8" title/title scriptsrc="js/vue.min.js"/script
weblog
6422
a*搜索算法动态演示分析请参考http://photo.jiajiajia.club/item/a-star.html什么是a*搜索算法A*搜寻算法,俗称A星算法,作为启发式搜索算法中的一种,这是一
weblog
6702
前言了解a*搜索算法的原理请参考:http://www.jiajiajia.club/official/weblog/32a*搜索算法动态演示分析,及代码,请参考:http
最新发表
归档
2018-11
12
2018-12
33
2019-01
28
2019-02
28
2019-03
32
2019-04
27
2019-05
33
2019-06
6
2019-07
12
2019-08
12
2019-09
21
2019-10
8
2019-11
15
2019-12
25
2020-01
9
2020-02
5
2020-03
16
2020-04
4
2020-06
1
2020-07
7
2020-08
13
2020-09
9
2020-10
5
2020-12
3
2021-01
1
2021-02
5
2021-03
7
2021-04
4
2021-05
4
2021-06
1
2021-07
7
2021-08
2
2021-09
8
2021-10
9
2021-11
16
2021-12
14
2022-01
7
2022-05
1
2022-08
3
2022-09
2
2022-10
2
2022-12
5
2023-01
3
2023-02
1
2023-03
4
2023-04
2
2023-06
3
2023-07
4
2023-08
1
2023-10
1
2024-02
1
2024-03
1
2024-04
1
2024-08
1
标签
算法基础
linux
前端
c++
数据结构
框架
数据库
计算机基础
储备知识
java基础
ASM
其他
深入理解java虚拟机
nginx
git
消息中间件
搜索
maven
redis
docker
dubbo
vue
导入导出
软件使用
idea插件
协议
无聊的知识
jenkins
springboot
mqtt协议
keepalived
minio
mysql
ensp
网络基础
xxl-job
rabbitmq
haproxy
srs
音视频
webrtc
javascript
加密算法
目录
没有一个冬天不可逾越,没有一个春天不会来临。最慢的步伐不是跬步,而是徘徊,最快的脚步不是冲刺,而是坚持。