Lucene 全文检索
依赖 jar(Maven)
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>5.3.1</version>
</dependency>
基础类
import lombok.Data;
/**
 * Plain data holder for one article that is written to and read back from the Lucene index.
 *
 * <p>The accessors used elsewhere in this file ({@code getId}/{@code setId},
 * {@code getTitle}/{@code setTitle}, {@code getContent}/{@code setContent}) are not written
 * out here; they come from the Lombok {@code @Data} annotation on the class.
 *
 * @author Jiajiajia
 * @version V1.0
 */
@Data
public class Article {
// Identifier of the article; stored in the index under the field name "id".
private String id;
// Title text; stored in the index under the field name "title".
private String title;
// Body text; stored in the index under the field name "content".
private String content;
}
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import java.nio.file.Paths;
/**
 * Holds the Lucene {@link Directory} and {@link Analyzer} shared by the indexing and
 * search code.
 *
 * <p>The index is kept on the file system. Its location defaults to
 * {@code d:/test/indexDir} and can be overridden by setting the
 * {@code lucene.index.dir} system property before this class is first used.
 *
 * <p>If the directory cannot be opened, class initialization fails with an
 * {@link ExceptionInInitializerError} that carries the original cause, instead of
 * leaving the fields {@code null} and failing later with an unrelated
 * {@link NullPointerException}.
 *
 * @author Jiajiajia
 * @version V1.0
 */
public class LuceneUtils {

    /** Name of the system property that overrides the index location. */
    private static final String INDEX_DIR_PROPERTY = "lucene.index.dir";

    /** Index location used when {@link #INDEX_DIR_PROPERTY} is not set. */
    private static final String DEFAULT_INDEX_DIR = "d:/test/indexDir";

    /** File-system directory that stores the index. */
    public static Directory directory;

    /** Analyzer used when indexing documents and when parsing queries. */
    public static Analyzer analyzer;

    static {
        // The analyzer does not depend on the directory, so create it first.
        analyzer = new StandardAnalyzer();
        try {
            String indexDir = System.getProperty(INDEX_DIR_PROPERTY, DEFAULT_INDEX_DIR);
            directory = FSDirectory.open(Paths.get(indexDir));
        } catch (Exception e) {
            // Nothing in this application works without the index directory: fail fast
            // and keep the root cause attached.
            throw new ExceptionInInitializerError(e);
        }
    }
}
import com.example.demo.entity.Article;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.*;
import java.io.IOException;
/**
 * Converts between {@link Article} objects and Lucene {@link Document}s.
 *
 * <p>Three fields are used, all stored: {@code id} (indexed without analysis),
 * {@code title} and {@code content} (both analyzed).
 *
 * @author Jiajiajia
 * @version V1.0
 */
public class DocumentUtils {

    /**
     * Builds the Lucene document for an article.
     *
     * <p>A {@code null} title or content is simply left out of the document, so reading
     * the document back yields {@code null} for that property again.
     *
     * @param article the article to convert; its id must not be {@code null}
     * @return a new document containing the article's non-null fields
     * @throws IllegalArgumentException if the article id is {@code null}
     */
    public static Document article2Document(Article article) {
        String id = article.getId();
        if (id == null) {
            throw new IllegalArgumentException("article id must not be null");
        }
        Document document = new Document();
        // NOTE(review): Field.Index is a deprecated pre-4.0 API; it is kept here so the
        // way existing indexes were written does not change. StringField/TextField are
        // the documented replacements.
        document.add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
        addAnalyzedField(document, "title", article.getTitle());
        addAnalyzedField(document, "content", article.getContent());
        return document;
    }

    /**
     * Rebuilds an article from the stored fields of a document.
     *
     * @param document a document read from the index
     * @return an article whose properties are the stored {@code id}, {@code title} and
     *         {@code content} values ({@code null} where a field is absent)
     */
    public static Article document2Article(Document document) {
        Article article = new Article();
        article.setId(document.get("id"));
        article.setTitle(document.get("title"));
        article.setContent(document.get("content"));
        return article;
    }

    /**
     * Rebuilds an article from a document and highlights the query terms in its title
     * and content.
     *
     * <p>For each of the two fields the highlighter's best fragment is used. When the
     * field contains no matching term (the highlighter then returns {@code null}) or
     * highlighting fails, the stored text is returned unchanged, so a hit never loses
     * its title or content.
     *
     * @param document      the stored document of the hit
     * @param highlighter   highlighter to apply; its text fragmenter is replaced by a
     *                      {@link SimpleSpanFragmenter} based on {@code queryScorer}
     * @param queryScorer   scorer the fragmenter is built from
     * @param indexSearcher not used; retained so existing callers keep compiling
     * @param scoreDoc      not used; retained so existing callers keep compiling
     * @return the article with highlighted (or, as a fallback, plain) title and content
     */
    public static Article document2Article(Document document, Highlighter highlighter, QueryScorer queryScorer,
                                           IndexSearcher indexSearcher, ScoreDoc scoreDoc) {
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));
        Article article = new Article();
        article.setId(document.get("id"));
        article.setTitle(highlight(highlighter, "title", document.get("title")));
        article.setContent(highlight(highlighter, "content", document.get("content")));
        return article;
    }

    /** Adds a stored, analyzed field to the document unless the value is null. */
    private static void addAnalyzedField(Document document, String name, String value) {
        if (value != null) {
            document.add(new Field(name, value, Field.Store.YES, Field.Index.ANALYZED));
        }
    }

    /** Returns the best highlighted fragment of the text, or the text itself if there is none. */
    private static String highlight(Highlighter highlighter, String fieldName, String text) {
        if (text == null) {
            return null;
        }
        try {
            // Analyze the stored text directly instead of loading the document from the
            // reader a second time.
            String fragment = highlighter.getBestFragment(LuceneUtils.analyzer, fieldName, text);
            return fragment != null ? fragment : text;
        } catch (IOException | InvalidTokenOffsetsException e) {
            // Highlighting is cosmetic: report the problem and show the plain text.
            e.printStackTrace();
            return text;
        }
    }
}
搜索测试
import com.example.demo.entity.Article;
import com.example.demo.server.DocumentUtils;
import com.example.demo.server.LuceneUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * REST endpoints that create, look up, search, delete and update {@link Article}s in the
 * Lucene index described by {@link LuceneUtils}.
 *
 * <p>Index readers and writers are opened per request and are always closed through
 * try-with-resources, so a failing request neither leaks a reader nor leaves the index
 * write lock held. I/O and query-syntax errors are printed and reported to the caller as
 * an empty result ({@code false} / {@code null} for the non-list endpoints).
 *
 * <p>NOTE(review): the state-changing operations (create, delete, update) are mapped to
 * HTTP GET; the mappings are left untouched here to keep existing clients working.
 *
 * @author Jiajiajia
 * @version V1.0
 */
@RestController
public class TestController {

    /** Fields that free-text queries are parsed against. */
    private static final String[] SEARCH_FIELDS = {"title", "content"};

    /** Formatter that wraps every highlighted term in a red {@code <span>}. */
    SimpleHTMLFormatter fors = new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>");

    /**
     * Adds an article to the index.
     *
     * @param article the article to index
     * @return the article that was passed in, whether or not indexing succeeded
     */
    @GetMapping("create")
    public Article create(Article article) {
        try (IndexWriter indexWriter = openWriter()) {
            indexWriter.addDocument(DocumentUtils.article2Document(article));
        } catch (IOException e) {
            e.printStackTrace();
        }
        return article;
    }

    /**
     * Looks up a single article by its id.
     *
     * @param id the exact id to look for
     * @return the matching article, or {@code null} if {@code id} is {@code null}, no
     *         article has that id, or the index cannot be read
     */
    @GetMapping("searchById")
    public Article searchById(String id) {
        if (id == null) {
            return null;
        }
        try (DirectoryReader reader = DirectoryReader.open(LuceneUtils.directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            TopDocs topDocs = indexSearcher.search(new TermQuery(new Term("id", id)), 1);
            if (topDocs.scoreDocs.length > 0) {
                return DocumentUtils.document2Article(indexSearcher.doc(topDocs.scoreDocs[0].doc));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Searches title and content and returns at most 20 hits, optionally excluding one
     * article.
     *
     * @param search query text in Lucene query-parser syntax
     * @param id     id of an article to exclude from the result; ignored when
     *               {@code null} or empty
     * @return the matching articles; an empty list when {@code search} is blank, the
     *         query cannot be parsed, or the index cannot be read
     */
    @GetMapping("search/basic")
    public List<Article> searchBasic(String search, String id) {
        if (isBlank(search)) {
            return new ArrayList<>();
        }
        try (DirectoryReader reader = DirectoryReader.open(LuceneUtils.directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            // NOTE(review): the mutable BooleanQuery API is deprecated as of Lucene 5.3
            // in favour of BooleanQuery.Builder - consider migrating.
            BooleanQuery booleanQuery = new BooleanQuery();
            booleanQuery.add(parseQuery(search), BooleanClause.Occur.SHOULD);
            if (id != null && !"".equals(id)) {
                booleanQuery.add(new TermQuery(new Term("id", id)), BooleanClause.Occur.MUST_NOT);
            }
            TopDocs topDocs = indexSearcher.search(booleanQuery, 20);
            System.out.println("count:" + topDocs.totalHits);
            return toArticles(indexSearcher, topDocs);
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
        return new ArrayList<>();
    }

    /**
     * Searches title and content with paging and highlights the matched terms.
     *
     * @param search    query text in Lucene query-parser syntax
     * @param pageIndex 1-based page number; values below 1 are treated as the first page
     * @param pageSize  number of hits per page
     * @return the highlighted articles of the requested page; an empty list when
     *         {@code search} is blank, {@code pageSize} is below 1, the page lies beyond
     *         the last hit, the query cannot be parsed, or the index cannot be read
     */
    @GetMapping("search")
    public List<Article> search(String search, int pageIndex, int pageSize) {
        if (isBlank(search) || pageSize < 1) {
            return new ArrayList<>();
        }
        try (DirectoryReader reader = DirectoryReader.open(LuceneUtils.directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            Query query = parseQuery(search);
            ScoreDoc lastScoreDoc = null;
            if (pageIndex > 1) {
                lastScoreDoc = getLastScoreDoc(pageIndex, pageSize, query, indexSearcher);
                if (lastScoreDoc == null) {
                    // Fewer hits than the preceding pages would need: nothing to show.
                    return new ArrayList<>();
                }
            }
            QueryScorer scorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(fors, scorer);
            TopDocs topDocs = indexSearcher.searchAfter(lastScoreDoc, query, pageSize);
            System.out.println("count:" + topDocs.totalHits);
            List<Article> articleList = new ArrayList<>();
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                articleList.add(DocumentUtils.document2Article(document, highlighter, scorer, indexSearcher, scoreDoc));
            }
            return articleList;
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
        return new ArrayList<>();
    }

    /**
     * Finds the last hit of the page preceding the requested one, to be used as the
     * anchor for {@code searchAfter}.
     *
     * @return the anchor hit, or {@code null} when there is no preceding page
     *         ({@code pageIndex <= 1}), {@code pageSize} is below 1, or the query has
     *         fewer hits than the preceding pages contain
     */
    private ScoreDoc getLastScoreDoc(int pageIndex, int pageSize, Query query, IndexSearcher indexSearcher) throws IOException {
        if (pageIndex <= 1 || pageSize < 1) {
            return null;
        }
        // Multiply as long so a large page number cannot overflow into a negative count.
        long skipped = (long) pageSize * (pageIndex - 1);
        if (skipped > Integer.MAX_VALUE) {
            return null;
        }
        int num = (int) skipped;
        TopDocs tds = indexSearcher.search(query, num);
        return tds.scoreDocs.length < num ? null : tds.scoreDocs[num - 1];
    }

    /**
     * Finds up to 100 articles whose title contains a term that includes the given text.
     *
     * <p>The text is lower-cased before matching: wildcard terms are not analyzed, while
     * the {@code StandardAnalyzer} configured in {@code LuceneUtils} lower-cases the
     * indexed terms, so input containing upper-case letters would otherwise never match.
     *
     * @param word text to look for; wildcard characters in it are passed on as given
     * @return the matching articles; an empty list when {@code word} is {@code null} or
     *         the search fails
     */
    @GetMapping("wordSearch")
    public List<Article> wordSearch(String word) {
        if (word == null) {
            // Without this guard the pattern would be the literal "*null*".
            return new ArrayList<>();
        }
        try (DirectoryReader reader = DirectoryReader.open(LuceneUtils.directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            String pattern = "*" + word.toLowerCase(java.util.Locale.ROOT) + "*";
            TopDocs topDocs = indexSearcher.search(new WildcardQuery(new Term("title", pattern)), 100);
            return toArticles(indexSearcher, topDocs);
        } catch (Exception e) {
            // Broad on purpose: the pattern is caller-supplied, and a failure while
            // evaluating it should yield an empty result rather than an error response.
            e.printStackTrace();
        }
        return new ArrayList<>();
    }

    /**
     * Deletes every article with the given id from the index.
     *
     * @param id the exact id of the article(s) to delete
     * @return {@code true} if the deletion was written to the index; {@code false} if
     *         {@code id} is {@code null} or an I/O error occurred
     */
    @GetMapping("delete")
    public boolean delete(String id) {
        if (id == null) {
            return false;
        }
        try (IndexWriter indexWriter = openWriter()) {
            indexWriter.deleteDocuments(new Term("id", id));
            return true;
        } catch (IOException e) {
            e.printStackTrace();
        }
        return false;
    }

    /**
     * Replaces the indexed article that has the same id as the given one (or adds it if
     * there is none).
     *
     * @param article the new state of the article; its id selects what is replaced
     * @return {@code true} if the update was written to the index; {@code false} if the
     *         article or its id is {@code null} or an I/O error occurred
     */
    @GetMapping("update")
    public boolean update(Article article) {
        if (article == null || article.getId() == null) {
            return false;
        }
        try (IndexWriter indexWriter = openWriter()) {
            indexWriter.updateDocument(new Term("id", article.getId()), DocumentUtils.article2Document(article));
            return true;
        } catch (IOException e) {
            e.printStackTrace();
        }
        return false;
    }

    /** Opens a writer on the shared directory; the caller is responsible for closing it. */
    private static IndexWriter openWriter() throws IOException {
        return new IndexWriter(LuceneUtils.directory, new IndexWriterConfig(LuceneUtils.analyzer));
    }

    /** Parses free text into a query over the title and content fields. */
    private static Query parseQuery(String search) throws ParseException {
        QueryParser queryParser = new MultiFieldQueryParser(SEARCH_FIELDS, LuceneUtils.analyzer);
        return queryParser.parse(search);
    }

    /** Loads the stored document of every hit and converts it to an article. */
    private static List<Article> toArticles(IndexSearcher indexSearcher, TopDocs topDocs) throws IOException {
        List<Article> articleList = new ArrayList<>();
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            articleList.add(DocumentUtils.document2Article(indexSearcher.doc(scoreDoc.doc)));
        }
        return articleList;
    }

    /** Tells whether the text is null, empty or whitespace only. */
    private static boolean isBlank(String text) {
        return text == null || text.trim().isEmpty();
    }
}