`
DAOException
  • 浏览: 120848 次
  • 性别: Icon_minigender_1
  • 来自: 南京
社区版块
存档分类
最新评论

用 Lucene 给数据库做索引

    博客分类:
  • java
阅读更多

       好久没有写博客了,今天写一点。最近在做搜索引擎相关的东西,需要对数据库建立索引。Lucene 可以很方便地对文本文件(包括 PDF 等文件)建立索引,当然对数据库中的数据也同样可以。

       废话不说,贴代码吧:

import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.patent.common.connectionManage.ConnectionManage;
import com.patent.common.connectionManage.ResultManage;

/**
 * Builds a Lucene index from patent records read out of the database.
 *
 * <p>Each call to {@link #getResult(int)} indexes one batch of rows into a
 * temporary in-memory index and merges that index into the on-disk index.
 * Call {@link #close()} once all batches have been processed so that the
 * on-disk index is committed and its write lock is released.
 *
 * <p>Instances are not designed for concurrent use.
 *
 * @author 陈建国
 */
public class IndexManage {
	/** Index location used by the no-argument constructor. */
	private static final String DEFAULT_INDEX_PATH = "E:/temp/index/";

	private Directory fsdDirectory = null; // on-disk index directory
	private IndexWriter iwriter_fsd = null; // writer for the on-disk index
	private ConnectionManage connManage = null; // source of database connections

	/**
	 * Creates a manager that writes to the default index location,
	 * replacing any index that already exists there.
	 */
	public IndexManage(){
		this(DEFAULT_INDEX_PATH);
	}

	/**
	 * Creates a manager that writes to the given index location,
	 * replacing any index that already exists there.
	 *
	 * <p>If the index cannot be opened the error is printed and the instance
	 * is left unusable; {@link #getResult(int)} then fails fast with an
	 * {@link IllegalStateException}.
	 *
	 * @param indexPath file-system path of the index directory
	 */
	public IndexManage(String indexPath){
		try {
			fsdDirectory = FSDirectory.getDirectory(indexPath);
			// create == true: an existing index at this path is overwritten.
			iwriter_fsd = new IndexWriter(fsdDirectory,new IKAnalyzer(),true,IndexWriter.MaxFieldLength.LIMITED);
			connManage = new ConnectionManage();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Reads one batch of rows from the database, indexes them in memory and
	 * merges the result into the on-disk index.
	 *
	 * <p>I/O and SQL errors are printed and the batch is abandoned; rows that
	 * were not merged before the error are not added to the on-disk index.
	 *
	 * @param currectPage value handed unchanged to {@code ResultManage} to
	 *        select the rows to index (presumably a page number - confirm
	 *        against {@code ResultManage})
	 * @throws IllegalStateException if the constructor failed to open the
	 *         index or {@link #close()} has already been called
	 */
	public void getResult(int currectPage){
		// Fail before touching the database instead of hitting a
		// NullPointerException after all rows have been read.
		if (iwriter_fsd == null || connManage == null) {
			throw new IllegalStateException(
					"IndexManage is not usable: initialisation failed or close() was already called");
		}

		Directory ramDirectory = new RAMDirectory();
		IndexWriter ramWriter = null;
		ResultSet rows = null;
		try {
			ramWriter = new IndexWriter(ramDirectory,new IKAnalyzer(),true,IndexWriter.MaxFieldLength.LIMITED);

			// NOTE(review): the connection obtained here is not closed by this
			// class; ownership presumably lies with ConnectionManage - confirm.
			ResultManage rsManage = new ResultManage(connManage.getConnection(),currectPage);
			rows = rsManage.getResultSet();
			while (rows.next()) {
				ramWriter.addDocument(toDocument(rows));
			}

			// Closing commits the in-memory segments and guarantees no writer
			// is open on the source directory while it is being merged.
			ramWriter.close();
			ramWriter = null;

			iwriter_fsd.addIndexes(new Directory[]{ramDirectory});
		} catch (IOException e) {
			// Also covers CorruptIndexException and LockObtainFailedException,
			// which are IOException subclasses.
			e.printStackTrace();
		} catch (SQLException e) {
			e.printStackTrace();
		} finally {
			if (ramWriter != null) {
				try {
					ramWriter.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (rows != null) {
				try {
					rows.close();
				} catch (SQLException e) {
					e.printStackTrace();
				}
			}
			try {
				ramDirectory.close();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}

	/**
	 * Converts the current row of {@code row} into a Lucene document.
	 * Every column is stored and analyzed; SQL NULL is indexed as an empty
	 * string.
	 */
	private static Document toDocument(ResultSet row) throws SQLException {
		Document doc = new Document();
		addField(doc, "PatentNo", row.getString("PatentNo"));
		addField(doc, "PublicationDate", row.getString("PublicationDate"));
		addField(doc, "Title", row.getString("Title"));
		addField(doc, "Inventors", row.getString("Inventors"));
		addField(doc, "ANA", row.getString("ANA"));
		// SerialNo is indexed as the serial number followed by the series code.
		addField(doc, "SerialNo", nullToEmpty(row.getString("SerialNo")) + nullToEmpty(row.getString("SeriesCode")));
		addField(doc, "Filed", row.getString("Filed"));
		addField(doc, "USCurrentClass", row.getString("USCurrentClass"));
		addField(doc, "InternlClass", row.getString("InternlClass"));
		addField(doc, "Abstract", row.getString("Abstract"));
		return doc;
	}

	/** Adds a stored, analyzed field; a null value is replaced by "". */
	private static void addField(Document doc, String name, String value) {
		// Field rejects a null value, and getString returns null for SQL NULL.
		doc.add(new Field(name, nullToEmpty(value), Field.Store.YES, Field.Index.ANALYZED));
	}

	/** Returns {@code value}, or the empty string when it is null. */
	private static String nullToEmpty(String value) {
		return value == null ? "" : value;
	}

	/**
	 * Commits and closes the on-disk index writer and releases the index
	 * directory. Errors are printed. Calling this method more than once is
	 * harmless.
	 */
	public void close(){
		if (iwriter_fsd != null) {
			try {
				try {
					iwriter_fsd.commit();
				} finally {
					// Always attempt the close so the write lock is released
					// even when the commit fails.
					iwriter_fsd.close();
				}
			} catch (IOException e) {
				e.printStackTrace();
			} finally {
				iwriter_fsd = null;
			}
		}
		if (fsdDirectory != null) {
			try {
				fsdDirectory.close();
			} catch (IOException e) {
				e.printStackTrace();
			} finally {
				fsdDirectory = null;
			}
		}
	}

}

其实说白了,就是把数据库中的数据取出来,然后逐条构建 Document 并写入索引。不过今天遇到了一个大问题:面对百万级别的数据,这样逐页取数、建索引的速度非常慢,到底应该怎么处理,目前还没有好的办法。

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics