`
tomhibolu
  • 浏览: 1387125 次
文章分类
社区版块
存档分类
最新评论

Lucene的入门例子 - 创建索引,利用索引查询

 
阅读更多

建立索引的基本过程:

/**
 * Rebuilds the entity index from the "*.xls" files found under {@code entitiesPath}.
 *
 * <p>The regular files under {@code indexEntitiesPath} are removed first, the new index is
 * written into a {@code MemoryDirectory}, and the finished index is then copied to
 * {@code indexEntitiesPath}. If {@code entitiesPath} cannot be listed, nothing is indexed;
 * note that the old index files have already been removed at that point.
 *
 * <p>Field encoding: {@code AGE} and {@code SALARY} are stored via
 * {@code NumberTools.longToString}, {@code BIRTHDAY} via {@code formatDateToString}; both are
 * indexed un-tokenized. Every other attribute is indexed tokenized with its raw value.
 *
 * @throws SearchException wrapping any exception raised while reading the spreadsheets or
 *         writing the index
 */
private void createIndexFileForEntities() throws SearchException
{
    try
    {
        cleanDirectory(this.indexEntitiesPath);

        File dir = new File(entitiesPath);
        File[] files = dir.listFiles((FileFilter) new GlobFilenameFilter("*.xls"));

        // Build in memory first; the result is copied to disk in one step at the end.
        // (Alternative: write straight to FSDirectory.getDirectory(this.indexEntitiesPath).)
        MemoryDirectory directory = new MemoryDirectory();

        if (files != null)
        {
            IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(), true);
            try
            {
                for (File file : files)
                {
                    List<ImportEntity> entities = EntityExcelReader.getEntities(file.getAbsolutePath());
                    for (ImportEntity de : entities)
                    {
                        Document doc = new Document();
                        doc.add(new Field(ONTOLOGY_ID, de.getOntologyID(), Field.Store.YES,
                                Field.Index.UN_TOKENIZED));

                        for (String name : de.getAttributeNames())
                        {
                            String value = de.getAttribute(name);
                            final Field field;
                            if (name.equalsIgnoreCase(AGE) || name.equalsIgnoreCase(SALARY))
                            {
                                // Numeric attributes go through NumberTools before being stored
                                // as a single un-tokenized term.
                                String strvalue = NumberTools.longToString(Long.parseLong(value));
                                field = new Field(name, strvalue, Field.Store.YES, Field.Index.UN_TOKENIZED);
                            }
                            else if (name.equalsIgnoreCase(BIRTHDAY))
                            {
                                String strvalue = formatDateToString(value);
                                field = new Field(name, strvalue, Field.Store.YES, Field.Index.UN_TOKENIZED);
                            }
                            else
                            {
                                field = new Field(name, value, Field.Store.YES, Field.Index.TOKENIZED);
                            }
                            doc.add(field);
                        }
                        doc.setBoost(0.6f);
                        writer.addDocument(doc);
                    }
                }

                writer.optimize();
            }
            finally
            {
                // Close the writer even when a spreadsheet or a value fails to parse;
                // previously an exception in the loop left it open.
                writer.close();
            }

            System.out.println("RAM size:" + directory.sizeInBytes());
            FSDirectory fsDirectory = FSDirectory.getDirectory(this.indexEntitiesPath);
            directory.copyToDirectory(fsDirectory);
        }
    }
    catch (Exception ex)
    {
        // The cause is preserved in the wrapper, so it is not printed separately here.
        throw new SearchException(ex);
    }
}

添加索引的基本过程:

/**
 * Appends ten sample documents to the index stored under {@code indexEntitiesPath}.
 *
 * <p>The writer is opened with {@code create == false} so that the documents are added to the
 * existing index. The previous code passed {@code true}, which creates a fresh index and
 * overwrites whatever was there, contradicting the purpose of this method.
 *
 * @throws SearchException wrapping any exception raised while opening or writing the index
 *         (including the case where no index exists yet at {@code indexEntitiesPath})
 */
private void appendIndexFileForEntities() throws SearchException
{
    try
    {
        FSDirectory fsDirectory = FSDirectory.getDirectory(this.indexEntitiesPath);
        // false = open the existing index for appending instead of re-creating it.
        IndexWriter writer = new IndexWriter(fsDirectory, new StandardAnalyzer(), false);
        try
        {
            for (int i = 1; i <= 10; i++)
            {
                Document doc = new Document();
                doc.add(new Field(ONTOLOGY_ID, "", Field.Store.YES, Field.Index.UN_TOKENIZED));
                // NOTE(review): every document gets the same name suffix (1); possibly `i` was
                // intended. Left unchanged pending confirmation.
                doc.add(new Field(NAME, "Theaters" + String.valueOf(1), Field.Store.YES, Field.Index.TOKENIZED));
                // NOTE(review): createIndexFileForEntities encodes AGE/SALARY with NumberTools;
                // confirm LongTools produces the same string encoding for these shared fields.
                doc.add(new Field(AGE, LongTools.longToString(10 * i + 5), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.add(new Field(AGE_TEN, LongTools.longToString(i), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.add(new Field(SALARY, LongTools.longToString(5), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.add(new Field(BIRTHDAY, formatDateToString("2005-11-4"), Field.Store.YES,
                        Field.Index.UN_TOKENIZED));
                doc.add(new Field(DESCRIPTION, "only a test string", Field.Store.YES, Field.Index.TOKENIZED));
                doc.setBoost(0.7f);
                writer.addDocument(doc);
            }
            writer.optimize();
        }
        finally
        {
            // Close the writer on every path; it used to stay open when a document failed.
            writer.close();
        }
    }
    catch (Exception ex)
    {
        throw new SearchException(ex);
    }
}

利用索引查询的过程:

/**
 * Loads the index under {@code indexEntitiesPath} into a {@code MemoryDirectory}, runs
 * {@code queryBySearcher} against it and prints every hit to standard output.
 *
 * <p>For each hit the 1-based position, document id, score, document boost and the text
 * returned by {@code getDocumentItem} are printed.
 *
 * @throws SearchException wrapping any exception raised while opening or searching the index
 */
private void queryIndexFileForEntities() throws SearchException
{
    try
    {
        MemoryDirectory ramDir = new MemoryDirectory(FSDirectory.getDirectory(this.indexEntitiesPath));

        // Alternative: IndexReader.open(this.indexEntitiesPath) searches the on-disk index directly.
        IndexReader reader = IndexReader.open(ramDir);
        try
        {
            Searcher searcher = new IndexSearcher(reader);
            try
            {
                Hits hits = this.queryBySearcher(searcher);

                // Hits are read while the searcher is still open.
                for (int i = 0; i < hits.length(); i++)
                {
                    Document doc = hits.doc(i);
                    System.out.println("Hit Document Index: " + Integer.toString(i + 1));
                    System.out.println("doc=" + hits.id(i) + " score=" + hits.score(i));
                    System.out.println("hits boost:" + doc.getBoost());
                    System.out.println(getDocumentItem(doc));
                }
            }
            finally
            {
                searcher.close();
            }
        }
        finally
        {
            // The reader was opened here, so it is closed here as well; previously neither
            // the searcher nor the reader was ever closed.
            reader.close();
        }
    }
    catch (Exception ex)
    {
        // The cause is preserved in the wrapper, so it is not printed separately here.
        throw new SearchException(ex);
    }
}

编码函数:

/**
 * Converts a {@code yyyy-MM-dd} date string into the day-resolution index form produced by
 * {@code DateTools.dateToString}.
 *
 * <p>The input is parsed in GMT. Lucene's {@code DateTools} formats dates in GMT, so parsing
 * in the JVM default time zone would shift the result to the previous day in any zone east of
 * GMT (local midnight falls on the preceding GMT day).
 *
 * @param strdate the date to encode, in {@code yyyy-MM-dd} form
 * @return the encoded date string at {@code DateTools.Resolution.DAY}
 * @throws ParseException if {@code strdate} does not match {@code yyyy-MM-dd}
 */
private String formatDateToString(String strdate) throws ParseException
{
    // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
    sdf.setTimeZone(java.util.TimeZone.getTimeZone("GMT"));
    Date date = sdf.parse(strdate);
    return DateTools.dateToString(date, DateTools.Resolution.DAY);
}

辅助清空目录函数:

/**
 * Deletes the regular files located directly inside the given directory.
 *
 * <p>Sub-directories are left untouched, and nothing happens when the path cannot be listed.
 * The result of each individual delete is not checked.
 *
 * @param path the directory whose files (the index files) are to be removed
 * @throws SearchException wrapping any exception raised while listing or deleting
 */
private void cleanDirectory(String path) throws SearchException
{
    try
    {
        File[] entries = new File(path).listFiles();
        if (entries == null)
        {
            return;
        }

        for (int idx = 0; idx < entries.length; idx++)
        {
            File entry = entries[idx];
            if (!entry.isFile())
            {
                continue;
            }
            // Drop every index file found under that directory.
            entry.delete();
        }
    }
    catch (Exception ex)
    {
        throw new SearchException(ex);
    }
}

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics