10-15
上次说到Lucene的Document模型,现在我们以WordPress为例,为其创建索引。
分析WordPress的数据库结构可知,我们只需要为文章的标题和正文(即主要的文本内容)创建索引即可。
具体语句为: SELECT ID,post_title,post_content,guid FROM wp_posts ORDER BY ID DESC
代码如下:MySQL连接类沿用上文的MysqlConn类,代码风格还是很PHP。生成的索引全部保存在当前目录的index文件夹下:
E:\java\index 的目录
2008-10-16 14:40 <DIR> .
2008-10-16 14:40 <DIR> ..
2008-10-16 14:40 20 segments.gen
2008-10-16 14:40 98 segments_a4
2008-10-16 14:40 268,174 _a0.fdt
2008-10-16 14:40 2,600 _a0.fdx
2008-10-16 14:40 37 _a0.fnm
2008-10-16 14:40 20,990 _a0.frq
2008-10-16 14:40 1,304 _a0.nrm
2008-10-16 14:40 27,210 _a0.prx
2008-10-16 14:40 1,281 _a0.tii
2008-10-16 14:40 130,051 _a0.tis
2008-10-16 14:40 356 _a1.fdt
2008-10-16 14:40 8 _a1.fdx
2008-10-16 14:40 37 _a1.fnm
2008-10-16 14:40 23 _a1.frq
2008-10-16 14:40 8 _a1.nrm
2008-10-16 14:40 23 _a1.prx
2008-10-16 14:40 35 _a1.tii
2008-10-16 14:40 413 _a1.tis
2008-10-16 14:40 1,362 _a2.fdt
2008-10-16 14:40 8 _a2.fdx
2008-10-16 14:40 37 _a2.fnm
2008-10-16 14:40 74 _a2.frq
2008-10-16 14:40 8 _a2.nrm
2008-10-16 14:40 87 _a2.prx
2008-10-16 14:40 35 _a2.tii
2008-10-16 14:40 1,348 _a2.tis
26 个文件 455,627 字节
2 个目录 30,843,887,616 可用字节
E:\java\index
- import java.io.*;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.analysis.SimpleAnalyzer;
- import org.apache.lucene.analysis.Analyzer;
- public class Index
- {
- private MysqlConn Db = null;
- //构造函数
- public void Index()
- {
- }
- //建立mysql连接
- public void getConn( String conndsn )
- {
- this.Db = new MysqlConn();
- this.Db.SetDsn( conndsn );
- }
- public static void main( String args[] )
- {
- Index index = new Index();
- index.getConn( "jdbc:mysql://localhost:3306/wp" );
- index.Db.SetUserPass( "root" , "123456" );
- index.Db.Conn();
- String sql = "SELECT ID,post_title,post_content,guid FROM wp_posts ORDER BY ID DESC";
- index.Db.sqlQuery( sql );
- try
- {
- while( index.Db.rs.next() )
- {
- String ID = index.Db.rs.getString( "ID" );
- String post_title = index.Db.rs.getString( "post_title" );
- String post_content = index.Db.rs.getString( "post_content" );
- String guid = index.Db.rs.getString( "guid" );
- Document doc = new Document();
- //注释1
- Field f1 = new Field("ID",ID,Field.Store.YES,Field.Index.TOKENIZED);
- Field f2 = new Field("post_title",post_title,Field.Store.YES,Field.Index.TOKENIZED);
- Field f3 = new Field("post_content",post_content,Field.Store.YES,Field.Index.TOKENIZED);
- Field f4 = new Field("guid",guid,Field.Store.YES,Field.Index.TOKENIZED);
- doc.add( f1 );
- doc.add( f2 );
- doc.add( f3 );
- doc.add( f4 );
- System.out.println( ID );
- try
- {
- IndexWriter writer = new IndexWriter( "./index" , new SimpleAnalyzer() );
- writer.setUseCompoundFile(false);
- writer.addDocument( doc );
- writer.close();
- }
- catch (Exception e)
- {
- System.out.println("Error : " + e.toString());
- }
- }
- }
- catch(Exception e)
- {
- System.out.println("Error : " + e.toString());
- }
- }
- }