使用Lucene为WordPress创建索引

作者 : admin 于 2008-10-15 15:41:04 标签: , ,
2008
10-15

上次说到Lucene的Document模型,现在,我们以WordPress为例,为其创建索引。

分析WordPress数据库结构,我们只需要为标题和内容,即主要的文本内容创建索引。

具体语句为:  SELECT ID,post_title,post_content,guid FROM wp_posts ORDER BY ID DESC

代码如下,MySQL连接类使用的是上文的mysql类,代码风格还是很像PHP。生成的索引全部保存在当前目录的index文件夹下:

E:\java\index 的目录

2008-10-16 14:40 <DIR> .
2008-10-16 14:40 <DIR> ..
2008-10-16 14:40 20 segments.gen
2008-10-16 14:40 98 segments_a4
2008-10-16 14:40 268,174 _a0.fdt
2008-10-16 14:40 2,600 _a0.fdx
2008-10-16 14:40 37 _a0.fnm
2008-10-16 14:40 20,990 _a0.frq
2008-10-16 14:40 1,304 _a0.nrm
2008-10-16 14:40 27,210 _a0.prx
2008-10-16 14:40 1,281 _a0.tii
2008-10-16 14:40 130,051 _a0.tis
2008-10-16 14:40 356 _a1.fdt

2008-10-16 14:40 8 _a1.fdx
2008-10-16 14:40 37 _a1.fnm
2008-10-16 14:40 23 _a1.frq
2008-10-16 14:40 8 _a1.nrm
2008-10-16 14:40 23 _a1.prx
2008-10-16 14:40 35 _a1.tii
2008-10-16 14:40 413 _a1.tis
2008-10-16 14:40 1,362 _a2.fdt
2008-10-16 14:40 8 _a2.fdx
2008-10-16 14:40 37 _a2.fnm
2008-10-16 14:40 74 _a2.frq
2008-10-16 14:40 8 _a2.nrm
2008-10-16 14:40 87 _a2.prx
2008-10-16 14:40 35 _a2.tii
2008-10-16 14:40 1,348 _a2.tis
26 个文件 455,627 字节
2 个目录 30,843,887,616 可用字节

E:\java\index

  1. import java.io.*;
  2. import org.apache.lucene.document.Document;
  3. import org.apache.lucene.document.Field;
  4. import org.apache.lucene.index.IndexWriter;
  5. import org.apache.lucene.analysis.SimpleAnalyzer;
  6. import org.apache.lucene.analysis.Analyzer;
  7. public class Index
  8. {
  9. private MysqlConn Db = null;
  10. //构造函数
  11. public void Index()
  12. {
  13. }
  14. //建立mysql连接
  15. public void getConn( String conndsn )
  16. {
  17. this.Db = new MysqlConn();
  18. this.Db.SetDsn( conndsn );
  19. }
  20. public static void main( String args[] )
  21. {
  22. Index index = new Index();
  23. index.getConn( "jdbc:mysql://localhost:3306/wp" );
  24. index.Db.SetUserPass( "root" , "123456" );
  25. index.Db.Conn();
  26. String sql = "SELECT ID,post_title,post_content,guid FROM wp_posts ORDER BY ID DESC";
  27. index.Db.sqlQuery( sql );
  28. try
  29. {
  30. while( index.Db.rs.next() )
  31. {
  32. String ID = index.Db.rs.getString( "ID" );
  33. String post_title = index.Db.rs.getString( "post_title" );
  34. String post_content = index.Db.rs.getString( "post_content" );
  35. String guid = index.Db.rs.getString( "guid" );
  36. Document doc = new Document();
  37. //注释1
  38. Field f1 = new Field("ID",ID,Field.Store.YES,Field.Index.TOKENIZED);
  39. Field f2 = new Field("post_title",post_title,Field.Store.YES,Field.Index.TOKENIZED);
  40. Field f3 = new Field("post_content",post_content,Field.Store.YES,Field.Index.TOKENIZED);
  41. Field f4 = new Field("guid",guid,Field.Store.YES,Field.Index.TOKENIZED);
  42. doc.add( f1 );
  43. doc.add( f2 );
  44. doc.add( f3 );
  45. doc.add( f4 );
  46. System.out.println( ID );
  47. try
  48. {
  49. IndexWriter writer = new IndexWriter( "./index" , new SimpleAnalyzer() );
  50. writer.setUseCompoundFile(false);
  51. writer.addDocument( doc );
  52. writer.close();
  53. }
  54. catch (Exception e)
  55. {
  56. System.out.println("Error : " + e.toString());
  57. }
  58. }
  59. }
  60. catch(Exception e)
  61. {
  62. System.out.println("Error : " + e.toString());
  63. }
  64.  
  65. }
  66. }

发表评论




XHTML:你可以使用的标签: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>

(若看不到验证码,请重新加载页面。)