Platinum Partner
solr,oracle,apache,lucene,database

Lucene Setup on OracleDB in 5 Minutes

This tutorial is for people who want to run an Apache Lucene example with OracleDB in just five minutes.

This tutorial is for people who want to run an Apache Lucene example with OracleDB in just five minutes.

What you need to run this example:

  1. Eclipse.
  2. Oracle DB (we will use the SCOTT schema).
  3. Be Excited ;)

First Minute:

Quick introduction...

What is Apache Lucene?

"Apache Lucene(TM) is a high-performance, full-featured text search engine library written entirely in Java. It is a technology suitable for nearly any application that requires full-text search, especially cross-platform."

Simply put, Lucene deals with documents; a search runs against the fields inside those documents.

How can we use Lucene to index a database?

Simply map each database record to a Lucene document, and map each column of the database table to a field of that document.

Second Minute:

Let's start...

1. Download the tutorial code from resources section.

2. Open  Eclipse.

3. Import the LuceneForBegginer project into your workspace.

Third Minute:

Let's run...

Open the SimpleDBIndexer class, which is responsible for indexing the database.

Now:

1. Modify the constants (index directory, connection URL, user name and password) before running the class.

2. Run and check Index directory folder :)

package com.juma.mohammad;

import java.io.File;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;


/**
 * Builds a Lucene index from the rows returned by {@link #QUERY}.
 *
 * <p>Each row of the result set becomes one Lucene {@link Document}; each selected column
 * becomes one stored field of that document. The index is written to {@link #INDEX_DIR}.
 *
 * <p>Edit the constants below (index directory, JDBC URL, credentials) before running.
 */
public class SimpleDBIndexer {
    /** Directory the index is written to; also read by the searcher class. */
    public static final String INDEX_DIR = "C:/ScottDBIndex/";
    private static final String JDBC_DRIVER = "oracle.jdbc.OracleDriver";
    private static final String CONNECTION_URL = "jdbc:oracle:thin:@localhost:1521:orcl";
    private static final String USER_NAME = "username";
    private static final String PASSWORD = "password";

    private static final String QUERY = "select EMPNO,ENAME,JOB,MGR,HIREDATE,SAL,COMM,DEPTNO,DNAME,LOC from SCOTT.emp INNER JOIN SCOTT.DEPT USING (DEPTNO)";

    /**
     * Connects to the database, indexes every row returned by {@link #QUERY} and reports
     * how many documents were written.
     *
     * @param args ignored
     * @throws Exception if the driver cannot be loaded, the database cannot be reached, or
     *                   the index cannot be written; the failure is propagated so the JVM
     *                   prints the stack trace and exits with a non-zero status
     */
    public static void main(String[] args) throws Exception {
        File indexDir = new File(INDEX_DIR);
        SimpleDBIndexer indexer = new SimpleDBIndexer();

        // Loading the class is enough to register the driver with DriverManager.
        Class.forName(JDBC_DRIVER);
        Connection conn = DriverManager.getConnection(CONNECTION_URL, USER_NAME, PASSWORD);
        try {
            SimpleAnalyzer analyzer = new SimpleAnalyzer(Version.LUCENE_35);
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_35, analyzer);
            // NOTE(review): the default open mode appends to an existing index, so running
            // this class twice stores every record twice -- confirm whether that is intended.
            IndexWriter indexWriter = new IndexWriter(FSDirectory.open(indexDir), indexWriterConfig);
            System.out.println("Indexing to directory '" + indexDir + "'...");

            int indexedDocumentCount = 0;
            boolean indexed = false;
            try {
                indexedDocumentCount = indexer.indexDocs(indexWriter, conn);
                indexed = true;
            } finally {
                if (indexed) {
                    // Commits the added documents and releases the index write lock.
                    indexWriter.close();
                } else {
                    // Discard the partial batch, but still release the write lock.
                    indexWriter.rollback();
                }
            }
            System.out.println(indexedDocumentCount + " records have been indexed successfully");
        } finally {
            conn.close();
        }
    }

    /**
     * Runs {@link #QUERY} on the given connection and adds one document per row to the writer.
     *
     * @param writer open index writer that receives the documents; not closed by this method
     * @param conn   open database connection; not closed by this method
     * @return the number of documents added
     * @throws Exception if the query fails or a document cannot be added
     */
    int indexDocs(IndexWriter writer, Connection conn) throws Exception {
        Statement stmt = conn.createStatement();
        try {
            ResultSet rs = stmt.executeQuery(QUERY);
            try {
                int count = 0;
                while (rs.next()) {
                    writer.addDocument(toDocument(rs));
                    count++;
                }
                return count;
            } finally {
                rs.close();
            }
        } finally {
            stmt.close();
        }
    }

    /** Maps the current row of the result set to a Lucene document, one field per column. */
    private static Document toDocument(ResultSet rs) throws Exception {
        Document d = new Document();
        // Numeric columns are indexed verbatim: SimpleAnalyzer splits on non-letters, so
        // analyzing a digits-only value would produce no searchable term at all.
        d.add(keywordField("EMPNO", rs.getString("EMPNO")));
        d.add(textField("ENAME", rs.getString("ENAME")));
        d.add(textField("JOB", rs.getString("JOB")));
        d.add(keywordField("MGR", rs.getString("MGR")));
        d.add(keywordField("HIREDATE", hireDateTerm(rs.getDate("HIREDATE"))));
        d.add(keywordField("SAL", rs.getString("SAL")));
        d.add(keywordField("COMM", rs.getString("COMM")));
        d.add(keywordField("DEPTNO", rs.getString("DEPTNO")));
        d.add(textField("DNAME", rs.getString("DNAME")));
        d.add(textField("LOC", rs.getString("LOC")));
        return d;
    }

    /**
     * Formats a hire date as a day-resolution term, or returns an empty string for SQL NULL.
     */
    private static String hireDateTerm(java.util.Date hireDate) {
        if (hireDate == null) {
            return "";
        }
        // NOTE(review): DateTools formats in GMT; in time zones ahead of GMT a date held as
        // local midnight may come out as the previous day -- confirm against real data.
        return DateTools.dateToString(hireDate, Resolution.DAY);
    }

    /** Stored field whose value is run through the analyzer; SQL NULL becomes "". */
    private static Field textField(String name, String value) {
        return new Field(name, value == null ? "" : value, Field.Store.YES, Field.Index.ANALYZED);
    }

    /** Stored field indexed as a single verbatim term; SQL NULL becomes "". */
    private static Field keywordField(String name, String value) {
        return new Field(name, value == null ? "" : value, Field.Store.YES, Field.Index.NOT_ANALYZED);
    }
}

Third Minute (continued):

Open the SimpleDBSearcher class, which searches the data stored under the index directory:

Now:

Run it and check the query result from console :)

package com.juma.mohammad;

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Runs a fixed Lucene query against the index under {@code SimpleDBIndexer.INDEX_DIR}
 * and prints the matching employees to the console.
 */
public class SimpleDBSearcher {

    private static final String LUCENE_QUERY = "HIREDATE:[19810510 TO 20091010] AND JOB:MANAGER";
    private static final int MAX_HITS = 100;

    /**
     * Searches the index directory with {@link #LUCENE_QUERY}.
     *
     * @param args ignored
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    public static void main(String[] args) throws Exception {
        File indexDir = new File(SimpleDBIndexer.INDEX_DIR);
        String query = LUCENE_QUERY;
        SimpleDBSearcher searcher = new SimpleDBSearcher();
        searcher.searchIndex(indexDir, query);
    }

    /**
     * Parses {@code queryStr} over the HIREDATE and JOB fields, runs it against the index in
     * {@code indexDir} and prints up to {@link #MAX_HITS} matching records.
     *
     * <p>The directory, reader and searcher are closed before the method returns, whether
     * or not the search succeeds.
     *
     * @param indexDir directory containing the Lucene index
     * @param queryStr query in Lucene query-parser syntax
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    private void searchIndex(File indexDir, String queryStr) throws Exception {
        // NOTE(review): the query is analyzed with StandardAnalyzer; confirm that this
        // matches the analyzer that was used when the index was built.
        MultiFieldQueryParser queryParser = new MultiFieldQueryParser(
                Version.LUCENE_35,
                new String[] {"HIREDATE", "JOB"},
                new StandardAnalyzer(Version.LUCENE_35));
        queryParser.setPhraseSlop(0);
        queryParser.setLowercaseExpandedTerms(true);

        Directory directory = FSDirectory.open(indexDir);
        try {
            IndexReader reader = IndexReader.open(directory);
            try {
                // A searcher built on an existing reader does not close that reader,
                // so each resource is released by its own finally block.
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    Query query = queryParser.parse(queryStr);
                    TopDocs topDocs = searcher.search(query, MAX_HITS);

                    ScoreDoc[] hits = topDocs.scoreDocs;
                    System.out.println(hits.length + " Record(s) Found");
                    for (int i = 0; i < hits.length; i++) {
                        int docId = hits[i].doc;
                        Document d = searcher.doc(docId);
                        System.out.println("\"Employee Name:\" " + d.get("ENAME") + ", \"Job:\" " + d.get("JOB") + ", \"Hire Date:\" " + d.get("HIREDATE"));
                    }
                    if (hits.length == 0) {
                        System.out.println("No Data Founds ");
                    }
                } finally {
                    searcher.close();
                }
            } finally {
                reader.close();
            }
        } finally {
            directory.close();
        }
    }

}

Fourth Minute:

Everything is running fine now, so we can talk quickly about what we did.

SimpleDBIndexer:

We used JDBC to return data from SCOTT schema tables.

We created a new document for each record returned, and we mapped the result set values to the fields of that document.

SimpleDBSearcher:

We prepared our Lucene query first, then we used IndexSearcher to search the documents under the index directory!

Fifth Minute:

Nothing... Just drink your coffee ;)

{{ tag }}, {{tag}},

{{ parent.title || parent.header.title}}

{{ parent.tldr }}

{{ parent.urlSource.name }}
{{ parent.authors[0].realName || parent.author}}

{{ parent.authors[0].tagline || parent.tagline }}

{{ parent.views }} ViewsClicks
Tweet

{{parent.nComments}}