fr.univ_tours.etu.searcher.LikeThisTest.java Source code

Java tutorial

Introduction

Here is the source code for fr.univ_tours.etu.searcher.LikeThisTest.java

Source

/*
 *  This work is licensed for the ULB Group13 BKT for the DBSA course.
 */
package fr.univ_tours.etu.searcher;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

/**
 * Minimal, self-contained demonstration of Lucene's {@link MoreLikeThis}.
 *
 * <p>The demo builds a four-document in-memory index (fields {@code id},
 * {@code title} and {@code content}), derives a "more like this" query from a
 * free-text string and prints the title and content of every hit.
 *
 * <p>Typical usage is {@link #init()}, then {@link #writerEntries()}, then a
 * similarity lookup, and finally {@link #close()} to release the analyzer and
 * the directory (see {@link #main(String[])}).
 *
 * @author Anas Alba
 */
public class LikeThisTest implements AutoCloseable {

    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_RESULTS = 10;

    /**
     * Field type shared by the {@code title} and {@code content} fields:
     * tokenized, stored, and indexed with frequencies, positions and offsets.
     * Built once and frozen so it cannot be modified after first use.
     */
    private static final FieldType STORED_TEXT_TYPE = new FieldType();

    static {
        STORED_TEXT_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        STORED_TEXT_TYPE.setStored(true);
        STORED_TEXT_TYPE.setTokenized(true);
        STORED_TEXT_TYPE.freeze();
    }

    /**
     * Runs the demo end to end: initialise, index the sample documents, and
     * print the documents similar to {@code "doduck prototype"}.
     *
     * @param args ignored
     * @throws IOException if the index cannot be written or read
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources guarantees the analyzer and directory are released
        // even when indexing or searching fails.
        try (LikeThisTest m = new LikeThisTest()) {
            m.init();
            m.writerEntries();
            m.findSimilar("doduck prototype");
        }
    }

    private Directory indexDir;
    private StandardAnalyzer analyzer;

    /**
     * Creates the analyzer and the in-memory index directory. Any resources
     * left over from a previous call are closed first so that calling this
     * method again does not leak them.
     *
     * @throws IOException if closing a previously created directory fails
     */
    public void init() throws IOException {
        close();
        analyzer = new StandardAnalyzer();
        // In-memory directory: nothing is written to disk.
        indexDir = new RAMDirectory();
    }

    /**
     * Writes the four sample documents into the index, replacing any existing
     * index content ({@code OpenMode.CREATE}).
     *
     * @throws IOException if the index cannot be written
     */
    public void writerEntries() throws IOException {
        // A fresh config per writer: the writer is opened in CREATE mode so each
        // call starts from an empty index.
        IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer)
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        // The writer is closed even if adding or committing a document throws.
        try (IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig)) {
            indexWriter.addDocument(createDocument("1", "doduck", "prototype your idea"));
            indexWriter.addDocument(createDocument("2", "doduck", "love programming"));
            indexWriter.addDocument(createDocument("3", "We do", "prototype"));
            indexWriter.addDocument(createDocument("4", "We love", "challange"));
            indexWriter.commit();
        }
    }

    /**
     * Releases the index directory and the analyzer. Safe to call when
     * {@link #init()} was never invoked, and safe to call more than once.
     *
     * @throws IOException if closing the directory fails
     */
    @Override
    public void close() throws IOException {
        try {
            if (indexDir != null) {
                indexDir.close();
            }
        } finally {
            indexDir = null;
            // Close the analyzer even if closing the directory failed.
            if (analyzer != null) {
                analyzer.close();
                analyzer = null;
            }
        }
    }

    /**
     * Builds one index document.
     *
     * @param id      value of the untokenized, stored {@code id} field
     * @param title   value of the tokenized, stored {@code title} field
     * @param content value of the tokenized, stored {@code content} field
     * @return the populated document
     */
    private Document createDocument(String id, String title, String content) {
        Document doc = new Document();
        doc.add(new StringField("id", id, Field.Store.YES));
        doc.add(new Field("title", title, STORED_TEXT_TYPE));
        doc.add(new Field("content", content, STORED_TEXT_TYPE));
        return doc;
    }

    /**
     * Builds a {@link MoreLikeThis} query from the given text and prints the
     * title and content of up to {@value #MAX_RESULTS} matching documents.
     *
     * @param searchForSimilar free text to find similar documents for
     * @throws IOException if the index cannot be opened or searched
     */
    private void findSimilar(String searchForSimilar) throws IOException {
        // Both the index reader and the text reader are closed on every exit path.
        try (IndexReader reader = DirectoryReader.open(indexDir);
             Reader queryText = new StringReader(searchForSimilar)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);

            MoreLikeThis mlt = new MoreLikeThis(reader);
            // Thresholds of 0 so that no term is discarded for being rare;
            // the sample index is tiny.
            mlt.setMinTermFreq(0);
            mlt.setMinDocFreq(0);
            mlt.setFieldNames(new String[] { "title", "content" });
            mlt.setAnalyzer(analyzer);

            // NOTE(review): only "content" is passed to like(...) although both
            // field names are configured above - confirm whether "title" is
            // meant to be queried as well.
            Query query = mlt.like("content", queryText);

            TopDocs topDocs = indexSearcher.search(query, MAX_RESULTS);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document aSimilar = indexSearcher.doc(scoreDoc.doc);

                System.out.println("====similar finded====");
                System.out.println("title: " + aSimilar.get("title"));
                System.out.println("content: " + aSimilar.get("content"));
            }
        }
    }
}