jfix.search.FullTextIndex.java Source code

Java tutorial

Introduction

Here is the source code for jfix.search.FullTextIndex.java

Source

/*
Copyright (C) 2016 maik.jablonski@jease.org
    
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
    
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
    
You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package jfix.search;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

import java.util.ArrayList;
import java.util.List;

/**
 * In-memory full-text index that associates arbitrary objects with searchable text.
 *
 * <p>Usage follows three phases: call {@link #add(Object, String)} for every object,
 * call {@link #commit()} once, then call {@link #search(String)} as often as needed.
 * {@code commit()} closes the underlying writer, so objects should be added before it
 * is called.
 *
 * <p>Queries use a simplified syntax which is translated to the Lucene classic query
 * syntax by {@link #buildQueryString(String)}: {@code +} means AND, {@code |} means OR
 * and {@code ^} means NOT. Terms without an explicit operator are combined with AND.
 *
 * <p>{@code add} mutates a single shared {@link Document}/{@link Field} pair and an
 * unsynchronized list, so concurrent calls to {@code add} would interfere with each
 * other.
 *
 * @param <E> type of the objects returned by a search
 */
public class FullTextIndex<E> {

    // Lucene treats the following as special query syntax:
    //   + - && || ! ( ) { } [ ] ^ " ~ * ? : \
    // Only the subset listed here is escaped. "+", "|" and "^" are deliberately left
    // alone because buildQueryString() rewrites them into AND / OR / NOT.
    private static final String[] STRINGS_TO_QUOTE = new String[] { "-", "&", "!", "{", "}", "[", "]", ":", "?" };

    static {
        // Raise the global clause limit tenfold (relative to whatever it currently is).
        BooleanQuery.setMaxClauseCount(BooleanQuery.getMaxClauseCount() * 10);
    }

    /** Indexed objects, in the order in which they were added. */
    private List<E> objects;
    private Directory indexDirectory;
    private IndexWriter indexWriter;
    private QueryParser queryParser;
    /** Single document instance which is re-used for every call to {@link #add}. */
    private Document document;
    /** The "text" field of {@link #document}; its value is replaced on every add. */
    private Field fulltext;

    /**
     * Creates an empty index backed by a {@link RAMDirectory}.
     *
     * @throws RuntimeException if the index writer cannot be created
     */
    public FullTextIndex() {
        try {
            objects = new ArrayList<>();

            Analyzer analyzer = new StandardAnalyzer();
            // Integer.MAX_VALUE as token limit: effectively index the complete text.
            IndexWriterConfig config = new IndexWriterConfig(
                    new LimitTokenCountAnalyzer(analyzer, Integer.MAX_VALUE));

            indexDirectory = new RAMDirectory();
            indexWriter = new IndexWriter(indexDirectory, config);

            queryParser = new QueryParser("text", analyzer);
            queryParser.setDefaultOperator(QueryParser.AND_OPERATOR);

            fulltext = new TextField("text", "", Field.Store.NO);

            // Used as base-set for a NOT-Query: every document matches "true:yes",
            // which buildQueryString() prepends to queries starting with "^".
            Field inverse = new TextField("true", "yes", Field.Store.NO);

            document = new Document();
            document.add(fulltext);
            document.add(inverse);
        } catch (Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /**
     * Adds an object together with the text under which it should be found.
     * The call is silently ignored if either argument is {@code null}.
     *
     * @param object object to be returned by matching searches
     * @param text   text to be indexed for the object
     * @throws RuntimeException if the document cannot be written to the index
     */
    public void add(E object, String text) {
        try {
            if (object != null && text != null) {
                objects.add(object);
                // Re-use the shared document: only the value of the text field changes.
                fulltext.setStringValue(appendWithoutPunctuation(text));
                indexWriter.addDocument(document);
            }
        } catch (Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /**
     * Commits all added documents and closes the index writer.
     *
     * @throws RuntimeException if committing or closing fails
     */
    public void commit() {
        try {
            indexWriter.commit();
            indexWriter.close();
        } catch (Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /**
     * Searches the index with the given query in simplified syntax
     * (see {@link #buildQueryString(String)}).
     *
     * @param search query string
     * @return the objects gathered by the {@code ObjectCollector} for the matching documents
     * @throws Exception with message {@code "Query Syntax Error: " + search} if the query
     *                   cannot be built, parsed or executed; the original failure is
     *                   attached as cause
     */
    public List<E> search(String search) throws Exception {
        try {
            String query = buildQueryString(search);
            ObjectCollector<E> collector = new ObjectCollector<>(objects);
            // try-with-resources: the reader is closed even if parsing or searching fails.
            try (IndexReader indexReader = DirectoryReader.open(indexDirectory)) {
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                indexSearcher.search(queryParser.parse(query), collector);
            }
            return collector.getOutput();
        } catch (Exception e) {
            // Keep the established message, but preserve the root cause for diagnosis.
            throw new Exception("Query Syntax Error: " + search, e);
        }
    }

    /**
     * Translates a query in simplified syntax into a Lucene query string.
     *
     * <ul>
     * <li>Every occurrence of a string in {@code STRINGS_TO_QUOTE} is escaped with a
     * backslash.</li>
     * <li>A query starting with {@code ^} is prefixed with {@code true:yes+}, so that the
     * negation has a base-set to subtract from.</li>
     * <li>{@code +} becomes {@code " AND "}, {@code |} becomes {@code " OR "} and
     * {@code ^} becomes {@code " NOT "}.</li>
     * </ul>
     *
     * @param search query in simplified syntax, must not be {@code null}
     * @return query string for the Lucene query parser
     */
    protected String buildQueryString(String search) {
        for (String stringToQuote : STRINGS_TO_QUOTE) {
            search = search.replace(stringToQuote, "\\" + stringToQuote);
        }
        if (search.startsWith("^")) {
            search = "true:yes+" + search;
        }
        return search.replace("+", " AND ").replace("|", " OR ").replace("^", " NOT ");
    }

    /**
     * Returns the given text followed by a blank and a copy of the text in which every
     * character that is neither letter, digit nor whitespace is replaced by a blank.
     * This way both the original text and its punctuation-free variant are indexed.
     *
     * @param str text to be processed, must not be {@code null}
     * @return original text plus punctuation-free copy
     */
    private String appendWithoutPunctuation(String str) {
        int strlen = str.length();
        // Capacity: original + separator + copy of equal length.
        StringBuilder sb = new StringBuilder(2 * strlen + 2);
        sb.append(str);
        sb.append(" ");
        for (int i = 0; i < strlen; i++) {
            char c = str.charAt(i);
            if (Character.isLetterOrDigit(c) || Character.isWhitespace(c)) {
                sb.append(c);
            } else {
                sb.append(" ");
            }
        }
        return sb.toString();
    }

}