uk.co.flax.luwak.demo.LuwakDemo.java Source code

Introduction

Here is the source code for uk.co.flax.luwak.demo.LuwakDemo.java. The demo registers a file of Lucene queries with a Luwak Monitor, matches every document in an input directory against them as a single batch, and logs each matching query with its highlighted hit count.
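
The queries file read by addQueries holds one query per line in Lucene query syntax; blank lines are skipped. The two queries below are invented for illustration and are not the contents of the bundled demoqueries file:

    "moby dick"
    whale AND harpoon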

Source

/*
 * Copyright (c) 2013 Lemur Consulting Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package uk.co.flax.luwak.demo;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.nio.file.DirectoryStream;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import com.google.common.base.Charsets;
import com.google.common.base.Strings;
import com.google.common.io.CharStreams;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.co.flax.luwak.*;
import uk.co.flax.luwak.matchers.HighlightingMatcher;
import uk.co.flax.luwak.matchers.HighlightsMatch;
import uk.co.flax.luwak.presearcher.TermFilteredPresearcher;
import uk.co.flax.luwak.queryparsers.LuceneQueryParser;

/**
 * Demonstrates Luwak's "reverse search": stored queries are registered with a
 * {@link Monitor}, then a batch of documents is matched against all of them at once.
 */
public class LuwakDemo {

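    // Field name and analyzer shared by the stored queries and the documents.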
    public static final Analyzer ANALYZER = new StandardAnalyzer();

    public static final String FIELD = "text";

    public static final Logger logger = LoggerFactory.getLogger(LuwakDemo.class);

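    // Runs the demo against the query file and document corpus bundled with
    // the project's test resources.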
    public static void main(String... args) throws Exception {
        new LuwakDemo("src/test/resources/demoqueries", "src/test/resources/gutenberg");
    }

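    // Loads the queries, builds a single batch from every file in the input
    // directory, matches the batch, and logs the results.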
    public LuwakDemo(String queriesFile, String inputDirectory) throws Exception {

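        // The Monitor is opened in try-with-resources so its internal query
        // index is closed once the batch has been matched.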
        try (Monitor monitor = new Monitor(new LuceneQueryParser(FIELD, ANALYZER), new TermFilteredPresearcher())) {
            addQueries(monitor, queriesFile);
            DocumentBatch batch = DocumentBatch.of(buildDocs(inputDirectory));
            Matches<HighlightsMatch> matches = monitor.match(batch, HighlightingMatcher.FACTORY);
            outputMatches(matches);
        }
    }

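    // Parses one query per non-blank line of queriesFile and registers each
    // with the monitor under an id of the form "<count>-<query string>".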
    static void addQueries(Monitor monitor, String queriesFile) throws Exception {
        List<MonitorQuery> queries = new ArrayList<>();
        int count = 0;
        logger.info("Loading queries from {}", queriesFile);
        try (FileInputStream fis = new FileInputStream(queriesFile);
                BufferedReader br = new BufferedReader(new InputStreamReader(fis, Charsets.UTF_8))) {
            String queryString;
            while ((queryString = br.readLine()) != null) {
                if (Strings.isNullOrEmpty(queryString))
                    continue;
                logger.info("Parsing [{}]", queryString);
                queries.add(
                        new MonitorQuery(String.format(Locale.ROOT, "%d-%s", count++, queryString), queryString));
            }
        }
        monitor.update(queries);
        logger.info("Added {} queries to monitor", count);
    }

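    // Creates one InputDocument per file in inputDirectory, using the file
    // path as the document id.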
    static List<InputDocument> buildDocs(String inputDirectory) throws Exception {
        List<InputDocument> docs = new ArrayList<>();
        logger.info("Reading documents from {}", inputDirectory);
        // Close the DirectoryStream when finished: iterating the stream
        // without try-with-resources leaks a directory handle.
        try (DirectoryStream<Path> dirStream
                = Files.newDirectoryStream(FileSystems.getDefault().getPath(inputDirectory))) {
            for (Path filePath : dirStream) {
                String content;
                try (FileInputStream fis = new FileInputStream(filePath.toFile());
                        InputStreamReader reader = new InputStreamReader(fis, Charsets.UTF_8)) {
                    content = CharStreams.toString(reader);
                    // Reuse the shared ANALYZER instead of allocating a new
                    // StandardAnalyzer for every document.
                    InputDocument doc = InputDocument.builder(filePath.toString())
                            .addField(FIELD, content, ANALYZER).build();
                    docs.add(doc);
                }
            }
        }
        return docs;
    }

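    // Reports batch-level timing statistics, then every matching query and
    // its hit count for each document in the batch.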
    static void outputMatches(Matches<HighlightsMatch> matches) {
        logger.info("Matched batch of {} documents in {} milliseconds with {} queries run",
                matches.getBatchSize(), matches.getSearchTime(), matches.getQueriesRun());
        for (DocumentMatches<HighlightsMatch> docMatches : matches) {
            logger.info("Matches from {}", docMatches.getDocId());
            for (HighlightsMatch match : docMatches) {
                logger.info("\tQuery: {} ({} hits)", match.getQueryId(), match.getHitCount());
            }
        }
    }

}
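
The same pipeline also works for a single ad-hoc document. The sketch below reuses only calls that appear in the listing above; the query id, query string, document id, and document text are invented for illustration:

import java.util.Collections;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import uk.co.flax.luwak.*;
import uk.co.flax.luwak.matchers.HighlightingMatcher;
import uk.co.flax.luwak.matchers.HighlightsMatch;
import uk.co.flax.luwak.presearcher.TermFilteredPresearcher;
import uk.co.flax.luwak.queryparsers.LuceneQueryParser;

public class SingleDocExample {

    public static void main(String... args) throws Exception {
        try (Monitor monitor = new Monitor(new LuceneQueryParser("text", new StandardAnalyzer()),
                new TermFilteredPresearcher())) {
            // Register a single stored query with the monitor.
            monitor.update(Collections.singletonList(new MonitorQuery("q1", "whale")));

            // Wrap one document in a one-element batch, mirroring the
            // DocumentBatch.of(List) call used by the demo.
            InputDocument doc = InputDocument.builder("doc1")
                    .addField("text", "a white whale surfaced off the bow", new StandardAnalyzer())
                    .build();
            DocumentBatch batch = DocumentBatch.of(Collections.singletonList(doc));

            Matches<HighlightsMatch> matches = monitor.match(batch, HighlightingMatcher.FACTORY);
            for (DocumentMatches<HighlightsMatch> docMatches : matches) {
                for (HighlightsMatch match : docMatches) {
                    System.out.println(docMatches.getDocId() + " matched " + match.getQueryId());
                }
            }
        }
    }
}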