Java tutorial
package uk.co.flax.luwak.demo; import java.io.BufferedReader; import java.io.FileInputStream; import java.io.InputStreamReader; import java.nio.file.FileSystems; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.List; import java.util.Locale; import com.google.common.base.Charsets; import com.google.common.base.Strings; import com.google.common.io.CharStreams; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import uk.co.flax.luwak.*; import uk.co.flax.luwak.matchers.HighlightingMatcher; import uk.co.flax.luwak.matchers.HighlightsMatch; import uk.co.flax.luwak.presearcher.TermFilteredPresearcher; import uk.co.flax.luwak.queryparsers.LuceneQueryParser; /* * Copyright (c) 2013 Lemur Consulting Ltd. * <p/> * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * <p/> * http://www.apache.org/licenses/LICENSE-2.0 * <p/> * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ public class LuwakDemo { public static final Analyzer ANALYZER = new StandardAnalyzer(); public static final String FIELD = "text"; public static final Logger logger = LoggerFactory.getLogger(LuwakDemo.class); public static void main(String... args) throws Exception { new LuwakDemo("src/test/resources/demoqueries", "src/test/resources/gutenberg"); } public LuwakDemo(String queriesFile, String inputDirectory) throws Exception { try (Monitor monitor = new Monitor(new LuceneQueryParser(FIELD, ANALYZER), new TermFilteredPresearcher())) { addQueries(monitor, queriesFile); DocumentBatch batch = DocumentBatch.of(buildDocs(inputDirectory)); Matches<HighlightsMatch> matches = monitor.match(batch, HighlightingMatcher.FACTORY); outputMatches(matches); } } static void addQueries(Monitor monitor, String queriesFile) throws Exception { List<MonitorQuery> queries = new ArrayList<>(); int count = 0; logger.info("Loading queries from {}", queriesFile); try (FileInputStream fis = new FileInputStream(queriesFile); BufferedReader br = new BufferedReader(new InputStreamReader(fis, Charsets.UTF_8))) { String queryString; while ((queryString = br.readLine()) != null) { if (Strings.isNullOrEmpty(queryString)) continue; logger.info("Parsing [{}]", queryString); queries.add( new MonitorQuery(String.format(Locale.ROOT, "%d-%s", count++, queryString), queryString)); } } monitor.update(queries); logger.info("Added {} queries to monitor", count); } static List<InputDocument> buildDocs(String inputDirectory) throws Exception { List<InputDocument> docs = new ArrayList<>(); logger.info("Reading documents from {}", inputDirectory); for (Path filePath : Files.newDirectoryStream(FileSystems.getDefault().getPath(inputDirectory))) { String content; try (FileInputStream fis = new FileInputStream(filePath.toFile()); InputStreamReader reader = new InputStreamReader(fis, Charsets.UTF_8)) { content = CharStreams.toString(reader); InputDocument doc = InputDocument.builder(filePath.toString()) .addField(FIELD, content, new StandardAnalyzer()).build(); docs.add(doc); } } return docs; } static void outputMatches(Matches<HighlightsMatch> matches) { logger.info("Matched batch of {} documents in {} milliseconds with {} queries run", matches.getBatchSize(), matches.getSearchTime(), matches.getQueriesRun()); for (DocumentMatches<HighlightsMatch> docMatches : matches) { logger.info("Matches from {}", docMatches.getDocId()); for (HighlightsMatch match : docMatches) { logger.info("\tQuery: {} ({} hits)", match.getQueryId(), match.getHitCount()); } } } }