Java tutorial
/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * See LICENSE.txt included in this distribution for the specific * language governing permissions and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at LICENSE.txt. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2018, 2019 Oracle and/or its affiliates. All rights reserved. */ package org.opengrok.suggest; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.BytesRef; import org.opengrok.suggest.query.SuggesterPrefixQuery; import org.opengrok.suggest.query.SuggesterQuery; import java.io.Closeable; import java.io.File; import java.io.IOException; import java.nio.file.Path; import java.time.Duration; import java.time.Instant; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Collectors; import java.util.stream.Stream; /** * Provides an interface for accessing suggester functionality. */ public final class Suggester implements Closeable { private static final String PROJECTS_DISABLED_KEY = ""; private static final Logger logger = Logger.getLogger(Suggester.class.getName()); private final Map<String, SuggesterProjectData> projectData = new ConcurrentHashMap<>(); private final Object lock = new Object(); private final File suggesterDir; private int resultSize; private Duration awaitTerminationTime; private boolean allowMostPopular; private boolean projectsEnabled; private final Set<String> allowedFields; private final int timeThreshold; private final int rebuildParallelismLevel; // do NOT use fork join thread pool (work stealing thread pool) because it does not send interrupts upon cancellation private final ExecutorService executorService = Executors .newFixedThreadPool(Runtime.getRuntime().availableProcessors(), runnable -> { Thread thread = Executors.defaultThreadFactory().newThread(runnable); thread.setName("suggester-lookup-" + thread.getId()); return thread; }); /** * @param suggesterDir directory under which the suggester data should be created * @param resultSize maximum number of items that should be returned * @param awaitTerminationTime how much time to wait for suggester to initialize * @param allowMostPopular specifies if the most popular completion is enabled * @param projectsEnabled specifies if the OpenGrok projects are enabled * @param allowedFields fields for which should the suggester be enabled, * if {@code null} then enabled for all fields * @param timeThreshold time in milliseconds after which the suggestions requests should time out */ public Suggester(final File suggesterDir, final int resultSize, final Duration awaitTerminationTime, final boolean allowMostPopular, final boolean projectsEnabled, final Set<String> allowedFields, final int timeThreshold, final int rebuildParallelismLevel) { if (suggesterDir == null) { throw new IllegalArgumentException("Suggester needs to have directory specified"); } if (suggesterDir.exists() && !suggesterDir.isDirectory()) { throw new IllegalArgumentException(suggesterDir + " is not a directory"); } this.suggesterDir = suggesterDir; setResultSize(resultSize); setAwaitTerminationTime(awaitTerminationTime); this.allowMostPopular = allowMostPopular; this.projectsEnabled = projectsEnabled; this.allowedFields = new HashSet<>(allowedFields); this.timeThreshold = timeThreshold; this.rebuildParallelismLevel = rebuildParallelismLevel; } /** * Initializes suggester data for specified indexes. The data is initialized asynchronously. * @param luceneIndexes paths to Lucene indexes and name with which the index should be associated */ public void init(final Collection<NamedIndexDir> luceneIndexes) { if (luceneIndexes == null || luceneIndexes.isEmpty()) { logger.log(Level.INFO, "No index directories found, exiting..."); return; } if (!projectsEnabled && luceneIndexes.size() > 1) { throw new IllegalArgumentException("Projects are not enabled and multiple Lucene indexes were passed"); } synchronized (lock) { logger.log(Level.INFO, "Initializing suggester"); ExecutorService executor = Executors.newWorkStealingPool(rebuildParallelismLevel); for (NamedIndexDir indexDir : luceneIndexes) { submitInitIfIndexExists(executor, indexDir); } shutdownAndAwaitTermination(executor, "Suggester successfully initialized"); } } private void submitInitIfIndexExists(final ExecutorService executorService, final NamedIndexDir indexDir) { try { if (indexExists(indexDir.path)) { executorService.submit(getInitRunnable(indexDir)); } else { logger.log(Level.FINE, "Index in {0} directory does not exist, skipping...", indexDir); } } catch (IOException e) { logger.log(Level.WARNING, "Could not check if index exists", e); } } private Runnable getInitRunnable(final NamedIndexDir indexDir) { return () -> { try { Instant start = Instant.now(); logger.log(Level.FINE, "Initializing {0}", indexDir); SuggesterProjectData wfst = new SuggesterProjectData(FSDirectory.open(indexDir.path), getSuggesterDir(indexDir.name), allowMostPopular, allowedFields); wfst.init(); if (projectsEnabled) { projectData.put(indexDir.name, wfst); } else { projectData.put(PROJECTS_DISABLED_KEY, wfst); } Duration d = Duration.between(start, Instant.now()); logger.log(Level.FINE, "Finished initialization of {0}, took {1}", new Object[] { indexDir, d }); } catch (Exception e) { logger.log(Level.SEVERE, "Could not initialize suggester data for " + indexDir, e); } }; } private Path getSuggesterDir(final String indexDirName) { if (projectsEnabled) { return suggesterDir.toPath().resolve(indexDirName); } else { return this.suggesterDir.toPath(); } } private boolean indexExists(final Path indexDir) throws IOException { try (Directory indexDirectory = FSDirectory.open(indexDir)) { return DirectoryReader.indexExists(indexDirectory); } } private void shutdownAndAwaitTermination(final ExecutorService executorService, final String logMessageOnSuccess) { executorService.shutdown(); try { executorService.awaitTermination(awaitTerminationTime.toMillis(), TimeUnit.MILLISECONDS); logger.log(Level.INFO, logMessageOnSuccess); } catch (InterruptedException e) { logger.log(Level.SEVERE, "Interrupted while building suggesters", e); Thread.currentThread().interrupt(); } } /** * Rebuilds the data structures for specified indexes. * @param indexDirs paths to lucene indexes and name with which the index should be associated */ public void rebuild(final Collection<NamedIndexDir> indexDirs) { if (indexDirs == null || indexDirs.isEmpty()) { logger.log(Level.INFO, "Not rebuilding suggester data because no index directories were specified"); return; } synchronized (lock) { logger.log(Level.INFO, "Rebuilding the following suggesters: {0}", indexDirs); ExecutorService executor = Executors.newWorkStealingPool(rebuildParallelismLevel); for (NamedIndexDir indexDir : indexDirs) { SuggesterProjectData data = this.projectData.get(indexDir.name); if (data != null) { executor.submit(getRebuildRunnable(data)); } else { submitInitIfIndexExists(executor, indexDir); } } shutdownAndAwaitTermination(executor, "Suggesters for " + indexDirs + " were successfully rebuilt"); } } private Runnable getRebuildRunnable(final SuggesterProjectData data) { return () -> { try { Instant start = Instant.now(); logger.log(Level.FINE, "Rebuilding {0}", data); data.rebuild(); Duration d = Duration.between(start, Instant.now()); logger.log(Level.FINE, "Rebuild of {0} finished, took {1}", new Object[] { data, d }); } catch (Exception e) { logger.log(Level.SEVERE, "Could not rebuild suggester", e); } }; } /** * Removes the data associated with the provided names. * @param names names of the indexes to delete */ public void remove(final Iterable<String> names) { if (names == null) { return; } synchronized (lock) { logger.log(Level.INFO, "Removing following suggesters: {0}", names); for (String suggesterName : names) { SuggesterProjectData collection = projectData.get(suggesterName); if (collection == null) { logger.log(Level.WARNING, "Unknown suggester {0}", suggesterName); continue; } collection.remove(); projectData.remove(suggesterName); } } } /** * Retrieves suggestions based on the specified parameters. * @param indexReaders index readers with specified name (OpenGrok's project name) * @param suggesterQuery query for suggestions * @param query query on which the suggestions depend * @return suggestions */ public Suggestions search(final List<NamedIndexReader> indexReaders, final SuggesterQuery suggesterQuery, final Query query) { if (indexReaders == null || suggesterQuery == null) { return new Suggestions(Collections.emptyList(), true); } List<NamedIndexReader> readers = indexReaders; if (!projectsEnabled) { readers = Collections .singletonList(new NamedIndexReader(PROJECTS_DISABLED_KEY, indexReaders.get(0).getReader())); } Suggestions suggestions; if (!SuggesterUtils.isComplexQuery(query, suggesterQuery)) { // use WFST for lone prefix suggestions = prefixLookup(readers, (SuggesterPrefixQuery) suggesterQuery); } else { suggestions = complexLookup(readers, suggesterQuery, query); } return new Suggestions(SuggesterUtils.combineResults(suggestions.items, resultSize), suggestions.partialResult); } private Suggestions prefixLookup(final List<NamedIndexReader> readers, final SuggesterPrefixQuery suggesterQuery) { BooleanWrapper partialResult = new BooleanWrapper(); List<LookupResultItem> results = readers.parallelStream().flatMap(namedIndexReader -> { SuggesterProjectData data = projectData.get(namedIndexReader.name); if (data == null) { logger.log(Level.FINE, "{0} not yet initialized", namedIndexReader.name); partialResult.value = true; return Stream.empty(); } boolean gotLock = data.tryLock(); if (!gotLock) { // do not wait for rebuild partialResult.value = true; return Stream.empty(); } try { String prefix = suggesterQuery.getPrefix().text(); return data.lookup(suggesterQuery.getField(), prefix, resultSize).stream() .map(item -> new LookupResultItem(item.key.toString(), namedIndexReader.name, item.value)); } finally { data.unlock(); } }).collect(Collectors.toList()); return new Suggestions(results, partialResult.value); } private Suggestions complexLookup(final List<NamedIndexReader> readers, final SuggesterQuery suggesterQuery, final Query query) { List<LookupResultItem> results = new ArrayList<>(readers.size() * resultSize); List<SuggesterSearchTask> searchTasks = new ArrayList<>(readers.size()); for (NamedIndexReader ir : readers) { searchTasks.add(new SuggesterSearchTask(ir, query, suggesterQuery, results)); } List<Future<Void>> futures; try { futures = executorService.invokeAll(searchTasks, timeThreshold, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { logger.log(Level.WARNING, "Interrupted while invoking suggester search", e); Thread.currentThread().interrupt(); return new Suggestions(Collections.emptyList(), true); } boolean partialResult = futures.stream().anyMatch(Future::isCancelled); // wait for tasks to finish for (SuggesterSearchTask searchTask : searchTasks) { if (!searchTask.started) { continue; } if (!searchTask.finished) { synchronized (searchTask) { while (!searchTask.finished) { try { searchTask.wait(); } catch (InterruptedException e) { logger.log(Level.WARNING, "Interrupted while waiting for task: {0}", searchTask); Thread.currentThread().interrupt(); } } } } } return new Suggestions(results, partialResult); } /** * Handler for search events. * @param projects projects that the {@code query} was used to search in * @param query query that was used to perform the search */ public void onSearch(final Iterable<String> projects, final Query query) { if (!allowMostPopular || projects == null) { return; } try { List<Term> terms = SuggesterUtils.intoTerms(query); if (!projectsEnabled) { for (Term t : terms) { SuggesterProjectData data = projectData.get(PROJECTS_DISABLED_KEY); if (data != null) { data.incrementSearchCount(t); } } } else { for (String project : projects) { for (Term t : terms) { SuggesterProjectData data = projectData.get(project); if (data != null) { data.incrementSearchCount(t); } } } } } catch (Exception e) { logger.log(Level.FINE, "Could not update search count map", e); } } /** * Sets the new maximum number of elements the suggester should suggest. * @param resultSize new number of suggestions to return */ public void setResultSize(final int resultSize) { if (resultSize < 0) { throw new IllegalArgumentException("Result size cannot be negative"); } this.resultSize = resultSize; } /** * Sets the new duration for which to await the initialization of the suggester data. Does not affect already * running initialization. * @param awaitTerminationTime maximum duration for which to wait for initialization */ public void setAwaitTerminationTime(final Duration awaitTerminationTime) { if (awaitTerminationTime.isNegative() || awaitTerminationTime.isZero()) { throw new IllegalArgumentException( "Time to await termination of building the suggester data cannot be 0 or negative"); } this.awaitTerminationTime = awaitTerminationTime; } /** * Increases search counts for specific term. * @param project project where the term resides * @param term term for which to increase search count * @param value positive value by which to increase the search count */ public void increaseSearchCount(final String project, final Term term, final int value) { if (!allowMostPopular) { return; } SuggesterProjectData data; if (!projectsEnabled) { data = projectData.get(PROJECTS_DISABLED_KEY); } else { data = projectData.get(project); } if (data == null) { logger.log(Level.WARNING, "Cannot update search count because of missing suggester data{}", projectsEnabled ? " for project " + project : ""); return; } data.incrementSearchCount(term, value); } /** * Returns the searched terms sorted according to their popularity. * @param project project for which to return the data * @param field field for which to return the data * @param page which page of data to retrieve * @param pageSize number of results to return * @return list of terms with their popularity */ public List<Entry<BytesRef, Integer>> getSearchCounts(final String project, final String field, final int page, final int pageSize) { SuggesterProjectData data = projectData.get(project); if (data == null) { logger.log(Level.FINE, "Cannot retrieve search counts because suggester data for project {0} was not found", project); return Collections.emptyList(); } return data.getSearchCountsSorted(field, page, pageSize); } /** * Closes opened resources. */ @Override public void close() { executorService.shutdownNow(); projectData.values().forEach(f -> { try { f.close(); } catch (IOException e) { logger.log(Level.WARNING, "Could not close suggester data " + f, e); } }); } private class SuggesterSearchTask implements Callable<Void> { private final NamedIndexReader namedIndexReader; private final Query query; private final SuggesterQuery suggesterQuery; private final List<LookupResultItem> results; private volatile boolean finished = false; private volatile boolean started = false; SuggesterSearchTask(final NamedIndexReader namedIndexReader, final Query query, final SuggesterQuery suggesterQuery, final List<LookupResultItem> results) { this.namedIndexReader = namedIndexReader; this.query = query; this.suggesterQuery = suggesterQuery; this.results = results; } @Override public Void call() { try { started = true; SuggesterProjectData data = projectData.get(namedIndexReader.name); if (data == null) { logger.log(Level.FINE, "{0} not yet initialized", namedIndexReader.name); return null; } boolean gotLock = data.tryLock(); if (!gotLock) { // do not wait for rebuild return null; } try { SuggesterSearcher searcher = new SuggesterSearcher(namedIndexReader.reader, resultSize); List<LookupResultItem> resultItems = searcher.suggest(query, namedIndexReader.name, suggesterQuery, data.getSearchCounts(suggesterQuery.getField())); synchronized (results) { results.addAll(resultItems); } } finally { data.unlock(); } } finally { synchronized (this) { finished = true; this.notifyAll(); } } return null; } } /** * Result suggestions data. */ public static class Suggestions { private final List<LookupResultItem> items; private final boolean partialResult; public Suggestions(final List<LookupResultItem> items, final boolean partialResult) { this.items = items; this.partialResult = partialResult; } public List<LookupResultItem> getItems() { return items; } public boolean isPartialResult() { return partialResult; } } /** * Model classes for holding project name and path to its index directory. */ public static class NamedIndexDir { /** * Name of the project. */ private final String name; /** * Path to index directory for project with name {@link #name}. */ private final Path path; public NamedIndexDir(final String name, final Path path) { if (name == null) { throw new IllegalArgumentException("Name cannot be null"); } if (path == null) { throw new IllegalArgumentException("Path cannot be null"); } this.name = name; this.path = path; } public String getName() { return name; } public Path getPath() { return path; } @Override public String toString() { return name; } } /** * Model class to hold the project name and its {@link IndexReader}. */ public static class NamedIndexReader { /** * Name of the project. */ private final String name; /** * IndexReader of the project with {@link #name}. */ private final IndexReader reader; public NamedIndexReader(final String name, final IndexReader reader) { if (name == null) { throw new IllegalArgumentException("Name cannot be null"); } if (reader == null) { throw new IllegalArgumentException("Reader cannot be null"); } this.name = name; this.reader = reader; } public String getName() { return name; } public IndexReader getReader() { return reader; } @Override public String toString() { return name; } } private static class BooleanWrapper { private volatile boolean value; } }