// Java tutorial
// Copyright (C) 2013 The Android Open Source Project // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.google.gerrit.lucene; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; import static com.google.common.collect.Iterables.getOnlyElement; import static com.google.gerrit.server.git.QueueProvider.QueueType.INTERACTIVE; import static com.google.gerrit.server.index.IndexRewriteImpl.CLOSED_STATUSES; import static com.google.gerrit.server.index.IndexRewriteImpl.OPEN_STATUSES; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.MINUTES; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; import com.google.gerrit.common.Nullable; import com.google.gerrit.reviewdb.client.Account; import com.google.gerrit.reviewdb.client.Change; import com.google.gerrit.reviewdb.client.PatchSet; import com.google.gerrit.reviewdb.server.ReviewDb; import com.google.gerrit.server.config.ConfigUtil; import com.google.gerrit.server.config.GerritServerConfig; import com.google.gerrit.server.config.SitePaths; import com.google.gerrit.server.index.ChangeField; import 
com.google.gerrit.server.index.ChangeField.ChangeProtoField; import com.google.gerrit.server.index.ChangeField.PatchSetApprovalProtoField; import com.google.gerrit.server.index.ChangeField.PatchSetProtoField; import com.google.gerrit.server.index.ChangeIndex; import com.google.gerrit.server.index.FieldDef; import com.google.gerrit.server.index.FieldDef.FillArgs; import com.google.gerrit.server.index.FieldType; import com.google.gerrit.server.index.IndexExecutor; import com.google.gerrit.server.index.IndexRewriteImpl; import com.google.gerrit.server.index.Schema; import com.google.gerrit.server.index.Schema.Values; import com.google.gerrit.server.query.Predicate; import com.google.gerrit.server.query.QueryParseException; import com.google.gerrit.server.query.change.ChangeData; import com.google.gerrit.server.query.change.ChangeDataSource; import com.google.gerrit.server.query.change.LegacyChangeIdPredicate; import com.google.gwtorm.protobuf.ProtobufCodec; import com.google.gwtorm.server.OrmException; import com.google.gwtorm.server.ResultSet; import com.google.inject.Provider; import com.google.inject.assistedinject.Assisted; import com.google.inject.assistedinject.AssistedInject; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.IntField; import org.apache.lucene.document.LongField; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import 
org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.SearcherFactory; import org.apache.lucene.search.SearcherManager; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.uninverting.UninvertingReader; import org.apache.lucene.util.BytesRef; import org.eclipse.jgit.errors.ConfigInvalidException; import org.eclipse.jgit.lib.Config; import org.eclipse.jgit.storage.file.FileBasedConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ExecutionException; /** * Secondary index implementation using Apache Lucene. * <p> * Writes are managed using a single {@link IndexWriter} per process, committed * aggressively. Reads use {@link SearcherManager} and periodically refresh, * though there may be some lag between a committed write and it showing up to * other threads' searchers. 
*/ public class LuceneChangeIndex implements ChangeIndex { private static final Logger log = LoggerFactory.getLogger(LuceneChangeIndex.class); public static final String CHANGES_OPEN = "open"; public static final String CHANGES_CLOSED = "closed"; private static final String ADDED_FIELD = ChangeField.ADDED.getName(); private static final String APPROVAL_FIELD = ChangeField.APPROVAL.getName(); private static final String CHANGE_FIELD = ChangeField.CHANGE.getName(); private static final String DELETED_FIELD = ChangeField.DELETED.getName(); private static final String ID_FIELD = ChangeField.LEGACY_ID2.getName(); private static final String MERGEABLE_FIELD = ChangeField.MERGEABLE.getName(); private static final String PATCH_SET_FIELD = ChangeField.PATCH_SET.getName(); private static final String REVIEWEDBY_FIELD = ChangeField.REVIEWEDBY.getName(); private static final String UPDATED_SORT_FIELD = sortFieldName(ChangeField.UPDATED); private static final ImmutableSet<String> FIELDS = ImmutableSet.of(ADDED_FIELD, APPROVAL_FIELD, CHANGE_FIELD, DELETED_FIELD, ID_FIELD, MERGEABLE_FIELD, PATCH_SET_FIELD, REVIEWEDBY_FIELD); private static final Map<String, String> CUSTOM_CHAR_MAPPING = ImmutableMap.of("_", " ", ".", " "); public static void setReady(SitePaths sitePaths, int version, boolean ready) throws IOException { try { FileBasedConfig cfg = LuceneVersionManager.loadGerritIndexConfig(sitePaths); LuceneVersionManager.setReady(cfg, version, ready); cfg.save(); } catch (ConfigInvalidException e) { throw new IOException(e); } } private static String sortFieldName(FieldDef<?, ?> f) { return f.getName() + "_SORT"; } static interface Factory { LuceneChangeIndex create(Schema<ChangeData> schema, String base); } static class GerritIndexWriterConfig { private final IndexWriterConfig luceneConfig; private long commitWithinMs; private GerritIndexWriterConfig(Config cfg, String name) { CustomMappingAnalyzer analyzer = new CustomMappingAnalyzer(new 
StandardAnalyzer(CharArraySet.EMPTY_SET), CUSTOM_CHAR_MAPPING); luceneConfig = new IndexWriterConfig(analyzer).setOpenMode(OpenMode.CREATE_OR_APPEND) .setCommitOnClose(true); double m = 1 << 20; luceneConfig.setRAMBufferSizeMB(cfg.getLong("index", name, "ramBufferSize", (long) (IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB * m)) / m); luceneConfig.setMaxBufferedDocs( cfg.getInt("index", name, "maxBufferedDocs", IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS)); try { commitWithinMs = ConfigUtil.getTimeUnit(cfg, "index", name, "commitWithin", MILLISECONDS.convert(5, MINUTES), MILLISECONDS); } catch (IllegalArgumentException e) { commitWithinMs = cfg.getLong("index", name, "commitWithin", 0); } } IndexWriterConfig getLuceneConfig() { return luceneConfig; } long getCommitWithinMs() { return commitWithinMs; } } private final SitePaths sitePaths; private final FillArgs fillArgs; private final ListeningExecutorService executor; private final Provider<ReviewDb> db; private final ChangeData.Factory changeDataFactory; private final Schema<ChangeData> schema; private final QueryBuilder queryBuilder; private final SubIndex openIndex; private final SubIndex closedIndex; private final String idSortField; /** * Whether to use DocValues for range/sorted numeric fields. * <p> * Lucene 5 removed support for sorting based on normal numeric fields, so we * use the newer API for more strongly typed numeric fields in newer schema * versions. These fields also are not stored, so we need to store auxiliary * stored-only field for them as well. */ // TODO(dborowitz): Delete when we delete support for pre-Lucene-5.0 schemas. 
private final boolean useDocValuesForSorting; @AssistedInject LuceneChangeIndex(@GerritServerConfig Config cfg, SitePaths sitePaths, @IndexExecutor(INTERACTIVE) ListeningExecutorService executor, Provider<ReviewDb> db, ChangeData.Factory changeDataFactory, FillArgs fillArgs, @Assisted Schema<ChangeData> schema, @Assisted @Nullable String base) throws IOException { this.sitePaths = sitePaths; this.fillArgs = fillArgs; this.executor = executor; this.db = db; this.changeDataFactory = changeDataFactory; this.schema = schema; this.useDocValuesForSorting = schema.getVersion() >= 15; this.idSortField = sortFieldName(LegacyChangeIdPredicate.idField(schema)); CustomMappingAnalyzer analyzer = new CustomMappingAnalyzer(new StandardAnalyzer(CharArraySet.EMPTY_SET), CUSTOM_CHAR_MAPPING); queryBuilder = new QueryBuilder(analyzer); BooleanQuery .setMaxClauseCount(cfg.getInt("index", "defaultMaxClauseCount", BooleanQuery.getMaxClauseCount())); GerritIndexWriterConfig openConfig = new GerritIndexWriterConfig(cfg, "changes_open"); GerritIndexWriterConfig closedConfig = new GerritIndexWriterConfig(cfg, "changes_closed"); SearcherFactory searcherFactory = newSearcherFactory(); if (cfg.getBoolean("index", "lucene", "testInmemory", false)) { openIndex = new SubIndex(new RAMDirectory(), "ramOpen", openConfig, searcherFactory); closedIndex = new SubIndex(new RAMDirectory(), "ramClosed", closedConfig, searcherFactory); } else { Path dir = base != null ? 
Paths.get(base) : LuceneVersionManager.getDir(sitePaths, schema); openIndex = new SubIndex(dir.resolve(CHANGES_OPEN), openConfig, searcherFactory); closedIndex = new SubIndex(dir.resolve(CHANGES_CLOSED), closedConfig, searcherFactory); } } private SearcherFactory newSearcherFactory() { if (useDocValuesForSorting) { return new SearcherFactory(); } @SuppressWarnings("deprecation") final Map<String, UninvertingReader.Type> mapping = ImmutableMap.of(ChangeField.LEGACY_ID.getName(), UninvertingReader.Type.INTEGER, ChangeField.UPDATED.getName(), UninvertingReader.Type.LONG); return new SearcherFactory() { @Override public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) throws IOException { checkState(reader instanceof DirectoryReader, "expected DirectoryReader, found %s", reader.getClass().getName()); return new IndexSearcher(UninvertingReader.wrap((DirectoryReader) reader, mapping)); } }; } @Override public void close() { List<ListenableFuture<?>> closeFutures = Lists.newArrayListWithCapacity(2); closeFutures.add(executor.submit(new Runnable() { @Override public void run() { openIndex.close(); } })); closeFutures.add(executor.submit(new Runnable() { @Override public void run() { closedIndex.close(); } })); Futures.getUnchecked(Futures.allAsList(closeFutures)); } @Override public Schema<ChangeData> getSchema() { return schema; } @Override public void replace(ChangeData cd) throws IOException { Term id = QueryBuilder.idTerm(schema, cd); Document doc = toDocument(cd); try { if (cd.change().getStatus().isOpen()) { Futures.allAsList(closedIndex.delete(id), openIndex.replace(id, doc)).get(); } else { Futures.allAsList(openIndex.delete(id), closedIndex.replace(id, doc)).get(); } } catch (OrmException | ExecutionException | InterruptedException e) { throw new IOException(e); } } @Override public void delete(Change.Id id) throws IOException { Term idTerm = QueryBuilder.idTerm(schema, id); try { Futures.allAsList(openIndex.delete(idTerm), 
closedIndex.delete(idTerm)).get(); } catch (ExecutionException | InterruptedException e) { throw new IOException(e); } } @Override public void deleteAll() throws IOException { openIndex.deleteAll(); closedIndex.deleteAll(); } @Override public ChangeDataSource getSource(Predicate<ChangeData> p, int start, int limit) throws QueryParseException { Set<Change.Status> statuses = IndexRewriteImpl.getPossibleStatus(p); List<SubIndex> indexes = Lists.newArrayListWithCapacity(2); if (!Sets.intersection(statuses, OPEN_STATUSES).isEmpty()) { indexes.add(openIndex); } if (!Sets.intersection(statuses, CLOSED_STATUSES).isEmpty()) { indexes.add(closedIndex); } return new QuerySource(indexes, queryBuilder.toQuery(p), start, limit, getSort()); } @Override public void markReady(boolean ready) throws IOException { setReady(sitePaths, schema.getVersion(), ready); } @SuppressWarnings("deprecation") private Sort getSort() { if (useDocValuesForSorting) { return new Sort(new SortField(UPDATED_SORT_FIELD, SortField.Type.LONG, true), new SortField(idSortField, SortField.Type.LONG, true)); } else { return new Sort(new SortField(ChangeField.UPDATED.getName(), SortField.Type.LONG, true), new SortField(ChangeField.LEGACY_ID.getName(), SortField.Type.INT, true)); } } private class QuerySource implements ChangeDataSource { private final List<SubIndex> indexes; private final Query query; private final int start; private final int limit; private final Sort sort; private QuerySource(List<SubIndex> indexes, Query query, int start, int limit, Sort sort) { this.indexes = indexes; this.query = checkNotNull(query, "null query from Lucene"); this.start = start; this.limit = limit; this.sort = sort; } @Override public int getCardinality() { return 10; // TODO(dborowitz): estimate from Lucene? 
} @Override public boolean hasChange() { return false; } @Override public String toString() { return query.toString(); } @Override public ResultSet<ChangeData> read() throws OrmException { IndexSearcher[] searchers = new IndexSearcher[indexes.size()]; try { int realLimit = start + limit; TopFieldDocs[] hits = new TopFieldDocs[indexes.size()]; for (int i = 0; i < indexes.size(); i++) { searchers[i] = indexes.get(i).acquire(); hits[i] = searchers[i].search(query, realLimit, sort); } TopDocs docs = TopDocs.merge(sort, realLimit, hits); List<ChangeData> result = Lists.newArrayListWithCapacity(docs.scoreDocs.length); for (int i = start; i < docs.scoreDocs.length; i++) { ScoreDoc sd = docs.scoreDocs[i]; Document doc = searchers[sd.shardIndex].doc(sd.doc, FIELDS); result.add(toChangeData(doc)); } final List<ChangeData> r = Collections.unmodifiableList(result); return new ResultSet<ChangeData>() { @Override public Iterator<ChangeData> iterator() { return r.iterator(); } @Override public List<ChangeData> toList() { return r; } @Override public void close() { // Do nothing. } }; } catch (IOException e) { throw new OrmException(e); } finally { for (int i = 0; i < indexes.size(); i++) { if (searchers[i] != null) { try { indexes.get(i).release(searchers[i]); } catch (IOException e) { log.warn("cannot release Lucene searcher", e); } } } } } } private ChangeData toChangeData(Document doc) { BytesRef cb = doc.getBinaryValue(CHANGE_FIELD); if (cb == null) { int id = doc.getField(ID_FIELD).numericValue().intValue(); return changeDataFactory.create(db.get(), new Change.Id(id)); } // Change proto. Change change = ChangeProtoField.CODEC.decode(cb.bytes, cb.offset, cb.length); ChangeData cd = changeDataFactory.create(db.get(), change); // Patch sets. 
List<PatchSet> patchSets = decodeProtos(doc, PATCH_SET_FIELD, PatchSetProtoField.CODEC); if (!patchSets.isEmpty()) { // Will be an empty list for schemas prior to when this field was stored; // this cannot be valid since a change needs at least one patch set. cd.setPatchSets(patchSets); } // Approvals. cd.setCurrentApprovals(decodeProtos(doc, APPROVAL_FIELD, PatchSetApprovalProtoField.CODEC)); // Changed lines. IndexableField added = doc.getField(ADDED_FIELD); IndexableField deleted = doc.getField(DELETED_FIELD); if (added != null && deleted != null) { cd.setChangedLines(added.numericValue().intValue(), deleted.numericValue().intValue()); } // Mergeable. String mergeable = doc.get(MERGEABLE_FIELD); if ("1".equals(mergeable)) { cd.setMergeable(true); } else if ("0".equals(mergeable)) { cd.setMergeable(false); } // Reviewed-by. IndexableField[] reviewedBy = doc.getFields(REVIEWEDBY_FIELD); if (reviewedBy.length > 0) { Set<Account.Id> accounts = Sets.newHashSetWithExpectedSize(reviewedBy.length); for (IndexableField r : reviewedBy) { int id = r.numericValue().intValue(); if (reviewedBy.length == 1 && id == ChangeField.NOT_REVIEWED) { break; } accounts.add(new Account.Id(id)); } cd.setReviewedBy(accounts); } return cd; } private static <T> List<T> decodeProtos(Document doc, String fieldName, ProtobufCodec<T> codec) { BytesRef[] bytesRefs = doc.getBinaryValues(fieldName); if (bytesRefs.length == 0) { return Collections.emptyList(); } List<T> result = new ArrayList<>(bytesRefs.length); for (BytesRef r : bytesRefs) { result.add(codec.decode(r.bytes, r.offset, r.length)); } return result; } private Document toDocument(ChangeData cd) { Document result = new Document(); for (Values<ChangeData> vs : schema.buildFields(cd, fillArgs)) { if (vs.getValues() != null) { add(result, vs); } } return result; } @SuppressWarnings("deprecation") private void add(Document doc, Values<ChangeData> values) { String name = values.getField().getName(); FieldType<?> type = 
values.getField().getType(); Store store = store(values.getField()); if (useDocValuesForSorting) { FieldDef<ChangeData, ?> f = values.getField(); if (f == ChangeField.LEGACY_ID || f == ChangeField.LEGACY_ID2) { int v = (Integer) getOnlyElement(values.getValues()); doc.add(new NumericDocValuesField(sortFieldName(f), v)); } else if (f == ChangeField.UPDATED) { long t = ((Timestamp) getOnlyElement(values.getValues())).getTime(); doc.add(new NumericDocValuesField(UPDATED_SORT_FIELD, t)); } } if (type == FieldType.INTEGER || type == FieldType.INTEGER_RANGE) { for (Object value : values.getValues()) { doc.add(new IntField(name, (Integer) value, store)); } } else if (type == FieldType.LONG) { for (Object value : values.getValues()) { doc.add(new LongField(name, (Long) value, store)); } } else if (type == FieldType.TIMESTAMP) { for (Object value : values.getValues()) { doc.add(new LongField(name, ((Timestamp) value).getTime(), store)); } } else if (type == FieldType.EXACT || type == FieldType.PREFIX) { for (Object value : values.getValues()) { doc.add(new StringField(name, (String) value, store)); } } else if (type == FieldType.FULL_TEXT) { for (Object value : values.getValues()) { doc.add(new TextField(name, (String) value, store)); } } else if (type == FieldType.STORED_ONLY) { for (Object value : values.getValues()) { doc.add(new StoredField(name, (byte[]) value)); } } else { throw FieldType.badFieldType(type); } } private static Field.Store store(FieldDef<?, ?> f) { return f.isStored() ? Field.Store.YES : Field.Store.NO; } }