/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.accumulo.core.summary;

import static java.nio.charset.StandardCharsets.UTF_8;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.summary.SummarizerConfiguration;
import org.apache.accumulo.core.clientImpl.ClientContext;
import org.apache.accumulo.core.clientImpl.ServerClient;
import org.apache.accumulo.core.conf.AccumuloConfiguration;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.TableId;
import org.apache.accumulo.core.dataImpl.KeyExtent;
import org.apache.accumulo.core.dataImpl.thrift.TRowRange;
import org.apache.accumulo.core.dataImpl.thrift.TSummaries;
import org.apache.accumulo.core.dataImpl.thrift.TSummaryRequest;
import org.apache.accumulo.core.metadata.schema.TabletMetadata;
import org.apache.accumulo.core.metadata.schema.TabletsMetadata;
import org.apache.accumulo.core.rpc.ThriftUtil;
import org.apache.accumulo.core.spi.cache.BlockCache;
import org.apache.accumulo.core.spi.crypto.CryptoService;
import org.apache.accumulo.core.tabletserver.thrift.TabletClientService;
import org.apache.accumulo.core.tabletserver.thrift.TabletClientService.Client;
import org.apache.accumulo.core.trace.TraceUtil;
import org.apache.accumulo.core.trace.thrift.TInfo;
import org.apache.accumulo.core.util.ByteBufferUtil;
import org.apache.accumulo.core.util.CancelFlagFuture;
import org.apache.accumulo.core.util.CompletableFutureUtil;
import org.apache.accumulo.core.util.HostAndPort;
import org.apache.accumulo.core.util.TextUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.thrift.TApplicationException;
import org.apache.thrift.TException;
import org.apache.thrift.transport.TTransportException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
import com.google.common.cache.Cache;
import com.google.common.collect.Lists;
import com.google.common.hash.Hashing;

/**
 * This class uses multiple tservers to gather summaries.
 *
 * Below is a rough outline of the RPC process.
 *
 * <ol>
 * <li>Clients pick a random tserver and make an RPC to remotely execute
 * {@link #gather(ExecutorService)}.
 * <li>{@link #gather(ExecutorService)} makes RPC calls to multiple tservers to remotely execute
 * {@link #processPartition(ExecutorService, int, int)}.
 * <li>{@link #processPartition(ExecutorService, int, int)} makes RPC calls to multiple tservers
 * to remotely execute
 * {@link #processFiles(FileSystemResolver, Map, BlockCache, BlockCache, Cache, ExecutorService)}.
 * </ol>
 */
public class Gatherer {

  private static final Logger log = LoggerFactory.getLogger(Gatherer.class);

  private ClientContext ctx;
  private TableId tableId;
  private SummarizerFactory factory;
  private Text startRow = null;
  private Text endRow = null;
  private Range clipRange;
  private Predicate<SummarizerConfiguration> summarySelector;
  private CryptoService cryptoService;

  private TSummaryRequest request;

  private String summarizerPattern;

  private Set<SummarizerConfiguration> summaries;

  public Gatherer(ClientContext context, TSummaryRequest request,
      AccumuloConfiguration tableConfig, CryptoService cryptoService) {
    this.ctx = context;
    this.tableId = TableId.of(request.tableId);
    this.startRow = ByteBufferUtil.toText(request.bounds.startRow);
    this.endRow = ByteBufferUtil.toText(request.bounds.endRow);
    this.clipRange = new Range(startRow, false, endRow, true);
    this.summaries = request.getSummarizers().stream()
        .map(SummarizerConfigurationUtil::fromThrift).collect(Collectors.toSet());
    this.request = request;
    this.cryptoService = cryptoService;

    this.summarizerPattern = request.getSummarizerPattern();

    if (summarizerPattern != null) {
      Pattern pattern = Pattern.compile(summarizerPattern);
      // The way conf is converted to string below is documented in the API, so consider this when
      // making changes!
      summarySelector = conf -> pattern
          .matcher(conf.getClassName() + " " + new TreeMap<>(conf.getOptions())).matches();
      if (!summaries.isEmpty()) {
        summarySelector = summarySelector.or(conf -> summaries.contains(conf));
      }
    } else if (!summaries.isEmpty()) {
      summarySelector = conf -> summaries.contains(conf);
    } else {
      summarySelector = conf -> true;
    }

    this.factory = new SummarizerFactory(tableConfig);
  }
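  // Illustrative note (not in the original source): in the constructor above, a
  // hypothetical SummarizerConfiguration for class com.example.FooSummarizer with
  // options {opt1=v1, opt2=v2} is rendered as the string
  // "com.example.FooSummarizer {opt1=v1, opt2=v2}" before the regex is applied.
  // Because Matcher.matches() is used, a summarizerPattern such as
  // "com\\.example\\..*" must match that entire string to select the configuration.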
  private TSummaryRequest getRequest() {
    return request;
  }

  /**
   * @param fileSelector only returns files that match this predicate
   * @return A map of the form: {@code map<tserver location, map<path, list<range>>>}. The ranges
   *         associated with a file represent the tablets that use the file.
   */
  private Map<String,Map<String,List<TRowRange>>>
      getFilesGroupedByLocation(Predicate<String> fileSelector) {

    Iterable<TabletMetadata> tmi = TabletsMetadata.builder().forTable(tableId)
        .overlapping(startRow, endRow).fetchFiles().fetchLocation().fetchLast().fetchPrev()
        .build(ctx);

    // get a subset of files
    Map<String,List<TabletMetadata>> files = new HashMap<>();
    for (TabletMetadata tm : tmi) {
      for (String file : tm.getFiles()) {
        if (fileSelector.test(file)) {
          // TODO push this filtering to server side and possibly use batch scanner
          files.computeIfAbsent(file, s -> new ArrayList<>()).add(tm);
        }
      }
    }

    // group by location, then file
    Map<String,Map<String,List<TRowRange>>> locations = new HashMap<>();

    List<String> tservers = null;

    for (Entry<String,List<TabletMetadata>> entry : files.entrySet()) {

      String location = entry.getValue().stream()
          .filter(tm -> tm.getLocation() != null) // filter tablets w/o a location
          .map(tm -> tm.getLocation().getHostAndPort().toString()) // convert to host:port strings
          .min(String::compareTo) // find minimum host:port
          .orElse(entry.getValue().stream()
              .filter(tm -> tm.getLast() != null) // if no locations, then look at last locations
              .map(tm -> tm.getLast().getHostAndPort().toString()) // convert to host:port strings
              .min(String::compareTo).orElse(null)); // find minimum last location or return null

      if (location == null) {
        if (tservers == null) {
          tservers = ctx.instanceOperations().getTabletServers();
          Collections.sort(tservers);
        }

        // When no location, the approach below will consistently choose the same tserver for the
        // same file (as long as the set of tservers is stable).
        int idx = Math.abs(Hashing.murmur3_32().hashString(entry.getKey(), UTF_8).asInt())
            % tservers.size();
        location = tservers.get(idx);
      }

      // merge contiguous ranges
      List<Range> merged = Range.mergeOverlapping(
          Lists.transform(entry.getValue(), tm -> tm.getExtent().toDataRange()));
      // clip ranges to queried range
      List<TRowRange> ranges =
          merged.stream().map(r -> toClippedExtent(r).toThrift()).collect(Collectors.toList());

      locations.computeIfAbsent(location, s -> new HashMap<>()).put(entry.getKey(), ranges);
    }

    return locations;
  }

  private <K,V> Iterable<Map<K,V>> partition(Map<K,V> map, int max) {

    if (map.size() < max) {
      return Collections.singletonList(map);
    }

    return () -> {
      Iterator<Entry<K,V>> esi = map.entrySet().iterator();

      return new Iterator<Map<K,V>>() {
        @Override
        public boolean hasNext() {
          return esi.hasNext();
        }

        @Override
        public Map<K,V> next() {
          Map<K,V> workingMap = new HashMap<>(max);

          while (esi.hasNext() && workingMap.size() < max) {
            Entry<K,V> entry = esi.next();
            workingMap.put(entry.getKey(), entry.getValue());
          }

          return workingMap;
        }
      };
    };
  }
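  // Illustrative note (not in the original source): partition() builds chunks on
  // demand from a shared entry iterator, so a map of 1200 files with max = 500 is
  // handed out as three maps of 500, 500, and 200 entries. Each chunk is a fresh
  // HashMap, so callers may hold one chunk while requesting the next.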
  private static class ProcessedFiles {
    final SummaryCollection summaries;
    final Set<String> failedFiles;

    public ProcessedFiles() {
      this.summaries = new SummaryCollection();
      this.failedFiles = new HashSet<>();
    }

    public ProcessedFiles(SummaryCollection summaries, SummarizerFactory factory) {
      this();
      this.summaries.merge(summaries, factory);
    }

    static ProcessedFiles merge(ProcessedFiles pf1, ProcessedFiles pf2,
        SummarizerFactory factory) {
      ProcessedFiles ret = new ProcessedFiles();
      ret.failedFiles.addAll(pf1.failedFiles);
      ret.failedFiles.addAll(pf2.failedFiles);
      ret.summaries.merge(pf1.summaries, factory);
      ret.summaries.merge(pf2.summaries, factory);
      return ret;
    }
  }

  private class FilesProcessor implements Supplier<ProcessedFiles> {

    HostAndPort location;
    Map<String,List<TRowRange>> allFiles;
    private TInfo tinfo;
    private AtomicBoolean cancelFlag;

    public FilesProcessor(TInfo tinfo, HostAndPort location, Map<String,List<TRowRange>> allFiles,
        AtomicBoolean cancelFlag) {
      this.location = location;
      this.allFiles = allFiles;
      this.tinfo = tinfo;
      this.cancelFlag = cancelFlag;
    }

    @Override
    public ProcessedFiles get() {
      ProcessedFiles pfiles = new ProcessedFiles();

      Client client = null;
      try {
        client = ThriftUtil.getTServerClient(location, ctx);
        // partition files into smaller chunks so that not too many are sent to a tserver at once
        for (Map<String,List<TRowRange>> files : partition(allFiles, 500)) {
          if (pfiles.failedFiles.size() > 0) {
            // there was a previous failure on this tserver, so just fail the rest of the files
            pfiles.failedFiles.addAll(files.keySet());
            continue;
          }

          try {
            TSummaries tSums =
                client.startGetSummariesFromFiles(tinfo, ctx.rpcCreds(), getRequest(), files);
            while (!tSums.finished && !cancelFlag.get()) {
              // "contiuneGetSummaries" (sic) is the actual method name in the thrift API
              tSums = client.contiuneGetSummaries(tinfo, tSums.sessionId);
            }

            pfiles.summaries.merge(new SummaryCollection(tSums), factory);
          } catch (TApplicationException tae) {
            throw new RuntimeException(tae);
          } catch (TTransportException e) {
            pfiles.failedFiles.addAll(files.keySet());
            continue;
          } catch (TException e) {
            throw new RuntimeException(e);
          }
        }
      } catch (TTransportException e1) {
        pfiles.failedFiles.addAll(allFiles.keySet());
      } finally {
        ThriftUtil.returnClient(client);
      }

      if (cancelFlag.get()) {
        throw new RuntimeException("Operation canceled");
      }

      return pfiles;
    }
  }
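  // Illustrative note (not in the original source): FilesProcessor treats transport
  // failures as retryable by recording the affected file names in failedFiles;
  // PartitionFuture below reacts to a non-empty failedFiles set by recomputing file
  // locations and retrying only those files. Application-level thrift errors, by
  // contrast, propagate as RuntimeExceptions and fail the whole partition.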
  private class PartitionFuture implements Future<SummaryCollection> {

    private CompletableFuture<ProcessedFiles> future;
    private int modulus;
    private int remainder;
    private ExecutorService execSrv;
    private TInfo tinfo;
    private AtomicBoolean cancelFlag = new AtomicBoolean(false);

    PartitionFuture(TInfo tinfo, ExecutorService execSrv, int modulus, int remainder) {
      this.tinfo = tinfo;
      this.execSrv = execSrv;
      this.modulus = modulus;
      this.remainder = remainder;
    }

    private synchronized void initiateProcessing(ProcessedFiles previousWork) {
      try {
        Predicate<String> fileSelector = file -> Math
            .abs(Hashing.murmur3_32().hashString(file, UTF_8).asInt()) % modulus == remainder;

        if (previousWork != null) {
          // when retrying, only reprocess the files that failed last time
          fileSelector = fileSelector.and(previousWork.failedFiles::contains);
        }

        Map<String,Map<String,List<TRowRange>>> filesGBL;
        filesGBL = getFilesGroupedByLocation(fileSelector);

        List<CompletableFuture<ProcessedFiles>> futures = new ArrayList<>();
        if (previousWork != null) {
          futures.add(CompletableFuture
              .completedFuture(new ProcessedFiles(previousWork.summaries, factory)));
        }

        for (Entry<String,Map<String,List<TRowRange>>> entry : filesGBL.entrySet()) {
          HostAndPort location = HostAndPort.fromString(entry.getKey());
          Map<String,List<TRowRange>> allFiles = entry.getValue();

          futures.add(CompletableFuture
              .supplyAsync(new FilesProcessor(tinfo, location, allFiles, cancelFlag), execSrv));
        }

        future = CompletableFutureUtil.merge(futures,
            (pf1, pf2) -> ProcessedFiles.merge(pf1, pf2, factory), ProcessedFiles::new);

        // when all processing is done, check for failed files... and if found, start processing
        // again
        future.thenRun(this::updateFuture);

      } catch (Exception e) {
        future = CompletableFuture.completedFuture(new ProcessedFiles());
        // force future to have this exception
        future.obtrudeException(e);
      }
    }

    private ProcessedFiles _get() {
      try {
        return future.get();
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
      } catch (ExecutionException e) {
        throw new RuntimeException(e);
      }
    }

    private synchronized CompletableFuture<ProcessedFiles> updateFuture() {
      if (future.isDone()) {
        if (!future.isCancelled() && !future.isCompletedExceptionally()) {
          ProcessedFiles pf = _get();
          if (pf.failedFiles.size() > 0) {
            initiateProcessing(pf);
          }
        }
      }

      return future;
    }

    synchronized void initiateProcessing() {
      Preconditions.checkState(future == null);
      initiateProcessing(null);
    }

    @Override
    public synchronized boolean cancel(boolean mayInterruptIfRunning) {
      boolean canceled = future.cancel(mayInterruptIfRunning);
      if (canceled) {
        cancelFlag.set(true);
      }
      return canceled;
    }

    @Override
    public synchronized boolean isCancelled() {
      return future.isCancelled();
    }

    @Override
    public synchronized boolean isDone() {
      updateFuture();
      if (future.isDone()) {
        if (future.isCancelled() || future.isCompletedExceptionally()) {
          return true;
        }

        ProcessedFiles pf = _get();
        if (pf.failedFiles.size() == 0) {
          return true;
        } else {
          updateFuture();
        }
      }

      return false;
    }

    @Override
    public SummaryCollection get() throws InterruptedException, ExecutionException {
      CompletableFuture<ProcessedFiles> futureRef = updateFuture();
      ProcessedFiles processedFiles = futureRef.get();
      while (processedFiles.failedFiles.size() > 0) {
        futureRef = updateFuture();
        processedFiles = futureRef.get();
      }
      return processedFiles.summaries;
    }

    @Override
    public SummaryCollection get(long timeout, TimeUnit unit)
        throws InterruptedException, ExecutionException, TimeoutException {
      long nanosLeft = unit.toNanos(timeout);
      long t1, t2;
      CompletableFuture<ProcessedFiles> futureRef = updateFuture();
      t1 = System.nanoTime();
      ProcessedFiles processedFiles = futureRef.get(Long.max(1, nanosLeft), TimeUnit.NANOSECONDS);
      t2 = System.nanoTime();
      nanosLeft -= (t2 - t1);
      while (processedFiles.failedFiles.size() > 0) {
        futureRef = updateFuture();
        t1 = System.nanoTime();
        processedFiles = futureRef.get(Long.max(1, nanosLeft), TimeUnit.NANOSECONDS);
        t2 = System.nanoTime();
        nanosLeft -= (t2 - t1);
      }
      return processedFiles.summaries;
    }
  }

  /**
   * This method reads a subset of file paths into memory and groups them by location. Then it
   * requests summaries for the files from each location/tablet server.
   */
  public Future<SummaryCollection> processPartition(ExecutorService execSrv, int modulus,
      int remainder) {
    PartitionFuture future =
        new PartitionFuture(TraceUtil.traceInfo(), execSrv, modulus, remainder);
    future.initiateProcessing();
    return future;
  }
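  // Illustrative note (not in the original source): modulus/remainder split the file
  // set by hash. For example, with modulus = 3, a file whose murmur3 hash has
  // absolute value 7 maps to remainder 7 % 3 == 1 and is handled only by the
  // processPartition(execSrv, 3, 1) call; running remainders 0, 1, and 2 covers
  // every file exactly once.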
  public interface FileSystemResolver {
    FileSystem get(Path file);
  }

  /**
   * This method will read summaries from a set of files.
   */
  public Future<SummaryCollection> processFiles(FileSystemResolver volMgr,
      Map<String,List<TRowRange>> files, BlockCache summaryCache, BlockCache indexCache,
      Cache<String,Long> fileLenCache, ExecutorService srp) {
    List<CompletableFuture<SummaryCollection>> futures = new ArrayList<>();
    for (Entry<String,List<TRowRange>> entry : files.entrySet()) {
      futures.add(CompletableFuture.supplyAsync(() -> {
        List<RowRange> rrl = Lists.transform(entry.getValue(), RowRange::new);
        return getSummaries(volMgr, entry.getKey(), rrl, summaryCache, indexCache, fileLenCache);
      }, srp));
    }

    return CompletableFutureUtil.merge(futures,
        (sc1, sc2) -> SummaryCollection.merge(sc1, sc2, factory), SummaryCollection::new);
  }

  private int countFiles() {
    // TODO use a batch scanner + iterator to parallelize counting files
    return TabletsMetadata.builder().forTable(tableId).overlapping(startRow, endRow).fetchFiles()
        .fetchPrev().build(ctx).stream().mapToInt(tm -> tm.getFiles().size()).sum();
  }

  private class GatherRequest implements Supplier<SummaryCollection> {

    private int remainder;
    private int modulus;
    private TInfo tinfo;
    private AtomicBoolean cancelFlag;

    GatherRequest(TInfo tinfo, int remainder, int modulus, AtomicBoolean cancelFlag) {
      this.remainder = remainder;
      this.modulus = modulus;
      this.tinfo = tinfo;
      this.cancelFlag = cancelFlag;
    }

    @Override
    public SummaryCollection get() {
      TSummaryRequest req = getRequest();

      TSummaries tSums;
      try {
        tSums = ServerClient.execute(ctx, new TabletClientService.Client.Factory(), client -> {
          TSummaries tsr =
              client.startGetSummariesForPartition(tinfo, ctx.rpcCreds(), req, modulus, remainder);
          while (!tsr.finished && !cancelFlag.get()) {
            // "contiuneGetSummaries" (sic) is the actual method name in the thrift API
            tsr = client.contiuneGetSummaries(tinfo, tsr.sessionId);
          }
          return tsr;
        });
      } catch (AccumuloException | AccumuloSecurityException e) {
        throw new RuntimeException(e);
      }

      if (cancelFlag.get()) {
        throw new RuntimeException("Operation canceled");
      }

      return new SummaryCollection(tSums);
    }
  }

  public Future<SummaryCollection> gather(ExecutorService es) {
    int numFiles = countFiles();

    log.debug("Gathering summaries from {} files", numFiles);

    if (numFiles == 0) {
      return CompletableFuture.completedFuture(new SummaryCollection());
    }

    // have each tablet server process ~100K files
    int numRequest = Math.max(numFiles / 100_000, 1);

    List<CompletableFuture<SummaryCollection>> futures = new ArrayList<>();

    AtomicBoolean cancelFlag = new AtomicBoolean(false);

    TInfo tinfo = TraceUtil.traceInfo();
    for (int i = 0; i < numRequest; i++) {
      futures.add(
          CompletableFuture.supplyAsync(new GatherRequest(tinfo, i, numRequest, cancelFlag), es));
    }

    Future<SummaryCollection> future = CompletableFutureUtil.merge(futures,
        (sc1, sc2) -> SummaryCollection.merge(sc1, sc2, factory), SummaryCollection::new);
    return new CancelFlagFuture<>(future, cancelFlag);
  }
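  // A minimal usage sketch (not in the original source; the executor setup and
  // shutdown handling here are assumptions, not part of this class's contract):
  //
  //   ExecutorService es = Executors.newFixedThreadPool(8);
  //   try {
  //     Gatherer gatherer = new Gatherer(ctx, request, tableConfig, cryptoService);
  //     SummaryCollection summaries = gatherer.gather(es).get();
  //   } finally {
  //     es.shutdown();
  //   }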
  // Ranges created above use an exclusive start row and an inclusive end row, which
  // Range encodes by appending a zero byte to each boundary row; stripping that byte
  // recovers the original row.
  private static Text removeTrailingZeroFromRow(Key k) {
    if (k != null) {
      Text t = new Text();
      ByteSequence row = k.getRowData();
      Preconditions.checkArgument(row.length() >= 1 && row.byteAt(row.length() - 1) == 0);
      t.set(row.getBackingArray(), row.offset(), row.length() - 1);
      return t;
    } else {
      return null;
    }
  }

  private RowRange toClippedExtent(Range r) {
    r = clipRange.clip(r);

    Text startRow = removeTrailingZeroFromRow(r.getStartKey());
    Text endRow = removeTrailingZeroFromRow(r.getEndKey());

    return new RowRange(startRow, endRow);
  }

  public static class RowRange {
    private Text startRow;
    private Text endRow;

    public RowRange(KeyExtent ke) {
      this.startRow = ke.getPrevEndRow();
      this.endRow = ke.getEndRow();
    }

    public RowRange(TRowRange trr) {
      this.startRow = ByteBufferUtil.toText(trr.startRow);
      this.endRow = ByteBufferUtil.toText(trr.endRow);
    }

    public RowRange(Text startRow, Text endRow) {
      this.startRow = startRow;
      this.endRow = endRow;
    }

    public Range toRange() {
      return new Range(startRow, false, endRow, true);
    }

    public TRowRange toThrift() {
      return new TRowRange(TextUtil.getByteBuffer(startRow), TextUtil.getByteBuffer(endRow));
    }

    public Text getStartRow() {
      return startRow;
    }

    public Text getEndRow() {
      return endRow;
    }

    @Override
    public String toString() {
      return startRow + " " + endRow;
    }
  }

  private SummaryCollection getSummaries(FileSystemResolver volMgr, String file,
      List<RowRange> ranges, BlockCache summaryCache, BlockCache indexCache,
      Cache<String,Long> fileLenCache) {
    Path path = new Path(file);
    Configuration conf = ctx.getHadoopConf();
    return SummaryReader.load(volMgr.get(path), conf, factory, path, summarySelector,
        summaryCache, indexCache, fileLenCache, cryptoService).getSummaries(ranges);
  }
}