/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.phoenix.iterate;

import static org.apache.phoenix.monitoring.GlobalClientMetrics.GLOBAL_NUM_PARALLEL_SCANS;

import java.sql.SQLException;
import java.util.Collections;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;

import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.phoenix.compile.QueryPlan;
import org.apache.phoenix.job.JobManager.JobCallable;
import org.apache.phoenix.monitoring.CombinableMetric;
import org.apache.phoenix.monitoring.MetricType;
import org.apache.phoenix.monitoring.ReadMetricQueue;
import org.apache.phoenix.monitoring.TaskExecutionMetricsHolder;
import org.apache.phoenix.trace.util.Tracing;
import org.apache.phoenix.util.LogUtil;
import org.apache.phoenix.util.ScanUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;

/**
 * Class that parallelizes the scan over a table using the ExecutorService provided.
 * Each region of the table will be scanned in parallel, with the results accessible
 * through {@link #getIterators()}.
 *
 * @since 0.1
 */
public class ParallelIterators extends BaseResultIterators {
    private static final Logger logger = LoggerFactory.getLogger(ParallelIterators.class);
    private static final String NAME = "PARALLEL";

    private final ParallelIteratorFactory iteratorFactory;

    public ParallelIterators(QueryPlan plan, Integer perScanLimit,
            ParallelIteratorFactory iteratorFactory, ParallelScanGrouper scanGrouper)
            throws SQLException {
        super(plan, perScanLimit, scanGrouper);
        this.iteratorFactory = iteratorFactory;
    }

    public ParallelIterators(QueryPlan plan, Integer perScanLimit,
            ParallelIteratorFactory iteratorFactory) throws SQLException {
        this(plan, perScanLimit, iteratorFactory, DefaultParallelScanGrouper.getInstance());
    }

    @Override
    protected void submitWork(List<List<Scan>> nestedScans,
            List<List<Pair<Scan, Future<PeekingResultIterator>>>> nestedFutures,
            final Queue<PeekingResultIterator> allIterators, int estFlattenedSize) {
        // Pre-populate nestedFutures lists so that we can shuffle the scans
        // and add the future to the right nested list. By shuffling the scans
        // we get better utilization of the cluster since our thread executor
        // will spray the scans across machines as opposed to targeting a
        // single one since the scans are in row key order.
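        // Each ScanLocator built below pairs a scan with its (outer, inner) position,
        // so that even after shuffling, the scan's future can be written back into
        // nestedFutures at the slot matching the original row key order.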
        ExecutorService executor = context.getConnection().getQueryServices().getExecutor();
        List<ScanLocator> scanLocations = Lists.newArrayListWithExpectedSize(estFlattenedSize);
        for (int i = 0; i < nestedScans.size(); i++) {
            List<Scan> scans = nestedScans.get(i);
            List<Pair<Scan, Future<PeekingResultIterator>>> futures =
                    Lists.newArrayListWithExpectedSize(scans.size());
            nestedFutures.add(futures);
            for (int j = 0; j < scans.size(); j++) {
                Scan scan = scans.get(j);
                scanLocations.add(new ScanLocator(scan, i, j));
                futures.add(null); // placeholder
            }
        }
        // Shuffle so that we start execution across many machines
        // before we fill up the thread pool
        Collections.shuffle(scanLocations);
        ReadMetricQueue readMetrics = context.getReadMetricsQueue();
        final String physicalTableName = tableRef.getTable().getPhysicalName().getString();
        int numScans = scanLocations.size();
        context.getOverallQueryMetrics().updateNumParallelScans(numScans);
        GLOBAL_NUM_PARALLEL_SCANS.update(numScans);
        for (ScanLocator scanLocation : scanLocations) {
            final Scan scan = scanLocation.getScan();
            final CombinableMetric scanMetrics =
                    readMetrics.allotMetric(MetricType.SCAN_BYTES, physicalTableName);
            final TaskExecutionMetricsHolder taskMetrics =
                    new TaskExecutionMetricsHolder(readMetrics, physicalTableName);
            Future<PeekingResultIterator> future =
                    executor.submit(Tracing.wrap(new JobCallable<PeekingResultIterator>() {
                        @Override
                        public PeekingResultIterator call() throws Exception {
                            long startTime = System.currentTimeMillis();
                            ResultIterator scanner =
                                    new TableResultIterator(context, tableRef, scan, scanMetrics);
                            if (logger.isDebugEnabled()) {
                                logger.debug(LogUtil.addCustomAnnotations(
                                        "Id: " + scanId + ", Time: "
                                                + (System.currentTimeMillis() - startTime)
                                                + "ms, Scan: " + scan,
                                        ScanUtil.getCustomAnnotations(scan)));
                            }
                            PeekingResultIterator iterator = iteratorFactory.newIterator(
                                    context, scanner, scan, physicalTableName);
                            // Fill the scanner's cache. This helps reduce latency since
                            // we are parallelizing the I/O needed.
                            iterator.peek();
                            allIterators.add(iterator);
                            return iterator;
                        }

                        /**
                         * Defines the grouping for round robin behavior. All threads spawned
                         * to process this scan will be grouped together and time sliced with
                         * other simultaneously executing parallel scans.
                         */
                        @Override
                        public Object getJobId() {
                            return ParallelIterators.this;
                        }

                        @Override
                        public TaskExecutionMetricsHolder getTaskExecutionMetric() {
                            return taskMetrics;
                        }
                    }, "Parallel scanner for table: " + tableRef.getTable().getName().getString()));
            // Add our future in the right place so that we can concatenate the
            // results of the inner futures versus merge sorting across all of them.
            nestedFutures.get(scanLocation.getOuterListIndex()).set(
                    scanLocation.getInnerListIndex(),
                    new Pair<Scan, Future<PeekingResultIterator>>(scan, future));
        }
    }

    @Override
    protected String getName() {
        return NAME;
    }
}
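/*
 * Minimal usage sketch (illustrative only; not part of the original source).
 * It assumes a compiled QueryPlan ("plan") and a ParallelIteratorFactory
 * implementation ("factory") obtained from the surrounding Phoenix code;
 * both names are placeholders, not members of this class. getIterators(),
 * inherited from BaseResultIterators, submits the scans and blocks until
 * the per-region iterators are ready.
 *
 *   ParallelIterators iterators = new ParallelIterators(plan, null, factory);
 *   for (PeekingResultIterator scanner : iterators.getIterators()) {
 *       try {
 *           Tuple tuple;
 *           while ((tuple = scanner.next()) != null) {
 *               // process tuple; inner futures are already back in row key order
 *           }
 *       } finally {
 *           scanner.close();
 *       }
 *   }
 */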