org.apache.fluo.recipes.export.ExportQueue.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.fluo.recipes.export.ExportQueue.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package org.apache.fluo.recipes.export;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

import com.google.common.base.Preconditions;
import com.google.common.hash.Hashing;
import org.apache.fluo.api.client.TransactionBase;
import org.apache.fluo.api.config.FluoConfiguration;
import org.apache.fluo.api.config.ObserverConfiguration;
import org.apache.fluo.api.config.SimpleConfiguration;
import org.apache.fluo.api.data.Bytes;
import org.apache.fluo.recipes.common.Pirtos;
import org.apache.fluo.recipes.common.RowRange;
import org.apache.fluo.recipes.common.TransientRegistry;
import org.apache.fluo.recipes.serialization.SimpleSerializer;

public class ExportQueue<K, V> {

    /** Suffix appended to a queue id to form the start row of the queue's row range. */
    private static final String RANGE_BEGIN = "#";
    /** Suffix appended to a queue id to form the end row of the queue's row range. */
    private static final String RANGE_END = ":~";

    // The fields below are assigned exactly once, by the constructor.

    /** Number of buckets the queue's entries are hashed across. */
    private final int numBuckets;
    /** Serializer applied to every exported key and value. */
    private final SimpleSerializer serializer;
    /** Identifier of this queue. */
    private final String queueId;

    /**
     * Creates a queue handle from already loaded options.
     *
     * <p>
     * Usage hint: an instance could be created once in an observer's init method. It is an open
     * question whether to have a queue for each type of data being exported; fewer queues may be
     * more efficient because more batching can happen at export time.
     *
     * @param opts supplies the queue id and the number of buckets
     * @param serializer serializer used for the keys and values added to this queue
     */
    ExportQueue(Options opts, SimpleSerializer serializer) {
        // TODO sanity check key type based on type params
        // TODO defer creating classes until needed.. so that its not done during Fluo init
        this.queueId = opts.queueId;
        this.numBuckets = opts.numBuckets;
        this.serializer = serializer;
    }

    /**
     * Queues a single key/value pair for export by handing a one-element iterator to
     * {@link #addAll(TransactionBase, Iterator)}.
     *
     * @param tx transaction the export is written with
     * @param key key of the export
     * @param value value of the export
     */
    public void add(TransactionBase tx, K key, V value) {
        Export<K, V> single = new Export<>(key, value);
        Iterator<Export<K, V>> oneShot = Collections.singleton(single).iterator();
        addAll(tx, oneShot);
    }

    /**
     * Queues every export produced by the iterator.
     *
     * <p>
     * Each key and value is serialized, the bucket is picked from the murmur3 32-bit hash of the
     * serialized key, and the entry is added to that bucket using the transaction's start
     * timestamp. The export observer of a bucket is notified at most once per call.
     *
     * @param tx transaction the exports are written with
     * @param exports the exports to queue
     */
    public void addAll(TransactionBase tx, Iterator<Export<K, V>> exports) {
        // Ids of the buckets whose observer was already notified during this call.
        final Set<Integer> notified = new HashSet<>();

        exports.forEachRemaining(entry -> {
            final byte[] keyBytes = serializer.serialize(entry.getKey());
            final byte[] valueBytes = serializer.serialize(entry.getValue());

            final int keyHash = Hashing.murmur3_32().hashBytes(keyBytes).asInt();
            final int bucketId = Math.abs(keyHash % numBuckets);

            final ExportBucket target = new ExportBucket(tx, queueId, bucketId, numBuckets);
            target.add(tx.getStartTimestamp(), keyBytes, valueBytes);

            // Set.add returns true only the first time a bucket id is seen.
            if (notified.add(bucketId)) {
                target.notifyExportObserver();
            }
        });
    }

    /**
     * Builds a queue handle for a previously configured export queue.
     *
     * <p>
     * The queue's options are loaded from {@code appConfig} and the serializer is obtained from
     * {@code SimpleSerializer.getInstance(appConfig)}.
     *
     * @param exportQueueId id of the export queue
     * @param appConfig application configuration holding the queue's options
     * @return a new queue handle
     * @throws RuntimeException wrapping any exception raised while obtaining the serializer or
     *         constructing the queue
     */
    public static <K2, V2> ExportQueue<K2, V2> getInstance(String exportQueueId, SimpleConfiguration appConfig) {
        final Options queueOptions = new Options(exportQueueId, appConfig);
        try {
            SimpleSerializer configuredSerializer = SimpleSerializer.getInstance(appConfig);
            return new ExportQueue<>(queueOptions, configuredSerializer);
        } catch (Exception failure) {
            // TODO
            throw new RuntimeException(failure);
        }
    }

    /**
     * Call this method before initializing Fluo.
     *
     * <p>
     * Saves the options into the application configuration, registers {@code ExportObserver} with
     * the queue id as its {@code queueId} parameter, and registers the queue's row range as a
     * transient range named {@code exportQueue.<queueId>}.
     *
     * @param fluoConfig The configuration that will be used to initialize fluo.
     * @param opts The options of the export queue being configured.
     */
    public static void configure(FluoConfiguration fluoConfig, Options opts) {
        SimpleConfiguration target = fluoConfig.getAppConfiguration();
        opts.save(target);

        final String id = opts.queueId;

        fluoConfig.addObserver(new ObserverConfiguration(ExportObserver.class.getName())
                .setParameters(Collections.singletonMap("queueId", id)));

        // Boundaries of the rows used by this queue.
        Bytes firstRow = Bytes.of(id + RANGE_BEGIN);
        Bytes lastRow = Bytes.of(id + RANGE_END);

        TransientRegistry registry = new TransientRegistry(fluoConfig.getAppConfiguration());
        registry.addTransientRange("exportQueue." + id, new RowRange(firstRow, lastRow));
    }

    /**
     * Return suggested Fluo table optimizations for all previously configured export queues.
     *
     * <p>
     * Queue ids are discovered from the configuration keys under the export queue prefix; the id
     * is the part of the key between the prefix and the next {@code '.'}.
     *
     * @param appConfig Must pass in the application configuration obtained from
     *        {@code FluoClient.getAppConfiguration()} or
     *        {@code FluoConfiguration.getAppConfiguration()}
     * @return the merged optimizations of every discovered export queue
     */
    public static Pirtos getTableOptimizations(SimpleConfiguration appConfig) {
        Set<String> queueIds = new HashSet<>();
        String keyRoot = Options.PREFIX.substring(0, Options.PREFIX.length() - 1);
        appConfig.getKeys(keyRoot).forEachRemaining(k -> {
            // Only a key carrying the full dotted prefix can name a queue; skipping anything else
            // also keeps substring() from running past the end of a shorter key.
            if (k.startsWith(Options.PREFIX)) {
                queueIds.add(k.substring(Options.PREFIX.length()).split("\\.", 2)[0]);
            }
        });

        Pirtos pirtos = new Pirtos();
        for (String qid : queueIds) {
            pirtos.merge(getTableOptimizations(qid, appConfig));
        }

        return pirtos;
    }

    /**
     * Return suggested Fluo table optimizations for the specified export queue.
     *
     * <p>
     * The suggested splits are the start and end rows of the queue's row range plus one bucket row
     * for every {@code bucketsPerTablet} buckets.
     *
     * @param queueId Id of a previously configured export queue
     * @param appConfig Must pass in the application configuration obtained from
     *        {@code FluoClient.getAppConfiguration()} or
     *        {@code FluoConfiguration.getAppConfiguration()}
     * @return the suggested splits and tablet grouping regex for the queue
     * @throws IllegalArgumentException if the configured number of buckets per tablet is not
     *         positive
     */
    public static Pirtos getTableOptimizations(String queueId, SimpleConfiguration appConfig) {
        Options opts = new Options(queueId, appConfig);

        // The value is used as the loop step below; a step of zero would never terminate and a
        // negative one would walk away from the bound.
        int bucketsPerTablet = opts.getBucketsPerTablet();
        Preconditions.checkArgument(bucketsPerTablet > 0, "bucketsPerTablet is <= 0 : " + bucketsPerTablet);

        List<Bytes> splits = new ArrayList<>();
        splits.add(Bytes.of(opts.queueId + RANGE_BEGIN));
        splits.add(Bytes.of(opts.queueId + RANGE_END));

        List<Bytes> exportSplits = new ArrayList<>();
        for (int i = bucketsPerTablet; i < opts.numBuckets; i += bucketsPerTablet) {
            exportSplits.add(ExportBucket.generateBucketRow(opts.queueId, i, opts.numBuckets));
        }
        Collections.sort(exportSplits);
        splits.addAll(exportSplits);

        Pirtos pirtos = new Pirtos();
        pirtos.setSplits(splits);

        // the tablet with end row <queueId># does not contain any data for the export queue and
        // should not be grouped with the export queue
        pirtos.setTabletGroupingRegex(Pattern.quote(opts.queueId + ":"));

        return pirtos;
    }

    /**
     * Settings of a single export queue. Instances are either built by the application and stored
     * with {@link #save(SimpleConfiguration)}, or loaded back from the application configuration.
     * All settings are stored under keys of the form {@code recipes.exportQueue.<queueId>.<name>}.
     */
    public static class Options {

        private static final String PREFIX = "recipes.exportQueue.";
        static final long DEFAULT_BUFFER_SIZE = 1 << 20;
        static final int DEFAULT_BUCKETS_PER_TABLET = 10;

        int numBuckets;
        // null means "not set"; the getters then fall back to the defaults above
        Integer bucketsPerTablet;
        Long bufferSize;

        String keyType;
        String valueType;
        String exporterType;
        String queueId;

        /**
         * Loads the options of a queue from the application configuration.
         *
         * @param queueId id of the queue whose options are read
         * @param appConfig application configuration the options were saved to
         * @throws IllegalArgumentException if the stored number of buckets, buffer size or buckets
         *         per tablet is not positive
         */
        Options(String queueId, SimpleConfiguration appConfig) {
            this.queueId = queueId;

            String keyBase = PREFIX + queueId;
            this.numBuckets = appConfig.getInt(keyBase + ".buckets");
            this.exporterType = appConfig.getString(keyBase + ".exporter");
            this.keyType = appConfig.getString(keyBase + ".key");
            this.valueType = appConfig.getString(keyBase + ".val");
            this.bufferSize = appConfig.getLong(keyBase + ".bufferSize", DEFAULT_BUFFER_SIZE);
            this.bucketsPerTablet = appConfig.getInt(keyBase + ".bucketsPerTablet", DEFAULT_BUCKETS_PER_TABLET);

            // Apply the same limits the public constructor and setters enforce: the bucket count is
            // used as a modulus and buckets-per-tablet as a loop step, so neither may be <= 0.
            Preconditions.checkArgument(numBuckets > 0, "buckets is <= 0 : " + numBuckets);
            Preconditions.checkArgument(bufferSize > 0, "Buffer size must be positive");
            Preconditions.checkArgument(bucketsPerTablet > 0, "bucketsPerTablet is <= 0 : " + bucketsPerTablet);
        }

        /**
         * Creates options for a new export queue.
         *
         * @param queueId id of the queue
         * @param exporterType class name of the exporter
         * @param keyType class name of the export key type
         * @param valueType class name of the export value type
         * @param buckets number of buckets, must be positive
         * @throws IllegalArgumentException if {@code buckets} is not positive
         * @throws NullPointerException if any of the string arguments is null
         */
        public Options(String queueId, String exporterType, String keyType, String valueType, int buckets) {
            Preconditions.checkArgument(buckets > 0, "buckets is <= 0 : " + buckets);

            this.queueId = Preconditions.checkNotNull(queueId, "queueId is null");
            this.numBuckets = buckets;
            this.exporterType = Preconditions.checkNotNull(exporterType, "exporterType is null");
            this.keyType = Preconditions.checkNotNull(keyType, "keyType is null");
            this.valueType = Preconditions.checkNotNull(valueType, "valueType is null");
        }

        /**
         * Creates options for a new export queue from classes instead of class names.
         *
         * @param queueId id of the queue
         * @param exporter exporter class
         * @param keyType class of the export keys
         * @param valueType class of the export values
         * @param buckets number of buckets, must be positive
         */
        public <K, V> Options(String queueId, Class<? extends Exporter<K, V>> exporter, Class<K> keyType,
                Class<V> valueType, int buckets) {
            this(queueId, exporter.getName(), keyType.getName(), valueType.getName(), buckets);
        }

        /**
         * Sets a limit on the amount of serialized updates to read into memory. Additional memory will
         * be used to actually deserialize and process the updates. This limit does not account for
         * object overhead in java, which can be significant.
         *
         * <p>
         * The way memory read is calculated is by summing the length of serialized key and value byte
         * arrays. Once this sum exceeds the configured memory limit, no more export key values are
         * processed in the current transaction. When not everything is processed, the observer
         * processing exports will notify itself causing another transaction to continue processing
         * later.
         *
         * @param bufferSize the limit, must be positive
         * @return this object, to allow chaining
         * @throws IllegalArgumentException if {@code bufferSize} is not positive
         */
        public Options setBufferSize(long bufferSize) {
            Preconditions.checkArgument(bufferSize > 0, "Buffer size must be positive");
            this.bufferSize = bufferSize;
            return this;
        }

        /**
         * @return the configured buffer size, or {@link #DEFAULT_BUFFER_SIZE} when none was set
         */
        long getBufferSize() {
            if (bufferSize == null) {
                return DEFAULT_BUFFER_SIZE;
            }

            return bufferSize;
        }

        /**
         * Sets the number of buckets per tablet to generate. This affects how many split points will be
         * generated when optimizing the Accumulo table.
         *
         * @param bucketsPerTablet buckets per tablet, must be positive
         * @return this object, to allow chaining
         * @throws IllegalArgumentException if {@code bucketsPerTablet} is not positive
         */
        public Options setBucketsPerTablet(int bucketsPerTablet) {
            Preconditions.checkArgument(bucketsPerTablet > 0, "bucketsPerTablet is <= 0 : " + bucketsPerTablet);
            this.bucketsPerTablet = bucketsPerTablet;
            return this;
        }

        /**
         * @return the configured buckets per tablet, or {@link #DEFAULT_BUCKETS_PER_TABLET} when
         *         none was set
         */
        int getBucketsPerTablet() {
            if (bucketsPerTablet == null) {
                return DEFAULT_BUCKETS_PER_TABLET;
            }

            return bucketsPerTablet;
        }

        /**
         * Stores these options in the given application configuration. The buffer size and buckets
         * per tablet are only written when they were explicitly set.
         *
         * @param appConfig configuration to write the options to
         */
        void save(SimpleConfiguration appConfig) {
            String keyBase = PREFIX + queueId;
            appConfig.setProperty(keyBase + ".buckets", Integer.toString(numBuckets));
            appConfig.setProperty(keyBase + ".exporter", String.valueOf(exporterType));
            appConfig.setProperty(keyBase + ".key", keyType);
            appConfig.setProperty(keyBase + ".val", valueType);

            if (bufferSize != null) {
                appConfig.setProperty(keyBase + ".bufferSize", bufferSize);
            }
            if (bucketsPerTablet != null) {
                appConfig.setProperty(keyBase + ".bucketsPerTablet", bucketsPerTablet);
            }
        }
    }
}