/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.crunch.io.hbase;

import static org.apache.crunch.types.writable.Writables.bytes;
import static org.apache.crunch.types.writable.Writables.nulls;
import static org.apache.crunch.types.writable.Writables.tableOf;

import java.io.IOException;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.Set;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.primitives.Longs;
import org.apache.crunch.CrunchRuntimeException;
import org.apache.crunch.DoFn;
import org.apache.crunch.Emitter;
import org.apache.crunch.FilterFn;
import org.apache.crunch.GroupingOptions;
import org.apache.crunch.MapFn;
import org.apache.crunch.PCollection;
import org.apache.crunch.PTable;
import org.apache.crunch.Pair;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.dist.DistributedPipeline;
import org.apache.crunch.lib.sort.TotalOrderPartitioner;
import org.apache.crunch.types.writable.Writables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.TimeRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.SequenceFile;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public final class HFileUtils {

  private static final Logger LOG = LoggerFactory.getLogger(HFileUtils.class);
  /**
   * Compares {@code KeyValue}s by family, qualifier, timestamp (descending) and type (descending).
   */
  private static final Comparator<KeyValue> KEY_VALUE_COMPARATOR = new Comparator<KeyValue>() {

    @Override
    public int compare(KeyValue l, KeyValue r) {
      int cmp;
      if ((cmp = compareFamily(l, r)) != 0) {
        return cmp;
      }
      if ((cmp = compareQualifier(l, r)) != 0) {
        return cmp;
      }
      if ((cmp = compareTimestamp(l, r)) != 0) {
        return cmp;
      }
      if ((cmp = compareType(l, r)) != 0) {
        return cmp;
      }
      return 0;
    }

    private int compareFamily(KeyValue l, KeyValue r) {
      return Bytes.compareTo(
          l.getBuffer(), l.getFamilyOffset(), l.getFamilyLength(),
          r.getBuffer(), r.getFamilyOffset(), r.getFamilyLength());
    }

    private int compareQualifier(KeyValue l, KeyValue r) {
      return Bytes.compareTo(
          l.getBuffer(), l.getQualifierOffset(), l.getQualifierLength(),
          r.getBuffer(), r.getQualifierOffset(), r.getQualifierLength());
    }

    private int compareTimestamp(KeyValue l, KeyValue r) {
      // The arguments are intentionally reversed (r before l) to sort
      // timestamps in descending order, as HBase expects.
      return Longs.compare(r.getTimestamp(), l.getTimestamp());
    }

    private int compareType(KeyValue l, KeyValue r) {
      return (int) r.getType() - (int) l.getType();
    }
  };

  private static class FilterByFamilyFn<C extends Cell> extends FilterFn<C> {

    private final byte[] family;

    private FilterByFamilyFn(byte[] family) {
      this.family = family;
    }

    @Override
    public boolean accept(C input) {
      return Bytes.equals(
          input.getFamilyArray(), input.getFamilyOffset(), input.getFamilyLength(),
          family, 0, family.length);
    }

    @Override
    public boolean disableDeepCopy() {
      return true;
    }
  }

  private static class StartRowFilterFn<C extends Cell> extends FilterFn<C> {

    private final byte[] startRow;

    private StartRowFilterFn(byte[] startRow) {
      this.startRow = startRow;
    }

    @Override
    public boolean accept(C input) {
      return Bytes.compareTo(
          input.getRowArray(), input.getRowOffset(), input.getRowLength(),
          startRow, 0, startRow.length) >= 0;
    }
  }

  private static class StopRowFilterFn<C extends Cell> extends FilterFn<C> {

    private final byte[] stopRow;

    private StopRowFilterFn(byte[] stopRow) {
      this.stopRow = stopRow;
    }

    @Override
    public boolean accept(C input) {
      return Bytes.compareTo(
          input.getRowArray(), input.getRowOffset(), input.getRowLength(),
          stopRow, 0, stopRow.length) < 0;
    }
  }

  private static class FamilyMapFilterFn<C extends Cell> extends FilterFn<C> {

    private static class Column implements Serializable {

      private final byte[] family;
      private final byte[] qualifier;

      private Column(byte[] family, byte[] qualifier) {
        this.family = family;
        this.qualifier = qualifier;
      }

      private byte[] getFamily() {
        return family;
      }

      private byte[] getQualifier() {
        return qualifier;
      }
    }

    private final List<byte[]> families = Lists.newArrayList();
    private final List<Column> qualifiers = Lists.newArrayList();

    private transient Set<ByteBuffer> familySet;
    private transient Set<Pair<ByteBuffer, ByteBuffer>> qualifierSet;

    private FamilyMapFilterFn(Map<byte[], NavigableSet<byte[]>> familyMap) {
      // Holds the accepted families and qualifiers in Lists, as ByteBuffer is not Serializable.
      for (Map.Entry<byte[], NavigableSet<byte[]>> e : familyMap.entrySet()) {
        byte[] f = e.getKey();
        if (e.getValue() == null) {
          families.add(f);
        } else {
          for (byte[] q : e.getValue()) {
            qualifiers.add(new Column(f, q));
          }
        }
      }
    }

    @Override
    public void initialize() {
      ImmutableSet.Builder<ByteBuffer> familySetBuilder = ImmutableSet.builder();
      ImmutableSet.Builder<Pair<ByteBuffer, ByteBuffer>> qualifierSetBuilder = ImmutableSet.builder();
      for (byte[] f : families) {
        familySetBuilder.add(ByteBuffer.wrap(f));
      }
      for (Column e : qualifiers) {
        byte[] f = e.getFamily();
        byte[] q = e.getQualifier();
        qualifierSetBuilder.add(Pair.of(ByteBuffer.wrap(f), ByteBuffer.wrap(q)));
      }
      this.familySet = familySetBuilder.build();
      this.qualifierSet = qualifierSetBuilder.build();
    }

    @Override
    public boolean accept(C input) {
      ByteBuffer f = ByteBuffer.wrap(input.getFamilyArray(), input.getFamilyOffset(), input.getFamilyLength());
      ByteBuffer q = ByteBuffer.wrap(input.getQualifierArray(), input.getQualifierOffset(), input.getQualifierLength());
      return familySet.contains(f) || qualifierSet.contains(Pair.of(f, q));
    }
  }
  private static class TimeRangeFilterFn<C extends Cell> extends FilterFn<C> {

    private final long minTimestamp;
    private final long maxTimestamp;

    private TimeRangeFilterFn(TimeRange timeRange) {
      // The TimeRange cannot be stored in a field directly, as it is not Serializable.
      this.minTimestamp = timeRange.getMin();
      this.maxTimestamp = timeRange.getMax();
    }

    @Override
    public boolean accept(C input) {
      return (minTimestamp <= input.getTimestamp() && input.getTimestamp() < maxTimestamp);
    }
  }

  public static class KeyValueComparator implements RawComparator<BytesWritable> {

    @Override
    public int compare(byte[] left, int loffset, int llength, byte[] right, int roffset, int rlength) {
      // BytesWritable serializes its length in the first 4 bytes. We simply skip
      // those bytes here, because KeyValue serializes its own size.
      if (llength < 4) {
        throw new AssertionError("Too small llength: " + llength);
      }
      if (rlength < 4) {
        throw new AssertionError("Too small rlength: " + rlength);
      }
      Cell leftKey = HBaseTypes.bytesToKeyValue(left, loffset + 4, llength - 4);
      Cell rightKey = HBaseTypes.bytesToKeyValue(right, roffset + 4, rlength - 4);

      byte[] lRow = CellUtil.cloneRow(leftKey);
      byte[] rRow = CellUtil.cloneRow(rightKey);
      int rowCmp = Bytes.compareTo(lRow, rRow);
      if (rowCmp != 0) {
        return rowCmp;
      } else {
        return KeyValue.COMPARATOR.compare(leftKey, rightKey);
      }
    }

    @Override
    public int compare(BytesWritable left, BytesWritable right) {
      return KeyValue.COMPARATOR.compare(
          HBaseTypes.bytesToKeyValue(left), HBaseTypes.bytesToKeyValue(right));
    }
  }

  private static class ExtractRowFn<C extends Cell> extends MapFn<C, ByteBuffer> {
    @Override
    public ByteBuffer map(Cell input) {
      // A copy of the row must be made, because the underlying buffer may be reused after this call.
      return ByteBuffer.wrap(CellUtil.cloneRow(input));
    }
  }

  public static PCollection<Result> scanHFiles(Pipeline pipeline, Path path) {
    return scanHFiles(pipeline, path, new Scan());
  }

  /**
   * Scans HFiles with filter conditions.
   *
   * @param pipeline the pipeline
   * @param path path to HFiles
   * @param scan filtering conditions
   * @return {@code Result}s
   * @see #combineIntoRow(org.apache.crunch.PCollection, org.apache.hadoop.hbase.client.Scan)
   */
  public static PCollection<Result> scanHFiles(Pipeline pipeline, Path path, Scan scan) {
    return scanHFiles(pipeline, ImmutableList.of(path), scan);
  }

  public static PCollection<Result> scanHFiles(Pipeline pipeline, List<Path> paths, Scan scan) {
    PCollection<KeyValue> in = pipeline.read(new HFileSource(paths, scan));
    return combineIntoRow(in, scan);
  }
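  // Usage sketch (illustrative; the pipeline class, input path and family name are assumptions,
  // not part of this API): scan HFiles under a directory, keeping only cells from one family.
  //
  //   Pipeline pipeline = new MRPipeline(MyApp.class, conf);
  //   Scan scan = new Scan();
  //   scan.addFamily(Bytes.toBytes("cf"));
  //   PCollection<Result> rows = HFileUtils.scanHFiles(pipeline, new Path("/data/hfiles"), scan);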
  public static <C extends Cell> PCollection<Result> combineIntoRow(PCollection<C> cells) {
    return combineIntoRow(cells, new Scan());
  }

  /**
   * Combines {@link KeyValue}s into {@link Result}s.
   *
   * All {@code KeyValue}s that belong to the same row are combined. Users may provide filter
   * conditions (specified by {@code scan}). Deletes are dropped and only the specified number
   * of versions is kept.
   *
   * @param cells the input {@code KeyValue}s
   * @param scan filter conditions; currently start row, stop row and family map are supported
   * @return {@code Result}s
   */
  public static <C extends Cell> PCollection<Result> combineIntoRow(PCollection<C> cells, Scan scan) {
    if (!Bytes.equals(scan.getStartRow(), HConstants.EMPTY_START_ROW)) {
      cells = cells.filter(new StartRowFilterFn<C>(scan.getStartRow()));
    }
    if (!Bytes.equals(scan.getStopRow(), HConstants.EMPTY_END_ROW)) {
      cells = cells.filter(new StopRowFilterFn<C>(scan.getStopRow()));
    }
    if (scan.hasFamilies()) {
      cells = cells.filter(new FamilyMapFilterFn<C>(scan.getFamilyMap()));
    }
    TimeRange timeRange = scan.getTimeRange();
    if (timeRange != null && (timeRange.getMin() > 0 || timeRange.getMax() < Long.MAX_VALUE)) {
      cells = cells.filter(new TimeRangeFilterFn<C>(timeRange));
    }
    // TODO(chaoshi): support Scan#getFilter

    PTable<ByteBuffer, C> cellsByRow = cells.by(new ExtractRowFn<C>(), bytes());
    final int versions = scan.getMaxVersions();
    return cellsByRow.groupByKey().parallelDo("CombineKeyValueIntoRow",
        new DoFn<Pair<ByteBuffer, Iterable<C>>, Result>() {
          @Override
          public void process(Pair<ByteBuffer, Iterable<C>> input, Emitter<Result> emitter) {
            List<KeyValue> cells = Lists.newArrayList();
            for (Cell kv : input.second()) {
              try {
                cells.add(KeyValue.cloneAndAddTags(kv, ImmutableList.<Tag>of())); // assuming the input fits into memory
              } catch (Exception e) {
                throw new RuntimeException(e);
              }
            }
            Result result = doCombineIntoRow(cells, versions);
            if (result == null) {
              return;
            }
            emitter.emit(result);
          }
        }, HBaseTypes.results());
  }

  public static <C extends Cell> void writeToHFilesForIncrementalLoad(
      PCollection<C> cells,
      HTable table,
      Path outputPath) throws IOException {
    writeToHFilesForIncrementalLoad(cells, table, outputPath, false);
  }

  /**
   * Writes out HFiles from the provided <code>cells</code> and <code>table</code>.
   * <code>limitToAffectedRegions</code> indicates that the regions the <code>cells</code> will be
   * loaded into should be identified prior to writing HFiles. Identifying the regions ahead of
   * time reduces the number of reducers needed when writing, which is beneficial if the data to
   * be loaded only touches a small subset of the regions in the table. If set to false, the
   * number of reducers will equal the number of regions in the table.
   *
   * @see <a href='https://issues.apache.org/jira/browse/CRUNCH-588'>CRUNCH-588</a>
   */
  public static <C extends Cell> void writeToHFilesForIncrementalLoad(
      PCollection<C> cells,
      HTable table,
      Path outputPath,
      boolean limitToAffectedRegions) throws IOException {
    HColumnDescriptor[] families = table.getTableDescriptor().getColumnFamilies();
    if (families.length == 0) {
      LOG.warn("{} has no column families", table);
      return;
    }
    PCollection<C> partitioned = sortAndPartition(cells, table, limitToAffectedRegions);
    for (HColumnDescriptor f : families) {
      byte[] family = f.getName();
      partitioned.filter(new FilterByFamilyFn<C>(family))
          .write(new HFileTarget(new Path(outputPath, Bytes.toString(family)), f));
    }
  }
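  // Usage sketch (illustrative; the table name and output path are assumptions): write HFiles
  // for a table, run the pipeline, then hand the output directory to HBase's bulk loader, which
  // expects the per-family subdirectory layout produced above.
  //
  //   HTable table = new HTable(conf, "my_table");
  //   Path out = new Path("/tmp/hfiles");
  //   HFileUtils.writeToHFilesForIncrementalLoad(cells, table, out);
  //   pipeline.done();
  //   new LoadIncrementalHFiles(conf).doBulkLoad(out, table); // org.apache.hadoop.hbase.mapreduce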
  public static void writePutsToHFilesForIncrementalLoad(
      PCollection<Put> puts,
      HTable table,
      Path outputPath) throws IOException {
    writePutsToHFilesForIncrementalLoad(puts, table, outputPath, false);
  }

  /**
   * Writes out HFiles from the provided <code>puts</code> and <code>table</code>.
   * <code>limitToAffectedRegions</code> indicates that the regions the <code>puts</code> will be
   * loaded into should be identified prior to writing HFiles. Identifying the regions ahead of
   * time reduces the number of reducers needed when writing, which is beneficial if the data to
   * be loaded only touches a small subset of the regions in the table. If set to false, the
   * number of reducers will equal the number of regions in the table.
   *
   * @see <a href='https://issues.apache.org/jira/browse/CRUNCH-588'>CRUNCH-588</a>
   */
  public static void writePutsToHFilesForIncrementalLoad(
      PCollection<Put> puts,
      HTable table,
      Path outputPath,
      boolean limitToAffectedRegions) throws IOException {
    PCollection<Cell> cells = puts.parallelDo("ConvertPutToCells", new DoFn<Put, Cell>() {
      @Override
      public void process(Put input, Emitter<Cell> emitter) {
        for (Cell cell : Iterables.concat(input.getFamilyCellMap().values())) {
          emitter.emit(cell);
        }
      }
    }, HBaseTypes.cells());
    writeToHFilesForIncrementalLoad(cells, table, outputPath, limitToAffectedRegions);
  }

  public static <C extends Cell> PCollection<C> sortAndPartition(PCollection<C> cells, HTable table) throws IOException {
    return sortAndPartition(cells, table, false);
  }

  /**
   * Sorts and partitions the provided <code>cells</code> for the given <code>table</code> so that
   * all elements that belong to the same region end up in the same reducer. When the flag
   * <code>limitToAffectedRegions</code> is set to true, the regions the data in <code>cells</code>
   * belongs to are identified, and the number of reducers is set to the number of affected
   * regions. If set to false, all regions are used and the number of reducers equals the number
   * of regions in the table.
   */
  public static <C extends Cell> PCollection<C> sortAndPartition(
      PCollection<C> cells,
      HTable table,
      boolean limitToAffectedRegions) throws IOException {
    Configuration conf = cells.getPipeline().getConfiguration();
    PTable<C, Void> t = cells.parallelDo("Pre-partition", new MapFn<C, Pair<C, Void>>() {
      @Override
      public Pair<C, Void> map(C input) {
        return Pair.of(input, (Void) null);
      }
    }, tableOf(cells.getPType(), nulls()));

    List<KeyValue> splitPoints;
    if (limitToAffectedRegions) {
      splitPoints = getSplitPoints(table, t);
    } else {
      splitPoints = getSplitPoints(table);
    }
    Path partitionFile = new Path(((DistributedPipeline) cells.getPipeline()).createTempPath(), "partition");
    writePartitionInfo(conf, partitionFile, splitPoints);
    GroupingOptions options = GroupingOptions.builder()
        .partitionerClass(TotalOrderPartitioner.class)
        .sortComparatorClass(KeyValueComparator.class)
        .conf(TotalOrderPartitioner.PARTITIONER_PATH, partitionFile.toString())
        .numReducers(splitPoints.size() + 1)
        .build();
    return t.groupByKey(options).ungroup().keys();
  }
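  // Worked example (illustrative, with made-up keys): a table whose three regions start at
  // "" (the first region), "g" and "t" yields the split points [g, t], so numReducers is
  // splitPoints.size() + 1 = 3, and the TotalOrderPartitioner sends rows below "g", rows in
  // ["g", "t") and rows at or above "t" to separate reducers, matching the region boundaries.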
  private static List<KeyValue> getSplitPoints(HTable table) throws IOException {
    List<byte[]> startKeys = ImmutableList.copyOf(table.getStartKeys());
    if (startKeys.isEmpty()) {
      throw new AssertionError(table + " has no regions!");
    }
    List<KeyValue> splitPoints = Lists.newArrayList();
    for (byte[] startKey : startKeys.subList(1, startKeys.size())) {
      KeyValue kv = KeyValueUtil.createFirstOnRow(startKey);
      LOG.debug("split row: " + Bytes.toString(CellUtil.cloneRow(kv)));
      splitPoints.add(kv);
    }
    return splitPoints;
  }

  private static <C> List<KeyValue> getSplitPoints(HTable table, PTable<C, Void> affectedRows) throws IOException {
    List<byte[]> startKeys;
    try {
      startKeys = Lists.newArrayList(table.getStartKeys());
      if (startKeys.isEmpty()) {
        throw new AssertionError(table + " has no regions!");
      }
    } catch (IOException e) {
      throw new CrunchRuntimeException(e);
    }

    Collections.sort(startKeys, Bytes.BYTES_COMPARATOR);

    Iterable<ByteBuffer> bufferedStartKeys = affectedRows
        .parallelDo(new DetermineAffectedRegionsFn(startKeys), Writables.bytes()).materialize();

    // A set is used here to drop the potential duplicate start keys emitted.
    ImmutableSet.Builder<KeyValue> startKeyBldr = ImmutableSet.builder();
    for (final ByteBuffer bufferedStartKey : bufferedStartKeys) {
      startKeyBldr.add(KeyValueUtil.createFirstOnRow(bufferedStartKey.array()));
    }

    return ImmutableList.copyOf(startKeyBldr.build());
  }

  /**
   * Spins through the {@link Cell}s and determines which regions the data will be loaded into.
   * The regions are searched via binary search. The region start keys should be provided by the
   * caller to cut down on calls to the HMaster for those start keys.
   */
  public static class DetermineAffectedRegionsFn<C extends Cell> extends DoFn<Pair<C, Void>, ByteBuffer> {

    private final Set<Cell> startKeysToEmit = new HashSet<>();
    List<byte[]> startKeys;
    TotalOrderPartitioner.Node partitions;
    List<Cell> regionStartKeys = Lists.newArrayList();

    public DetermineAffectedRegionsFn(List<byte[]> startKeys) {
      this.startKeys = startKeys;
    }

    @Override
    public void initialize() {
      for (byte[] startKey : startKeys.subList(1, startKeys.size())) {
        Cell cell = KeyValueUtil.createFirstOnRow(startKey);
        regionStartKeys.add(cell);
      }
      partitions = new TotalOrderPartitioner.BinarySearchNode<>(
          regionStartKeys.toArray(new Cell[regionStartKeys.size()]),
          new KeyValue.KVComparator());
    }

    @Override
    public void process(Pair<C, Void> input, Emitter<ByteBuffer> emitter) {
      // Convert the cell to a KeyValue for the partition lookup.
      int position = partitions.findPartition(KeyValueUtil.ensureKeyValue(input.first()));
      // If the position is after the last key, use the last start key
      // as the split for this key, since it should fall into that region.
      if (position >= regionStartKeys.size() && regionStartKeys.size() > 1) {
        position = regionStartKeys.size() - 1;
      }
      Cell foundCell = regionStartKeys.get(position);
      if (!startKeysToEmit.contains(foundCell)) {
        startKeysToEmit.add(foundCell);
        emitter.emit(ByteBuffer.wrap(CellUtil.cloneRow(foundCell)));
      }
    }
  }
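  // Illustrative note: when the input only touches a few of a table's regions,
  // DetermineAffectedRegionsFn emits one distinct start key per region touched, so the partition
  // file built from its output contains far fewer split points and the HFile-writing job runs
  // with correspondingly fewer reducers (the motivation behind CRUNCH-588).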
  private static void writePartitionInfo(
      Configuration conf,
      Path path,
      List<KeyValue> splitPoints) throws IOException {
    LOG.info("Writing {} split points to {}", splitPoints.size(), path);
    SequenceFile.Writer writer = SequenceFile.createWriter(
        path.getFileSystem(conf),
        conf,
        path,
        NullWritable.class,
        BytesWritable.class);
    for (KeyValue key : splitPoints) {
      writer.append(NullWritable.get(), HBaseTypes.keyValueToBytes(key));
    }
    writer.close();
  }

  private static Result doCombineIntoRow(List<KeyValue> kvs, int versions) {
    // shortcut for the common case
    if (kvs.isEmpty()) {
      return null;
    }
    if (kvs.size() == 1 && kvs.get(0).getType() == KeyValue.Type.Put.getCode()) {
      return new Result(kvs);
    }

    kvs = maybeDeleteFamily(kvs);

    // Sort KeyValues in place by family, qualifier and then timestamp descending
    // (on ties, deletes appear before puts).
    Collections.sort(kvs, KEY_VALUE_COMPARATOR);

    List<KeyValue> results = Lists.newArrayListWithCapacity(kvs.size());
    for (int i = 0, j; i < kvs.size(); i = j) {
      j = i + 1;
      while (j < kvs.size() && hasSameFamilyAndQualifier(kvs.get(i), kvs.get(j))) {
        j++;
      }
      results.addAll(getLatestKeyValuesOfColumn(kvs.subList(i, j), versions));
    }
    if (results.isEmpty()) {
      return null;
    }
    return new Result(results);
  }
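  // Worked example (illustrative, made-up timestamps): for one column with versions = 1 and the
  // cells {Put@3, Delete@3, Put@2}, the comparator orders them [Delete@3, Put@3, Put@2]; the
  // Delete suppresses the Put sharing its timestamp, so doCombineIntoRow keeps only Put@2.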
  /**
   * Removes any {@link KeyValue}s whose timestamp is less than or equal to the delete family
   * timestamp, along with the delete family {@code KeyValue}s themselves.
   */
  private static List<KeyValue> maybeDeleteFamily(List<KeyValue> kvs) {
    long deleteFamilyCut = -1;
    for (KeyValue kv : kvs) {
      if (kv.getType() == KeyValue.Type.DeleteFamily.getCode()) {
        deleteFamilyCut = Math.max(deleteFamilyCut, kv.getTimestamp());
      }
    }
    if (deleteFamilyCut < 0) {
      // no DeleteFamily marker found; nothing to remove
      return kvs;
    }
    List<KeyValue> results = Lists.newArrayList();
    for (KeyValue kv : kvs) {
      if (kv.getType() == KeyValue.Type.DeleteFamily.getCode()) {
        continue;
      }
      if (kv.getTimestamp() <= deleteFamilyCut) {
        continue;
      }
      results.add(kv);
    }
    return results;
  }

  private static boolean hasSameFamilyAndQualifier(KeyValue l, KeyValue r) {
    return Bytes.equals(
            l.getBuffer(), l.getFamilyOffset(), l.getFamilyLength(),
            r.getBuffer(), r.getFamilyOffset(), r.getFamilyLength())
        && Bytes.equals(
            l.getBuffer(), l.getQualifierOffset(), l.getQualifierLength(),
            r.getBuffer(), r.getQualifierOffset(), r.getQualifierLength());
  }

  /**
   * Goes over the given {@link KeyValue}s and removes {@code Delete}s and {@code DeleteColumn}s.
   *
   * @param kvs {@code KeyValue}s of the same row and column, sorted by timestamp in
   *        descending order
   * @param versions the number of versions to keep
   * @return the resulting {@code KeyValue}s, containing only {@code Put}s
   */
  private static List<KeyValue> getLatestKeyValuesOfColumn(List<KeyValue> kvs, int versions) {
    if (kvs.isEmpty()) {
      return kvs;
    }
    if (kvs.get(0).getType() == KeyValue.Type.Put.getCode()) {
      return kvs; // shortcut for the common case
    }

    List<KeyValue> results = Lists.newArrayListWithCapacity(versions);
    long previousDeleteTimestamp = -1;
    for (KeyValue kv : kvs) {
      if (results.size() >= versions) {
        break;
      }
      if (kv.getType() == KeyValue.Type.DeleteColumn.getCode()) {
        break;
      } else if (kv.getType() == KeyValue.Type.Put.getCode()) {
        if (kv.getTimestamp() != previousDeleteTimestamp) {
          results.add(kv);
        }
      } else if (kv.getType() == KeyValue.Type.Delete.getCode()) {
        previousDeleteTimestamp = kv.getTimestamp();
      } else {
        throw new AssertionError("Unexpected KeyValue type: " + kv.getType());
      }
    }
    return results;
  }
}