Java tutorial
// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. package com.cloudera.impala.catalog; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import com.cloudera.impala.common.Pair; import com.cloudera.impala.thrift.THdfsPartitionLocation; import com.cloudera.impala.util.ListMap; import com.google.common.base.Preconditions; /** * Utility class for storing HdfsPartition locations in a comrpessed format. Each * instance of this class is owned by a single HdfsTable instance. * * This class is not thread-safe by itself since it is only modified when the lock on an * HdfsTable object is held. * * TODO: Generalize this to compress other sets of Strings that are likely to share common * prefixes, like table locations. * */ class HdfsPartitionLocationCompressor { int numClusteringColumns_; // A bi-directional map between partition location prefixes and their compressed // representation, an int. final private ListMap<String> prefixMap_ = new ListMap<String>(); public HdfsPartitionLocationCompressor(int numClusteringColumns) { numClusteringColumns_ = numClusteringColumns; } // Construct an HdfsPartitionLocationCompressor with a pre-filled bidirectional map // (indexToPrefix_, prefixToIndex_). public HdfsPartitionLocationCompressor(int numClusteringColumns, ArrayList<String> prefixes) { numClusteringColumns_ = numClusteringColumns; prefixMap_.populate(prefixes); } public void setClusteringColumns(int numClusteringColumns) { numClusteringColumns_ = numClusteringColumns; } public List<String> getPrefixes() { return prefixMap_.getList(); } // One direction of the map: returns the prefix associated with an index, or "" is the // index is -1. Indexes less than -1 or greater than indexToPrefix_.size()-1 are invalid // and casue and IllegalArgumentException to be thrown. private String indexToPrefix(int i) { // Uncompressed location are represented by -1: if (i == -1) return ""; Preconditions.checkElementIndex(i, prefixMap_.size()); return prefixMap_.getEntry(i); } // Compress a location prefix, adding it to the bidirectional map (indexToPrefix_, // prefixToIndex_) if it is not already present. private int prefixToIndex(String s) { return prefixMap_.getIndex(s); } // A surrogate for THdfsPartitionLocation, which represents a partition's location // relative to its parent table's list of partition prefixes. public class Location { // 'prefix_index_' represents the portion of the partition's location that comes before // the last N directories, where N is the number of partitioning columns. // 'prefix_index_' is an index into // HdfsPartitionLocationCompressor.this.indexToPrefix_. 'suffix_' is the rest of the // partition location. // // TODO: Since each partition stores the literal values for the partitioning columns, // we could also elide the column names and values from suffix_ when a partition is in // the canonical location "/partitioning_column_name_1=value_1/..." private final int prefix_index_; private final String suffix_; public Location(String location) { Preconditions.checkNotNull(location); Pair<String, String> locationParts = decompose(location); prefix_index_ = HdfsPartitionLocationCompressor.this.prefixToIndex(locationParts.first); suffix_ = locationParts.second; } public Location(THdfsPartitionLocation thrift) { Preconditions.checkNotNull(thrift); prefix_index_ = thrift.prefix_index; suffix_ = thrift.getSuffix(); } public THdfsPartitionLocation toThrift() { return new THdfsPartitionLocation(prefix_index_, suffix_); } @Override public String toString() { return HdfsPartitionLocationCompressor.this.indexToPrefix(prefix_index_) + suffix_; } @Override public int hashCode() { return toString().hashCode(); } @Override public boolean equals(Object obj) { return (obj instanceof Location) && (toString() == obj.toString()); } // Decompose a location string by removing its last N directories, where N is the // number of clustering columns. The result is a Pair<String,String> where the first // String is the prefix and the second is the suffix. (In orther words, their // concatenation equals the input.) If the input does not have at least N '/' // characters, the prefix is empty and the suffix is the entire input. private Pair<String, String> decompose(String s) { Preconditions.checkNotNull(s); int numClusteringColumns = HdfsPartitionLocationCompressor.this.numClusteringColumns_; if (numClusteringColumns == 0) return new Pair<String, String>(s, ""); // Iterate backwards over the input until we have passed 'numClusteringColumns' // directories. What is left is the prefix. int i = s.length() - 1; // If the string ends in '/', iterating past it does not pass a clustering column. if (i >= 0 && s.charAt(i) == '/') --i; for (; numClusteringColumns > 0 && i >= 0; --i) { if (s.charAt(i) == '/') --numClusteringColumns; } // If we successfully removed all the partition directories, s.charAt(i+1) is '/' // and we can include it in the prefix. if (0 == numClusteringColumns) ++i; return new Pair<String, String>(s.substring(0, i + 1), s.substring(i + 1)); } } }