org.apache.hive.hcatalog.api.repl.ReplicationUtils.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hive.hcatalog.api.repl.ReplicationUtils.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hive.hcatalog.api.repl;

import com.google.common.base.Function;
import com.google.common.base.Objects;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.IOExceptionWithCause;
import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
import org.apache.hive.hcatalog.api.HCatDatabase;
import org.apache.hive.hcatalog.api.HCatPartition;
import org.apache.hive.hcatalog.api.HCatTable;
import org.apache.hive.hcatalog.data.ReaderWriter;

import javax.annotation.Nullable;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Static helper methods used by HCatalog replication: reading the last
 * replicated state id from databases, tables and partitions, building
 * staging-directory keys and partition clauses, applying name mappings,
 * and (de)serializing {@link Command} objects to and from strings.
 */
public final class ReplicationUtils {

    /** Property / parameter key under which the last replicated state id is looked up. */
    public static final String REPL_STATE_ID = ReplicationSpec.KEY.CURR_STATE_ID.toString();

    /** Replication id reported when no state id has been recorded. */
    private static final long DEFAULT_REPL_ID = 0L;

    /** Matches every character that is not a regex word character; compiled once and reused. */
    private static final Pattern NON_WORD_CHARS = Pattern.compile("[\\W]");

    private ReplicationUtils() {
        // Not instantiable: this class is a collection of static utility methods.
    }

    /**
     * Looks up the raw replication state value in a property map.
     *
     * @param props property map to inspect, may be null
     * @return the value stored under {@link #REPL_STATE_ID}, or null if the map
     *         is null, has no such key, or maps the key to null
     */
    private static String storedReplState(Map<String, String> props) {
        return (props == null) ? null : props.get(REPL_STATE_ID);
    }

    /**
     * Gets the last known replication state of this db. This is
     * applicable only if it is the destination of a replication
     * and has had data replicated into it via imports previously.
     * Defaults to 0.
     *
     * @param db database whose properties are inspected
     * @return the recorded replication id, or 0 if none is recorded
     * @throws NumberFormatException if the recorded value is not a parsable long
     */
    public static long getLastReplicationId(HCatDatabase db) {
        String state = storedReplState(db.getProperties());
        // default is to return earliest possible state.
        return (state == null) ? DEFAULT_REPL_ID : Long.parseLong(state);
    }

    /**
     * Gets the last known replication state of the provided table. This
     * is applicable only if it is the destination of a replication
     * and has had data replicated into it via imports previously.
     * Defaults to 0.
     *
     * @param tbl table whose table properties are inspected
     * @return the recorded replication id, or 0 if none is recorded
     * @throws NumberFormatException if the recorded value is not a parsable long
     */
    public static long getLastReplicationId(HCatTable tbl) {
        String state = storedReplState(tbl.getTblProps());
        // default is to return earliest possible state.
        return (state == null) ? DEFAULT_REPL_ID : Long.parseLong(state);
    }

    /**
     * Gets the last known replication state of the provided partition.
     * This is applicable only if it is the destination of a replication
     * and has had data replicated into it via imports previously.
     * If that is not available, but parent table is provided,
     * defaults to parent table's replication state. If that is also
     * unknown, defaults to 0.
     *
     * @param ptn partition whose parameters are inspected first
     * @param parentTable optional table to fall back to, may be null
     * @return the recorded replication id, or 0 if none is recorded
     * @throws NumberFormatException if the recorded value is not a parsable long
     */
    public static long getLastReplicationId(HCatPartition ptn, @Nullable HCatTable parentTable) {
        String state = storedReplState(ptn.getParameters());
        if (state != null) {
            return Long.parseLong(state);
        }
        if (parentTable != null) {
            return getLastReplicationId(parentTable);
        }
        return DEFAULT_REPL_ID; // default is to return earliest possible state.
    }

    /**
     * Used to generate a key for a combination of given event id, dbname,
     * tablename and partition key-values. This is used to feed in a name for creating
     * staging directories for exports and imports. This should be idempotent given
     * the same values, i.e. hashcode-like, and is intended to differ between
     * partitions while staying "readable-ish". Basically, we concatenate, separated
     * by '.', the event id, the word-character-only versions of db, table and the
     * partition key-value pairs, and a hash code of db, table and the partition map.
     * Because a hash code is involved, distinctness is very likely but is not
     * strictly guaranteed.
     *
     * @param eventId event id placed at the start of the key
     * @param db database name, may be null
     * @param table table name, may be null
     * @param ptnDesc partition key-values, may be null
     * @return the generated key
     */
    public static String getUniqueKey(long eventId, String db, String table, Map<String, String> ptnDesc) {
        StringBuilder sb = new StringBuilder();
        sb.append(eventId);
        sb.append('.');
        sb.append(toStringWordCharsOnly(db));
        sb.append('.');
        sb.append(toStringWordCharsOnly(table));
        sb.append('.');
        sb.append(toStringWordCharsOnly(ptnDesc));
        sb.append('.');
        sb.append(Objects.hashCode(db, table, ptnDesc));
        return sb.toString();
    }

    /**
     * Return alphanumeric (and '_') representation of a {@code Map<String, String>},
     * as comma-separated {@code key=value} pairs. Entries appear in the map's own
     * iteration order. A null map yields the string "null".
     */
    private static String toStringWordCharsOnly(Map<String, String> map) {
        if (map == null) {
            return "null";
        }
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        for (Map.Entry<String, String> e : map.entrySet()) {
            if (!first) {
                sb.append(',');
            }
            sb.append(toStringWordCharsOnly(e.getKey()));
            sb.append('=');
            sb.append(toStringWordCharsOnly(e.getValue()));
            first = false;
        }
        return sb.toString();
    }

    /**
     * Return alphanumeric (and '_') chars only of a string, lowercased.
     * Lowercasing uses {@link Locale#ROOT} so the result does not depend on
     * the default locale of the JVM.
     *
     * @param s input string, may be null
     * @return the filtered, lowercased string, or the string "null" for null input
     */
    public static String toStringWordCharsOnly(String s) {
        if (s == null) {
            return "null";
        }
        return NON_WORD_CHARS.matcher(s).replaceAll("").toLowerCase(Locale.ROOT);
    }

    /**
     * Utility function to use in conjunction with .withDbNameMapping / .withTableNameMapping,
     * if we desire usage of a {@code Map<String, String>} instead of implementing a
     * {@code Function<String, String>}.
     *
     * Declared static so that it is callable at all: this class cannot be instantiated.
     *
     * @param m mapping to consult, may be null
     * @return a function returning the mapped value for keys present in {@code m},
     *         and the input itself when {@code m} is null or lacks the key
     */
    public static Function<String, String> mapBasedFunction(final Map<String, String> m) {
        return new Function<String, String>() {

            @Nullable
            @Override
            public String apply(@Nullable String s) {
                if ((m == null) || (!m.containsKey(s))) {
                    return s;
                }
                return m.get(s);
            }
        };
    }

    /**
     * Return a mapping from a given map function if available, and the key itself if not.
     *
     * @param s key to map
     * @param mapping mapping function, may be null
     * @return whatever {@code mapping} returns for {@code s}; {@code s} itself if
     *         {@code mapping} is null or throws IllegalArgumentException
     */
    public static String mapIfMapAvailable(String s, Function<String, String> mapping) {
        if (mapping == null) {
            // We return the key itself, since no mapping was available
            return s;
        }
        try {
            return mapping.apply(s);
        } catch (IllegalArgumentException ignored) {
            // The key wasn't present in the mapping, and the function didn't
            // return a default value - ignore, and use the key itself.
            return s;
        }
    }

    /**
     * Builds a partition clause of the form
     * {@code  PARTITION (k1="v1", k2="v2")} (with a leading space) from the
     * given key-values.
     *
     * @param ptnDesc partition key-values, may be null
     * @return the clause, or an empty string if {@code ptnDesc} is null or empty
     */
    public static String partitionDescriptor(Map<String, String> ptnDesc) {
        StringBuilder sb = new StringBuilder();
        if ((ptnDesc != null) && (!ptnDesc.isEmpty())) {
            boolean first = true;
            sb.append(" PARTITION (");
            for (Map.Entry<String, String> e : ptnDesc.entrySet()) {
                if (!first) {
                    sb.append(", ");
                } else {
                    first = false;
                }
                sb.append(e.getKey()); // TODO : verify if any quoting is needed for keys
                sb.append('=');
                sb.append('"');
                sb.append(e.getValue()); // TODO : verify if any escaping is needed for values
                sb.append('"');
            }
            sb.append(')');
        }
        return sb.toString();
    }

    /**
     * Command implements Writable, but that's not terribly easy to use compared
     * to String, even if it plugs in easily into the rest of Hadoop. Provide
     * utility methods to easily serialize and deserialize Commands
     *
     * serializeCommand returns a base64 String representation of given command.
     * The concrete class name of the command is written first, followed by
     * whatever the command's own write method emits.
     *
     * @param command command to serialize
     * @return URL-safe base64 encoding of the serialized bytes
     * @throws IOException if writing the command fails
     */
    public static String serializeCommand(Command command) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DataOutput dataOutput = new DataOutputStream(baos);
        ReaderWriter.writeDatum(dataOutput, command.getClass().getName());
        command.write(dataOutput);
        return Base64.encodeBase64URLSafeString(baos.toByteArray());
    }

    /**
     * Command implements Writable, but that's not terribly easy to use compared
     * to String, even if it plugs in easily into the rest of Hadoop. Provide
     * utility methods to easily serialize and deserialize Commands
     *
     * deserializeCommand instantiates a concrete Command and initializes it,
     * given a base64 String representation of it.
     *
     * NOTE(review): the class to load is named by the input itself, so this
     * should only be fed strings from a trusted source.
     *
     * @param s base64 string, as produced by {@link #serializeCommand(Command)}
     * @return the reconstructed command
     * @throws IOException if the input does not start with a class name, the
     *         named class cannot be loaded or instantiated as a Command, or
     *         reading the command's fields fails
     */
    public static Command deserializeCommand(String s) throws IOException {
        DataInput dataInput = new DataInputStream(new ByteArrayInputStream(Base64.decodeBase64(s)));
        Object header = ReaderWriter.readDatum(dataInput);
        if (!(header instanceof String)) {
            // Report malformed input as an IOException instead of leaking a ClassCastException.
            throw new IOException("Serialized command does not start with a class name");
        }
        String clazz = (String) header;
        Command cmd;
        try {
            // Check the type before instantiating, so that no constructor of a
            // class that is not a Command ever gets run.
            cmd = Class.forName(clazz).asSubclass(Command.class).getDeclaredConstructor().newInstance();
        } catch (Exception e) {
            throw new IOExceptionWithCause("Error instantiating class " + clazz, e);
        }
        cmd.readFields(dataInput);
        return cmd;
    }

}