org.apache.hadoop.hive.ql.parse.EximUtil.java Source code

Introduction

Here is the source code for org.apache.hadoop.hive.ql.parse.EximUtil.java, a utility class that supports Hive's export/import (EXIM) semantic analyzers.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.parse;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.TreeMap;

import com.google.common.base.Function;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.thrift.TDeserializer;
import org.apache.thrift.TException;
import org.apache.thrift.TSerializer;
import org.apache.thrift.protocol.TJSONProtocol;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonGenerator;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import javax.annotation.Nullable;

/**
 *
 * EximUtil. Utility methods for the export/import semantic
 * analyzers.
 *
 */
public class EximUtil {

    private static final Log LOG = LogFactory.getLog(EximUtil.class);

    private EximUtil() {
    }

    /**
     * Initialize the URI where the exported data collection is to be
     * created for export, or where it is present for import.
     */
    static URI getValidatedURI(HiveConf conf, String dcPath) throws SemanticException {
        try {
            boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE);
            URI uri = new Path(dcPath).toUri();
            String scheme = uri.getScheme();
            String authority = uri.getAuthority();
            String path = uri.getPath();
            LOG.info("Path before norm :" + path);
            // generate absolute path relative to home directory
            if (!path.startsWith("/")) {
                if (testMode) {
                    path = (new Path(System.getProperty("test.tmp.dir"), path)).toUri().getPath();
                } else {
                    path = (new Path(new Path("/user/" + System.getProperty("user.name")), path)).toUri().getPath();
                }
            }
            // set correct scheme and authority
            if (StringUtils.isEmpty(scheme)) {
                if (testMode) {
                    scheme = "pfile";
                } else {
                    scheme = "hdfs";
                }
            }

            // if scheme is specified but not authority then use the default
            // authority
            if (StringUtils.isEmpty(authority)) {
                URI defaultURI = FileSystem.get(conf).getUri();
                authority = defaultURI.getAuthority();
            }

            LOG.info("Scheme:" + scheme + ", authority:" + authority + ", path:" + path);
            Collection<String> eximSchemes = conf
                    .getStringCollection(HiveConf.ConfVars.HIVE_EXIM_URI_SCHEME_WL.varname);
            if (!eximSchemes.contains(scheme)) {
                throw new SemanticException(
                        ErrorMsg.INVALID_PATH.getMsg("only the following file systems accepted for export/import : "
                                + conf.get(HiveConf.ConfVars.HIVE_EXIM_URI_SCHEME_WL.varname)));
            }

            try {
                return new URI(scheme, authority, path, null, null);
            } catch (URISyntaxException e) {
                throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
            }
        } catch (IOException e) {
            throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg(), e);
        }
    }

    static void validateTable(org.apache.hadoop.hive.ql.metadata.Table table) throws SemanticException {
        if (table.isOffline()) {
            throw new SemanticException(
                    ErrorMsg.OFFLINE_TABLE_OR_PARTITION.getMsg(":Table " + table.getTableName()));
        }
        if (table.isView()) {
            throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
        }
        if (table.isNonNative()) {
            throw new SemanticException(ErrorMsg.EXIM_FOR_NON_NATIVE.getMsg());
        }
    }

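    /**
     * In test mode, resolves a relative location against test.tmp.dir and
     * defaults the scheme to "pfile"; otherwise the location is returned
     * unchanged.
     */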
    public static String relativeToAbsolutePath(HiveConf conf, String location) throws SemanticException {
        boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE);
        if (testMode) {
            URI uri = new Path(location).toUri();
            String scheme = uri.getScheme();
            String authority = uri.getAuthority();
            String path = uri.getPath();
            if (!path.startsWith("/")) {
                path = (new Path(System.getProperty("test.tmp.dir"), path)).toUri().getPath();
            }
            if (StringUtils.isEmpty(scheme)) {
                scheme = "pfile";
            }
            try {
                uri = new URI(scheme, authority, path, null, null);
            } catch (URISyntaxException e) {
                throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
            }
            return uri.toString();
        } else {
            //no-op for non-test mode for now
            return location;
        }
    }

    /* major version number should match for backward compatibility */
    public static final String METADATA_FORMAT_VERSION = "0.1";
    /* If null, then the major version number should match */
    public static final String METADATA_FORMAT_FORWARD_COMPATIBLE_VERSION = null;

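    /**
     * Writes the export metadata as a JSON document to metadataPath: the
     * format version, the Thrift-serialized table, and one Thrift-serialized
     * entry per partition. In replication scope, the current replication
     * state is stamped onto each object and external tables are rewritten
     * as managed.
     */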
    public static void createExportDump(FileSystem fs, Path metadataPath,
            org.apache.hadoop.hive.ql.metadata.Table tableHandle,
            Iterable<org.apache.hadoop.hive.ql.metadata.Partition> partitions, ReplicationSpec replicationSpec)
            throws SemanticException, IOException {

        if (replicationSpec == null) {
            replicationSpec = new ReplicationSpec(); // instantiate default values if not specified
        }
        if (tableHandle == null) {
            replicationSpec.setNoop(true);
        }

        OutputStream out = fs.create(metadataPath);
        JsonGenerator jgen = (new JsonFactory()).createJsonGenerator(out);
        jgen.writeStartObject();
        jgen.writeStringField("version", METADATA_FORMAT_VERSION);
        if (METADATA_FORMAT_FORWARD_COMPATIBLE_VERSION != null) {
            jgen.writeStringField("fcversion", METADATA_FORMAT_FORWARD_COMPATIBLE_VERSION);
        }

        if (replicationSpec.isInReplicationScope()) {
            for (ReplicationSpec.KEY key : ReplicationSpec.KEY.values()) {
                String value = replicationSpec.get(key);
                if (value != null) {
                    jgen.writeStringField(key.toString(), value);
                }
            }
            if (tableHandle != null) {
                Table ttable = tableHandle.getTTable();
                ttable.putToParameters(ReplicationSpec.KEY.CURR_STATE_ID.toString(),
                        replicationSpec.getCurrentReplicationState());
                if ((ttable.getParameters().containsKey("EXTERNAL"))
                        && (ttable.getParameters().get("EXTERNAL").equalsIgnoreCase("TRUE"))) {
                    // Replication destination will not be external - override if set
                    ttable.putToParameters("EXTERNAL", "FALSE");
                }
                if (ttable.isSetTableType()
                        && ttable.getTableType().equalsIgnoreCase(TableType.EXTERNAL_TABLE.toString())) {
                    // Replication destination will not be external - override if set
                    ttable.setTableType(TableType.MANAGED_TABLE.toString());
                }
            }
        } else {
            // ReplicationSpec.KEY scopeKey = ReplicationSpec.KEY.REPL_SCOPE;
            // write(out, ",\""+ scopeKey.toString() +"\":\"" + replicationSpec.get(scopeKey) + "\"");
            // TODO: if we want to be explicit about this dump not being a replication dump, we can
            // uncomment this else section, but it is currently unneeded. Doing so would require a lot
            // of golden-file regeneration.
        }
        if ((tableHandle != null) && (!replicationSpec.isNoop())) {
            TSerializer serializer = new TSerializer(new TJSONProtocol.Factory());
            try {
                jgen.writeStringField("table", serializer.toString(tableHandle.getTTable(), "UTF-8"));
                jgen.writeFieldName("partitions");
                jgen.writeStartArray();
                if (partitions != null) {
                    for (org.apache.hadoop.hive.ql.metadata.Partition partition : partitions) {
                        Partition tptn = partition.getTPartition();
                        if (replicationSpec.isInReplicationScope()) {
                            tptn.putToParameters(ReplicationSpec.KEY.CURR_STATE_ID.toString(),
                                    replicationSpec.getCurrentReplicationState());
                            if ((tptn.getParameters().containsKey("EXTERNAL"))
                                    && (tptn.getParameters().get("EXTERNAL").equalsIgnoreCase("TRUE"))) {
                                // Replication destination will not be external
                                tptn.putToParameters("EXTERNAL", "FALSE");
                            }
                        }
                        jgen.writeString(serializer.toString(tptn, "UTF-8"));
                        jgen.flush();
                    }
                }
                jgen.writeEndArray();
            } catch (TException e) {
                throw new SemanticException(
                        ErrorMsg.GENERIC_ERROR.getMsg("Exception while serializing the metastore objects"), e);
            }
        }
        jgen.writeEndObject();
        jgen.close(); // JsonGenerator owns the OutputStream, so it closes it when we call close.
    }

    private static void write(OutputStream out, String s) throws IOException {
        out.write(s.getBytes("UTF-8"));
    }

    /**
     * Utility class to help return complex value from readMetaData function
     */
    public static class ReadMetaData {
        private final Table table;
        private final Iterable<Partition> partitions;
        private final ReplicationSpec replicationSpec;

        public ReadMetaData() {
            this(null, null, new ReplicationSpec());
        }

        public ReadMetaData(Table table, Iterable<Partition> partitions, ReplicationSpec replicationSpec) {
            this.table = table;
            this.partitions = partitions;
            this.replicationSpec = replicationSpec;
        }

        public Table getTable() {
            return table;
        }

        public Iterable<Partition> getPartitions() {
            return partitions;
        }

        public ReplicationSpec getReplicationSpec() {
            return replicationSpec;
        }
    }

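    /**
     * Reads the metadata JSON written by createExportDump, verifies version
     * compatibility, and deserializes the table and partition objects.
     */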
    public static ReadMetaData readMetaData(FileSystem fs, Path metadataPath)
            throws IOException, SemanticException {
        FSDataInputStream mdstream = null;
        try {
            mdstream = fs.open(metadataPath);
            byte[] buffer = new byte[1024];
            ByteArrayOutputStream sb = new ByteArrayOutputStream();
            int read = mdstream.read(buffer);
            while (read != -1) {
                sb.write(buffer, 0, read);
                read = mdstream.read(buffer);
            }
            String md = new String(sb.toByteArray(), "UTF-8");
            JSONObject jsonContainer = new JSONObject(md);
            String version = jsonContainer.getString("version");
            String fcversion = getJSONStringEntry(jsonContainer, "fcversion");
            checkCompatibility(version, fcversion);
            String tableDesc = getJSONStringEntry(jsonContainer, "table");
            Table table = null;
            List<Partition> partitionsList = null;
            if (tableDesc != null) {
                table = new Table();
                TDeserializer deserializer = new TDeserializer(new TJSONProtocol.Factory());
                deserializer.deserialize(table, tableDesc, "UTF-8");
                // TODO : jackson-streaming-iterable-redo this
                JSONArray jsonPartitions = new JSONArray(jsonContainer.getString("partitions"));
                partitionsList = new ArrayList<Partition>(jsonPartitions.length());
                for (int i = 0; i < jsonPartitions.length(); ++i) {
                    String partDesc = jsonPartitions.getString(i);
                    Partition partition = new Partition();
                    deserializer.deserialize(partition, partDesc, "UTF-8");
                    partitionsList.add(partition);
                }
            }

            return new ReadMetaData(table, partitionsList, readReplicationSpec(jsonContainer));
        } catch (JSONException e) {
            throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg("Error in deserializing metadata"), e);
        } catch (TException e) {
            throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg("Error in deserializing metadata"), e);
        } finally {
            if (mdstream != null) {
                mdstream.close();
            }
        }
    }

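    /** Builds a ReplicationSpec whose values are read lazily from the JSON container. */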
    private static ReplicationSpec readReplicationSpec(final JSONObject jsonContainer) {
        Function<String, String> keyFetcher = new Function<String, String>() {
            @Override
            public String apply(@Nullable String s) {
                return getJSONStringEntry(jsonContainer, s);
            }
        };
        return new ReplicationSpec(keyFetcher);
    }

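    /** Returns the string value stored under name, or null if the key is absent. */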
    private static String getJSONStringEntry(JSONObject jsonContainer, String name) {
        String retval = null;
        try {
            retval = jsonContainer.getString(name);
        } catch (JSONException ignored) {
        }
        return retval;
    }

    /* check the forward and backward compatibility */
    private static void checkCompatibility(String version, String fcVersion) throws SemanticException {
        doCheckCompatibility(METADATA_FORMAT_VERSION, version, fcVersion);
    }

    /* check the forward and backward compatibility */
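    /*
     * The consumer (currVersion) must share a major version with the
     * producer (version). A newer consumer major is "not backward
     * compatible"; an older one is "not forward compatible" unless fcVersion
     * names a minimum consumer version that the running code satisfies.
     */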
    public static void doCheckCompatibility(String currVersion, String version, String fcVersion)
            throws SemanticException {
        if (version == null) {
            throw new SemanticException(ErrorMsg.INVALID_METADATA.getMsg("Version number missing"));
        }
        StringTokenizer st = new StringTokenizer(version, ".");
        int data_major = Integer.parseInt(st.nextToken());

        StringTokenizer st2 = new StringTokenizer(currVersion, ".");
        int code_major = Integer.parseInt(st2.nextToken());
        int code_minor = Integer.parseInt(st2.nextToken());

        if (code_major > data_major) {
            throw new SemanticException(ErrorMsg.INVALID_METADATA.getMsg("Not backward compatible."
                    + " Producer version " + version + ", Consumer version " + currVersion));
        } else {
            if ((fcVersion == null) || fcVersion.isEmpty()) {
                if (code_major < data_major) {
                    throw new SemanticException(ErrorMsg.INVALID_METADATA.getMsg("Not forward compatible."
                            + "Producer version " + version + ", Consumer version " + currVersion));
                }
            } else {
                StringTokenizer st3 = new StringTokenizer(fcVersion, ".");
                int fc_major = Integer.parseInt(st3.nextToken());
                int fc_minor = Integer.parseInt(st3.nextToken());
                if ((fc_major > code_major) || ((fc_major == code_major) && (fc_minor > code_minor))) {
                    throw new SemanticException(ErrorMsg.INVALID_METADATA.getMsg("Not forward compatible."
                            + "Minimum version " + fcVersion + ", Consumer version " + currVersion));
                }
            }
        }
    }

    /**
     * Return the partition specification from the specified keys and values
     *
     * @param partCols
     *          the names of the partition keys
     * @param partVals
     *          the values of the partition keys
     *
     * @return the partition specification as a map
     */
    public static Map<String, String> makePartSpec(List<FieldSchema> partCols, List<String> partVals) {
        Map<String, String> partSpec = new TreeMap<String, String>();
        for (int i = 0; i < partCols.size(); ++i) {
            partSpec.put(partCols.get(i).getName(), partVals.get(i));
        }
        return partSpec;
    }

    /**
     * Compares the schemas - names, types and order, but ignoring comments
     *
     * @param newSchema
     *          the new schema
     * @param oldSchema
     *          the old schema
     * @return a boolean indicating match
     */
    public static boolean schemaCompare(List<FieldSchema> newSchema, List<FieldSchema> oldSchema) {
        Iterator<FieldSchema> newColIter = newSchema.iterator();
        for (FieldSchema oldCol : oldSchema) {
            FieldSchema newCol = null;
            if (newColIter.hasNext()) {
                newCol = newColIter.next();
            } else {
                return false;
            }
            // not using FieldSchema.equals as comments can be different
            if (!oldCol.getName().equals(newCol.getName()) || !oldCol.getType().equals(newCol.getType())) {
                return false;
            }
        }
        if (newColIter.hasNext()) {
            return false;
        }
        return true;
    }
}
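
As a quick usage sketch (the EximUtilDemo driver class below is hypothetical and not part of the Hive source), the public static helpers can be exercised directly:

import java.util.Arrays;
import java.util.Map;

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.parse.EximUtil;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class EximUtilDemo {
    public static void main(String[] args) throws SemanticException {
        // makePartSpec pairs partition column names with their values;
        // the backing TreeMap keeps the keys sorted.
        Map<String, String> spec = EximUtil.makePartSpec(
                Arrays.asList(new FieldSchema("ds", "string", null),
                        new FieldSchema("hr", "string", null)),
                Arrays.asList("2015-01-01", "12"));
        System.out.println(spec); // {ds=2015-01-01, hr=12}

        // schemaCompare ignores column comments but not names, types, or order.
        boolean same = EximUtil.schemaCompare(
                Arrays.asList(new FieldSchema("id", "int", "new comment")),
                Arrays.asList(new FieldSchema("id", "int", "old comment")));
        System.out.println(same); // true

        // Matching major versions are compatible regardless of minor version.
        EximUtil.doCheckCompatibility("0.1", "0.1", null);

        // Data written under a newer major version is rejected as not
        // forward compatible.
        try {
            EximUtil.doCheckCompatibility("0.1", "1.0", null);
        } catch (SemanticException e) {
            System.out.println("rejected: " + e.getMessage());
        }
    }
}

Note that doCheckCompatibility parses only the major component of the data's version string; the minor component matters only when a forward-compatibility version (fcVersion) is supplied.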