com.cloudera.impala.analysis.LoadDataStmt.java Source code

Introduction

Here is the source code for com.cloudera.impala.analysis.LoadDataStmt.java. This class represents Impala's LOAD DATA statement, which moves one or more files on HDFS into an existing table or partition.

Source

// Copyright 2013 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.cloudera.impala.analysis;

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

import com.cloudera.impala.authorization.Privilege;
import com.cloudera.impala.catalog.HdfsFileFormat;
import com.cloudera.impala.catalog.HdfsPartition;
import com.cloudera.impala.catalog.HdfsTable;
import com.cloudera.impala.catalog.Table;
import com.cloudera.impala.common.AnalysisException;
import com.cloudera.impala.common.FileSystemUtil;
import com.cloudera.impala.thrift.TLoadDataReq;
import com.cloudera.impala.thrift.TTableName;
import com.cloudera.impala.util.TAccessLevelUtil;
import com.google.common.base.Preconditions;

/**
 * Represents a LOAD DATA statement for moving data into an existing table:
 * LOAD DATA INPATH 'filepath' [OVERWRITE] INTO TABLE <table name>
 * [PARTITION (partcol1=val1, partcol2=val2 ...)]
 *
 * The LOAD DATA operation supports loading (moving) a single file or all files in a
 * given source directory to a table or partition location. If OVERWRITE is true, all
 * existing files in the destination will be removed before moving the new data in.
 * If OVERWRITE is false, existing files will be preserved. If there are any file name
 * conflicts, the new files will be uniquified by inserting a UUID into the file name
 * (preserving the extension).
 * Loading hidden files is not supported and any hidden files in the source or
 * destination are preserved, even if OVERWRITE is true.
 */
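// Illustrative statements covered by this class (table, path, and partition
// values below are made up for the example):
//
//   LOAD DATA INPATH '/user/impala/staging/part-00000' INTO TABLE sales
//   LOAD DATA INPATH '/user/impala/staging' OVERWRITE INTO TABLE sales
//       PARTITION (year=2013, month=7)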
public class LoadDataStmt extends StatementBase {
    private final TableName tableName_;
    private final HdfsUri sourceDataPath_;
    private final PartitionSpec partitionSpec_;
    private final boolean overwrite_;

    // Set during analysis
    private String dbName_;

    public LoadDataStmt(TableName tableName, HdfsUri sourceDataPath, boolean overwrite,
            PartitionSpec partitionSpec) {
        Preconditions.checkNotNull(tableName);
        Preconditions.checkNotNull(sourceDataPath);
        this.tableName_ = tableName;
        this.sourceDataPath_ = sourceDataPath;
        this.overwrite_ = overwrite;
        this.partitionSpec_ = partitionSpec;
    }

    public String getTbl() {
        return tableName_.getTbl();
    }

    public String getDb() {
        Preconditions.checkNotNull(dbName_);
        return dbName_;
    }

    /**
     * Print SQL syntax corresponding to this node.
     * @see com.cloudera.impala.parser.ParseNode#toSql()
     */
    @Override
    public String toSql() {
        StringBuilder sb = new StringBuilder("LOAD DATA INPATH '");
        sb.append(sourceDataPath_).append("' ");
        if (overwrite_) sb.append("OVERWRITE ");
        sb.append("INTO TABLE ").append(tableName_.toString());
        if (partitionSpec_ != null) sb.append(" ").append(partitionSpec_.toSql());
        return sb.toString();
    }
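
    // Illustrative toSql() output for a statement built with OVERWRITE and a
    // partition spec (all names and values here are made up):
    //   LOAD DATA INPATH '/staging/sales' OVERWRITE INTO TABLE db.sales PARTITION (year=2013)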

    @Override
    public void analyze(Analyzer analyzer) throws AnalysisException {
        dbName_ = analyzer.getTargetDbName(tableName_);
        Table table = analyzer.getTable(tableName_, Privilege.INSERT);
        if (!(table instanceof HdfsTable)) {
            throw new AnalysisException("LOAD DATA only supported for HDFS tables: " + dbName_ + "." + getTbl());
        }

        // Analyze the partition spec, if one was specified.
        if (partitionSpec_ != null) {
            partitionSpec_.setTableName(tableName_);
            partitionSpec_.setPartitionShouldExist();
            partitionSpec_.setPrivilegeRequirement(Privilege.INSERT);
            partitionSpec_.analyze(analyzer);
        } else if (table.getMetaStoreTable().getPartitionKeysSize() > 0) {
            // A partitioned table requires an explicit partition spec for LOAD DATA.
            throw new AnalysisException("Table is partitioned but no partition spec was specified: "
                    + dbName_ + "." + getTbl());
        }
        analyzePaths(analyzer, (HdfsTable) table);
    }

    private void analyzePaths(Analyzer analyzer, HdfsTable hdfsTable) throws AnalysisException {
        // The user must have permission to access the source location. Since the files will
        // be moved from this location, the user needs to have all permission.
        sourceDataPath_.analyze(analyzer, Privilege.ALL);

        try {
            Path source = sourceDataPath_.getPath();
            FileSystem fs = source.getFileSystem(FileSystemUtil.getConfiguration());
            // sourceDataPath_.analyze() ensured that path is on an HDFS filesystem.
            Preconditions.checkState(fs instanceof DistributedFileSystem);
            DistributedFileSystem dfs = (DistributedFileSystem) fs;
            if (!dfs.exists(source)) {
                throw new AnalysisException(String.format("INPATH location '%s' does not exist.", sourceDataPath_));
            }

            if (dfs.isDirectory(source)) {
                if (FileSystemUtil.getTotalNumVisibleFiles(source) == 0) {
                    throw new AnalysisException(
                            String.format("INPATH location '%s' contains no visible files.", sourceDataPath_));
                }
                if (FileSystemUtil.containsSubdirectory(source)) {
                    throw new AnalysisException(
                            String.format("INPATH location '%s' cannot contain subdirectories.", sourceDataPath_));
                }
            } else { // INPATH points to a file.
                if (FileSystemUtil.isHiddenFile(source.getName())) {
                    throw new AnalysisException(
                            String.format("INPATH location '%s' points to a hidden file.", source));
                }
            }

            String noWriteAccessErrorMsg = String.format("Unable to LOAD DATA into target table "
                    + "(%s) because Impala does not have WRITE access to HDFS location: ",
                    hdfsTable.getFullName());

            HdfsPartition partition;
            String location;
            if (partitionSpec_ != null) {
                partition = hdfsTable.getPartition(partitionSpec_.getPartitionSpecKeyValues());
                location = partition.getLocation();
                if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
                    throw new AnalysisException(noWriteAccessErrorMsg + partition.getLocation());
                }
            } else {
                // "default" partition
                partition = hdfsTable.getPartitions().get(0);
                location = hdfsTable.getLocation();
                if (!hdfsTable.hasWriteAccess()) {
                    throw new AnalysisException(noWriteAccessErrorMsg + hdfsTable.getLocation());
                }
            }
            Preconditions.checkNotNull(partition);

            // Until Frontend.loadTableData() can handle cross-filesystem and filesystems
            // that aren't HDFS, require that source and dest are on the same HDFS.
            if (!FileSystemUtil.isPathOnFileSystem(new Path(location), fs)) {
                throw new AnalysisException(String.format(
                        "Unable to LOAD DATA into target table (%s) because source path (%s) and "
                                + "destination %s (%s) are on different file-systems.",
                        hdfsTable.getFullName(), source, partitionSpec_ == null ? "table" : "partition",
                        partition.getLocation()));
            }
            // Verify that every file being loaded has a supported format/compression.
            for (FileStatus fStatus : fs.listStatus(source)) {
                // Skip directories; the FileStatus already indicates whether this
                // entry is one, so no extra filesystem call is needed.
                if (fStatus.isDirectory()) continue;
                StringBuilder errorMsg = new StringBuilder();
                HdfsFileFormat fileFormat = partition.getInputFormatDescriptor().getFileFormat();
                if (!fileFormat.isFileCompressionTypeSupported(fStatus.getPath().toString(), errorMsg)) {
                    throw new AnalysisException(errorMsg.toString());
                }
            }
        } catch (FileNotFoundException e) {
            throw new AnalysisException("File not found: " + e.getMessage(), e);
        } catch (IOException e) {
            throw new AnalysisException("Error accessing file system: " + e.getMessage(), e);
        }
    }

    public TLoadDataReq toThrift() {
        TLoadDataReq loadDataReq = new TLoadDataReq();
        loadDataReq.setTable_name(new TTableName(getDb(), getTbl()));
        loadDataReq.setSource_path(sourceDataPath_.toString());
        loadDataReq.setOverwrite(overwrite_);
        if (partitionSpec_ != null) {
            loadDataReq.setPartition_spec(partitionSpec_.toThrift());
        }
        return loadDataReq;
    }
}
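
Usage example

For illustration, here is a minimal sketch that builds a LoadDataStmt and prints its SQL, and also mimics the UUID-based file-name uniquification described in the class comment. It assumes the Impala frontend classes are on the classpath and that TableName(String, String) and HdfsUri(String) constructors exist (they are defined elsewhere in the frontend, not in this listing); the real uniquification logic lives in FileSystemUtil and may differ. Treat this as a sketch under those assumptions, not as part of the original source.

import java.util.UUID;

import com.cloudera.impala.analysis.HdfsUri;
import com.cloudera.impala.analysis.LoadDataStmt;
import com.cloudera.impala.analysis.TableName;

public class LoadDataStmtSketch {
    // Hypothetical helper mirroring the behavior described in the class
    // comment: on a name conflict, insert a UUID into the file name while
    // preserving the extension, e.g. "data.csv" -> "data_<uuid>.csv".
    static String uniquifyFileName(String fileName) {
        String uuid = UUID.randomUUID().toString();
        int dot = fileName.lastIndexOf('.');
        if (dot < 0) return fileName + "_" + uuid;
        return fileName.substring(0, dot) + "_" + uuid + fileName.substring(dot);
    }

    public static void main(String[] args) {
        // Assumed constructors (defined elsewhere in the Impala frontend):
        // TableName(db, table) and HdfsUri(location).
        TableName table = new TableName("functional", "alltypes");
        HdfsUri source = new HdfsUri("/user/impala/staging/alltypes");
        LoadDataStmt stmt = new LoadDataStmt(table, source, /* overwrite */ false,
                /* partitionSpec */ null);

        // Prints roughly:
        // LOAD DATA INPATH '/user/impala/staging/alltypes' INTO TABLE functional.alltypes
        System.out.println(stmt.toSql());

        // Demonstrates the hypothetical uniquification helper above.
        System.out.println(uniquifyFileName("data.csv"));
    }
}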