com.cloudera.sqoop.TestAppendUtils.java Source code

Java tutorial

Introduction

Here is the source code for com.cloudera.sqoop.TestAppendUtils.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.sqoop;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.cloudera.sqoop.manager.ImportJobContext;

import com.cloudera.sqoop.testutil.CommonArgs;
import com.cloudera.sqoop.testutil.HsqldbTestServer;
import com.cloudera.sqoop.testutil.ImportJobTestCase;
import com.cloudera.sqoop.tool.ImportTool;
import com.cloudera.sqoop.util.AppendUtils;

/**
 * Test that --append works.
 */
public class TestAppendUtils extends ImportJobTestCase {

    private static final int PARTITION_DIGITS = 5;
    private static final String FILEPART_SEPARATOR = "-";

    public static final Log LOG = LogFactory.getLog(TestAppendUtils.class.getName());

    /**
     * Create the argv to pass to Sqoop.
     *
     * @return the argv as an array of strings.
     */
    protected ArrayList getOutputlessArgv(boolean includeHadoopFlags, boolean queryBased, String[] colNames,
            Configuration conf) {
        if (null == colNames) {
            colNames = getColNames();
        }

        String splitByCol = colNames[0];
        String columnsString = "";
        for (String col : colNames) {
            columnsString += col + ",";
        }

        ArrayList<String> args = new ArrayList<String>();

        if (includeHadoopFlags) {
            CommonArgs.addHadoopFlags(args);
        }

        if (queryBased) {
            args.add("--query");
            args.add("SELECT * FROM " + getTableName() + " WHERE $CONDITIONS");
        } else {
            args.add("--table");
            args.add(getTableName());
        }
        args.add("--columns");
        args.add(columnsString);
        args.add("--split-by");
        args.add(splitByCol);
        args.add("--connect");
        args.add(getConnectString());
        args.add("--as-sequencefile");
        args.add("--num-mappers");
        args.add("1");

        args.addAll(getExtraArgs(conf));

        return args;
    }

    // this test just uses the two int table.
    protected String getTableName() {
        return HsqldbTestServer.getTableName();
    }

    /** the same than ImportJobTestCase but without removing tabledir. */
    protected void runUncleanImport(String[] argv) throws IOException {
        // run the tool through the normal entry-point.
        int ret;
        try {
            Configuration conf = getConf();
            SqoopOptions opts = getSqoopOptions(conf);
            Sqoop sqoop = new Sqoop(new ImportTool(), conf, opts);
            ret = Sqoop.runSqoop(sqoop, argv);
        } catch (Exception e) {
            LOG.error("Got exception running Sqoop: " + e.toString());
            e.printStackTrace();
            ret = 1;
        }

        // expect a successful return.
        if (0 != ret) {
            throw new IOException("Failure during job; return status " + ret);
        }
    }

    /** @return FileStatus for data files only. */
    private FileStatus[] listFiles(FileSystem fs, Path path) throws IOException {
        FileStatus[] fileStatuses = fs.listStatus(path);
        ArrayList files = new ArrayList();
        Pattern patt = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*");
        for (FileStatus fstat : fileStatuses) {
            String fname = fstat.getPath().getName();
            if (!fstat.isDir()) {
                Matcher mat = patt.matcher(fname);
                if (mat.matches()) {
                    files.add(fstat);
                }
            }
        }
        return (FileStatus[]) files.toArray(new FileStatus[files.size()]);
    }

    private class StatusPathComparator implements Comparator<FileStatus> {

        @Override
        public int compare(FileStatus fs1, FileStatus fs2) {
            return fs1.getPath().toString().compareTo(fs2.getPath().toString());
        }
    }

    /** @return a concat. string with file-creation dates excluding folders. */
    private String getFileCreationTimeImage(FileSystem fs, Path outputPath, int fileCount) throws IOException {
        // create string image with all file creation dates
        StringBuffer image = new StringBuffer();
        FileStatus[] fileStatuses = listFiles(fs, outputPath);
        // sort the file statuses by path so we have a stable order for
        // using 'fileCount'.
        Arrays.sort(fileStatuses, new StatusPathComparator());
        for (int i = 0; i < fileStatuses.length && i < fileCount; i++) {
            image.append(fileStatuses[i].getPath() + "=" + fileStatuses[i].getModificationTime());
        }
        return image.toString();
    }

    /** @return the number part of a partition */
    private int getFilePartition(Path file) {
        String filename = file.getName();
        int pos = filename.lastIndexOf(FILEPART_SEPARATOR);
        if (pos != -1) {
            String part = filename.substring(pos + 1, pos + 1 + PARTITION_DIGITS);
            return Integer.parseInt(part);
        } else {
            return 0;
        }
    }

    /**
     * Test for ouput path file-count increase, current files untouched and new
     * correct partition number.
     *
     * @throws IOException
     */
    public void runAppendTest(ArrayList args, Path outputPath) throws IOException {

        try {

            // ensure non-existing output dir for insert phase
            FileSystem fs = FileSystem.get(getConf());
            if (fs.exists(outputPath)) {
                fs.delete(outputPath, true);
            }

            // run Sqoop in INSERT mode
            String[] argv = (String[]) args.toArray(new String[0]);
            runUncleanImport(argv);

            // get current file count
            FileStatus[] fileStatuses = listFiles(fs, outputPath);
            Arrays.sort(fileStatuses, new StatusPathComparator());
            int previousFileCount = fileStatuses.length;

            // get string image with all file creation dates
            String previousImage = getFileCreationTimeImage(fs, outputPath, previousFileCount);

            // get current last partition number
            Path lastFile = fileStatuses[fileStatuses.length - 1].getPath();
            int lastPartition = getFilePartition(lastFile);

            // run Sqoop in APPEND mode
            args.add("--append");
            argv = (String[]) args.toArray(new String[0]);
            runUncleanImport(argv);

            // check directory file increase
            fileStatuses = listFiles(fs, outputPath);
            Arrays.sort(fileStatuses, new StatusPathComparator());
            int currentFileCount = fileStatuses.length;
            assertTrue("Output directory didn't got increased in file count ",
                    currentFileCount > previousFileCount);

            // check previous files weren't modified, also works for partition
            // overlapping
            String currentImage = getFileCreationTimeImage(fs, outputPath, previousFileCount);
            assertEquals("Previous files to appending operation were modified", currentImage, previousImage);

            // check that exists at least 1 new correlative partition
            // let's use a different way than the code being tested
            Path newFile = fileStatuses[previousFileCount].getPath(); // there is a
                                                                      // new bound now
            int newPartition = getFilePartition(newFile);
            assertTrue("New partition file isn't correlative", lastPartition + 1 == newPartition);

        } catch (Exception e) {
            LOG.error("Got Exception: " + StringUtils.stringifyException(e));
            fail(e.toString());
        }
    }

    /** independent to target-dir. */
    public void testAppend() throws IOException {
        ArrayList args = getOutputlessArgv(false, false, HsqldbTestServer.getFieldNames(), getConf());
        args.add("--warehouse-dir");
        args.add(getWarehouseDir());

        Path output = new Path(getWarehouseDir(), HsqldbTestServer.getTableName());
        runAppendTest(args, output);
    }

    /** working with target-dir. */
    public void testAppendToTargetDir() throws IOException {
        ArrayList args = getOutputlessArgv(false, false, HsqldbTestServer.getFieldNames(), getConf());
        String targetDir = getWarehouseDir() + "/tempTargetDir";
        args.add("--target-dir");
        args.add(targetDir);

        // there's no need for a new param
        // in diff. w/--warehouse-dir there will no be $tablename dir
        Path output = new Path(targetDir);
        runAppendTest(args, output);
    }

    /**
     * Query based import should also work in append mode.
     *
     * @throws IOException
     */
    public void testAppendWithQuery() throws IOException {
        ArrayList args = getOutputlessArgv(false, true, HsqldbTestServer.getFieldNames(), getConf());
        String targetDir = getWarehouseDir() + "/tempTargetDir";
        args.add("--target-dir");
        args.add(targetDir);

        Path output = new Path(targetDir);
        runAppendTest(args, output);
    }

    /**
     * If the append source does not exist, don't crash.
     */
    public void testAppendSrcDoesNotExist() throws IOException {
        Configuration conf = new Configuration();
        if (!isOnPhysicalCluster()) {
            conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
        }
        SqoopOptions options = new SqoopOptions(conf);
        options.setTableName("meep");
        Path missingPath = new Path("doesNotExistForAnyReason");
        FileSystem local = FileSystem.getLocal(conf);
        assertFalse(local.exists(missingPath));
        ImportJobContext importContext = new ImportJobContext("meep", null, options, missingPath);
        AppendUtils utils = new AppendUtils(importContext);
        utils.append();
    }

}