Example usage for org.apache.hadoop.fs Path toString

List of usage examples for org.apache.hadoop.fs Path toString

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.Path.toString().

Prototype

@Override
public String toString()
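
For quick reference, here is a minimal, hedged sketch of what toString() returns for a few representative paths. The demo class and the expected outputs are assumptions based on the Path javadoc, not taken from the usage examples below.

import org.apache.hadoop.fs.Path;

public class PathToStringDemo {
    public static void main(String[] args) {
        // A relative path prints exactly as given.
        System.out.println(new Path("part-m-00000").toString());
        // -> part-m-00000

        // Parent/child construction joins the components with a slash.
        System.out.println(new Path(new Path("/user/warehouse"), "mytable").toString());
        // -> /user/warehouse/mytable

        // A scheme-qualified path keeps its full URI form.
        System.out.println(new Path("hdfs://namenode:8020/user/warehouse/mytable").toString());
        // -> hdfs://namenode:8020/user/warehouse/mytable
    }
}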

Usage

From source file: com.cloudera.sqoop.manager.SQLServerManagerImportManualTest.java

License: Apache License

private void doImportAndVerify(String tableName, String[] expectedResults, String... extraArgs)
        throws IOException {

    Path warehousePath = new Path(this.getWarehouseDir());
    Path tablePath = new Path(warehousePath, tableName);
    Path filePath = new Path(tablePath, "part-m-00000");

    File tableFile = new File(tablePath.toString());
    if (tableFile.exists() && tableFile.isDirectory()) {
        // remove the directory before running the import.
        FileListing.recursiveDeleteDir(tableFile);
    }

    String[] argv = getArgv(tableName, extraArgs);
    try {
        runImport(argv);
    } catch (IOException ioe) {
        LOG.error("Got IOException during import: " + ioe.toString());
        ioe.printStackTrace();
        fail(ioe.toString());
    }

    File f = new File(filePath.toString());
    assertTrue("Could not find imported data file", f.exists());
    BufferedReader r = null;
    try {
        // Read through the file and make sure it's all there.
        r = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
        for (String expectedLine : expectedResults) {
            assertEquals(expectedLine, r.readLine());
        }
    } catch (IOException ioe) {
        LOG.error("Got IOException verifying results: " + ioe.toString());
        ioe.printStackTrace();
        fail(ioe.toString());
    } finally {
        IOUtils.closeStream(r);
    }
}
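
Note that this test hands Path.toString() straight to java.io.File, which only round-trips cleanly for local, scheme-less paths such as the test warehouse directory. A small hedged sketch of the distinction, using hypothetical paths:

import java.io.File;

import org.apache.hadoop.fs.Path;

public class PathToFileCaveat {
    public static void main(String[] args) {
        // Fine: a scheme-less local path is a valid java.io.File name.
        File ok = new File(new Path("/tmp/warehouse/mytable").toString());
        System.out.println(ok.getPath()); // /tmp/warehouse/mytable

        // Dubious: for a qualified path, the URI scheme leaks into the File name.
        File bad = new File(new Path("hdfs://nn:8020/tmp/warehouse/mytable").toString());
        System.out.println(bad.getPath()); // hdfs:/nn:8020/tmp/warehouse/mytable
    }
}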

From source file: com.cloudera.sqoop.mapreduce.MergeJob.java

License: Apache License

public boolean runMergeJob() throws IOException {
    Configuration conf = options.getConf();
    Job job = new Job(conf);

    String userClassName = options.getClassName();
    if (null == userClassName) {
        // Shouldn't get here.
        throw new IOException("Record class name not specified with " + "--class-name.");
    }

    // Set the external jar to use for the job.
    String existingJar = options.getExistingJarName();
    if (existingJar != null) {
        // User explicitly identified a jar path.
        LOG.debug("Setting job jar to user-specified jar: " + existingJar);
        job.getConfiguration().set("mapred.jar", existingJar);
    } else {
        // Infer it from the location of the specified class, if it's on the
        // classpath.
        try {
            Class<? extends Object> userClass = conf.getClassByName(userClassName);
            if (null != userClass) {
                String userJar = Jars.getJarPathForClass(userClass);
                LOG.debug("Setting job jar based on user class " + userClassName + ": " + userJar);
                job.getConfiguration().set("mapred.jar", userJar);
            } else {
                LOG.warn("Specified class " + userClassName + " is not in a jar. "
                        + "MapReduce may not find the class");
            }
        } catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe);
        }
    }

    try {
        Path oldPath = new Path(options.getMergeOldPath());
        Path newPath = new Path(options.getMergeNewPath());

        Configuration jobConf = job.getConfiguration();
        FileSystem fs = FileSystem.get(jobConf);
        oldPath = oldPath.makeQualified(fs);
        newPath = newPath.makeQualified(fs);

        FileInputFormat.addInputPath(job, oldPath);
        FileInputFormat.addInputPath(job, newPath);

        jobConf.set(MERGE_OLD_PATH_KEY, oldPath.toString());
        jobConf.set(MERGE_NEW_PATH_KEY, newPath.toString());
        jobConf.set(MERGE_KEY_COL_KEY, options.getMergeKeyCol());
        jobConf.set(MERGE_SQOOP_RECORD_KEY, userClassName);

        FileOutputFormat.setOutputPath(job, new Path(options.getTargetDir()));

        if (ExportJobBase.isSequenceFiles(jobConf, newPath)) {
            job.setInputFormatClass(SequenceFileInputFormat.class);
            job.setOutputFormatClass(SequenceFileOutputFormat.class);
            job.setMapperClass(MergeRecordMapper.class);
        } else {
            job.setMapperClass(MergeTextMapper.class);
            job.setOutputFormatClass(RawKeyTextOutputFormat.class);
        }

        jobConf.set("mapred.output.key.class", userClassName);
        job.setOutputValueClass(NullWritable.class);

        job.setReducerClass(MergeReducer.class);

        // Set the intermediate data types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MergeRecord.class);

        // Make sure Sqoop and anything else we need is on the classpath.
        cacheJars(job, null);
        return this.runJob(job);
    } catch (InterruptedException ie) {
        throw new IOException(ie);
    } catch (ClassNotFoundException cnfe) {
        throw new IOException(cnfe);
    }
}

From source file: com.cloudera.sqoop.mapreduce.MergeMapperBase.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    keyColName = conf.get(MergeJob.MERGE_KEY_COL_KEY);

    InputSplit is = context.getInputSplit();
    FileSplit fs = (FileSplit) is;
    Path splitPath = fs.getPath();

    if (splitPath.toString().startsWith(conf.get(MergeJob.MERGE_NEW_PATH_KEY))) {
        this.isNew = true;
    } else if (splitPath.toString().startsWith(conf.get(MergeJob.MERGE_OLD_PATH_KEY))) {
        this.isNew = false;
    } else {
        throw new IOException(
                "File " + splitPath + " is not under new path " + conf.get(MergeJob.MERGE_NEW_PATH_KEY)
                        + " or old path " + conf.get(MergeJob.MERGE_OLD_PATH_KEY));
    }
}
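
The startsWith() comparisons above rely on runMergeJob() (shown earlier) having qualified both paths with makeQualified() before storing them, because the split paths handed to a mapper are fully qualified. A hedged sketch of why a raw relative path would not prefix-match, run against the local file system:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class QualifiedPrefixDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        Path raw = new Path("merge/new");
        Path qualified = raw.makeQualified(fs); // e.g. file:/home/user/merge/new

        // Files under the directory prefix-match only the qualified form.
        Path split = new Path(qualified, "part-m-00000");
        System.out.println(split.toString().startsWith(qualified.toString())); // true
        System.out.println(split.toString().startsWith(raw.toString()));       // false
    }
}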

From source file: com.cloudera.sqoop.TestAllTables.java

License: Apache License

public void testMultiTableImport() throws IOException {
    String[] argv = getArgv(true, null);
    runImport(new ImportAllTablesTool(), argv);

    Path warehousePath = new Path(this.getWarehouseDir());
    int i = 0;
    for (String tableName : this.tableNames) {
        Path tablePath = new Path(warehousePath, tableName);
        Path filePath = new Path(tablePath, "part-m-00000");

        // dequeue the expected value for this table. This
        // list has the same order as the tableNames list.
        String expectedVal = Integer.toString(i++) + "," + this.expectedStrings.get(0);
        this.expectedStrings.remove(0);

        BufferedReader reader = null;
        if (!isOnPhysicalCluster()) {
            reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(new File(filePath.toString()))));
        } else {
            FileSystem dfs = FileSystem.get(getConf());
            FSDataInputStream dis = dfs.open(filePath);
            reader = new BufferedReader(new InputStreamReader(dis));
        }
        try {
            String line = reader.readLine();
            assertEquals("Table " + tableName + " expected a different string", expectedVal, line);
        } finally {
            IOUtils.closeStream(reader);
        }
    }
}

From source file: com.cloudera.sqoop.TestAllTables.java

License: Apache License

public void testMultiTableImportWithExclude() throws IOException {
    String exclude = this.tableNames.get(0);
    String[] argv = getArgv(true, new String[] { exclude });
    runImport(new ImportAllTablesTool(), argv);

    Path warehousePath = new Path(this.getWarehouseDir());
    int i = 0;
    for (String tableName : this.tableNames) {
        Path tablePath = new Path(warehousePath, tableName);
        Path filePath = new Path(tablePath, "part-m-00000");

        // dequeue the expected value for this table. This
        // list has the same order as the tableNames list.
        String expectedVal = Integer.toString(i++) + "," + this.expectedStrings.get(0);
        this.expectedStrings.remove(0);

        BufferedReader reader = null;
        if (!isOnPhysicalCluster()) {
            reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(new File(filePath.toString()))));
        } else {
            FSDataInputStream dis;
            FileSystem dfs = FileSystem.get(getConf());
            if (tableName.equals(exclude)) {
                try {
                    dis = dfs.open(filePath);
                    fail("Expected FileNotFoundException for excluded table " + tableName);
                } catch (FileNotFoundException e) {
                    // Success
                    continue;
                }
            } else {
                dis = dfs.open(filePath);
            }
            reader = new BufferedReader(new InputStreamReader(dis));
        }
        try {
            String line = reader.readLine();
            assertEquals("Table " + tableName + " expected a different string", expectedVal, line);
        } finally {
            IOUtils.closeStream(reader);
        }
    }
}

From source file: com.cloudera.sqoop.TestFreeFormQueryImport.java

License: Apache License

/**
 * Create two tables that share the common id column.  Run free-form query
 * import on the result table that is created by joining the two tables on
 * the id column.
 */
public void testSimpleJoin() throws IOException {
    tableNames = new ArrayList<String>();

    String[] types1 = { "SMALLINT", };
    String[] vals1 = { "1", };
    String tableName1 = getTableName();
    createTableWithColTypes(types1, vals1);
    tableNames.add(tableName1);

    incrementTableNum();

    String[] types2 = { "SMALLINT", "VARCHAR(32)", };
    String[] vals2 = { "1", "'foo'", };
    String tableName2 = getTableName();
    createTableWithColTypes(types2, vals2);
    tableNames.add(tableName2);

    String query = "SELECT " + tableName1 + "." + getColName(0) + ", " + tableName2 + "." + getColName(1) + " "
            + "FROM " + tableName1 + " JOIN " + tableName2 + " ON (" + tableName1 + "." + getColName(0) + " = "
            + tableName2 + "." + getColName(0) + ") WHERE " + tableName1 + "." + getColName(0)
            + " < 3 AND $CONDITIONS";

    runImport(getArgv(tableName1 + "." + getColName(0), query));

    Path warehousePath = new Path(this.getWarehouseDir());
    Path filePath = new Path(warehousePath, "part-m-00000");
    String expectedVal = "1,foo";

    BufferedReader reader = null;
    if (!isOnPhysicalCluster()) {
        reader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(filePath.toString()))));
    } else {
        FileSystem dfs = FileSystem.get(getConf());
        FSDataInputStream dis = dfs.open(filePath);
        reader = new BufferedReader(new InputStreamReader(dis));
    }
    try {
        String line = reader.readLine();
        assertEquals("QueryResult expected a different string", expectedVal, line);
    } finally {
        IOUtils.closeStream(reader);
    }
}

From source file: com.cloudera.sqoop.TestMultiMaps.java

License: Apache License

public void runMultiMapTest(String splitByCol, int expectedSum) throws IOException {

    String[] columns = HsqldbTestServer.getFieldNames();
    ClassLoader prevClassLoader = null;
    SequenceFile.Reader reader = null;

    String[] argv = getArgv(true, columns, splitByCol);
    runImport(argv);
    try {
        ImportTool importTool = new ImportTool();
        SqoopOptions opts = importTool.parseArguments(getArgv(false, columns, splitByCol), null, null, true);

        CompilationManager compileMgr = new CompilationManager(opts);
        String jarFileName = compileMgr.getJarFilename();

        prevClassLoader = ClassLoaderStack.addJarFile(jarFileName, getTableName());

        List<Path> paths = getDataFilePaths();
        Configuration conf = new Configuration();
        int curSum = 0;

        // We expect multiple files. We need to open all the files and sum up the
        // first column across all of them.
        for (Path p : paths) {
            reader = SeqFileReader.getSeqFileReader(p.toString());

            // here we can actually instantiate (k, v) pairs.
            Object key = ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Object val = ReflectionUtils.newInstance(reader.getValueClass(), conf);

            // We know that these values are two ints separated by a ','
            // character.  Since this is all dynamic, though, we don't want to
            // actually link against the class and use its methods. So we just
            // parse this back into int fields manually.  Sum them up and ensure
            // that we get the expected total for the first column, to verify that
            // we got all the results from the db into the file.

            // now sum up everything in the file.
            while (reader.next(key) != null) {
                reader.getCurrentValue(val);
                curSum += getFirstInt(val.toString());
            }

            IOUtils.closeStream(reader);
            reader = null;
        }

        assertEquals("Total sum of first db column mismatch", expectedSum, curSum);
    } catch (InvalidOptionsException ioe) {
        fail(ioe.toString());
    } catch (ParseException pe) {
        fail(pe.toString());
    } finally {
        IOUtils.closeStream(reader);

        if (null != prevClassLoader) {
            ClassLoaderStack.setCurrentClassLoader(prevClassLoader);
        }
    }
}

From source file: com.cloudera.training.metrics.JobHistoryHelper.java

License: Apache License

public static JobHistory.JobInfo getJobInfoFromHdfsOutputDir(String outputDir, Configuration conf)
        throws IOException {
    Path output = new Path(outputDir);
    Path historyLogDir = new Path(output, "_logs/history");
    FileSystem fs = output.getFileSystem(conf);
    if (!fs.exists(historyLogDir)) {
        throw new IOException("History directory " + historyLogDir.toString() + " does not exist");
    }
    Path[] jobFiles = FileUtil.stat2Paths(fs.listStatus(historyLogDir, jobLogFileFilter));
    if (jobFiles.length == 0) {
        throw new IOException("Not a valid history directory " + historyLogDir.toString());
    }
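    // Assumed reading of the classic (Hadoop 1.x) history-file naming scheme:
    // the decoded name splits on '_' into host, timestamp, then the job ID
    // fields, so indices 2..4 reassemble an ID like "job_200912310001_0005".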
    String[] jobDetails = JobHistory.JobInfo.decodeJobHistoryFileName(jobFiles[0].getName()).split("_");
    String jobId = jobDetails[2] + "_" + jobDetails[3] + "_" + jobDetails[4];
    JobHistory.JobInfo job = new JobHistory.JobInfo(jobId);
    DefaultJobHistoryParser.parseJobTasks(jobFiles[0].toString(), job, fs);
    return job;
}

From source file: com.cloudy.mapred.base.JobUtil.java

License: Apache License

public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat, Configuration conf)
        throws IOException {

    Job job = new Job(new Configuration(conf));
    Configuration jobConf = job.getConfiguration();

    if (mapper.equals(Mapper.class)) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(mapper);

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    job.setOutputKeyClass(mapperKey);
    job.setOutputValueClass(mapperValue);
    jobConf.setBoolean("mapred.compress.map.output", true);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}
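
A hedged usage sketch for the map-only prepareJob() overload above; the input/output locations and the pass-through mapper are hypothetical placeholders, not part of JobUtil:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class PrepareJobUsage {
    // A trivial pass-through mapper so prepareJob() can infer the job jar.
    public static class PassThroughMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws java.io.IOException, InterruptedException {
            context.write(key, value);
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = JobUtil.prepareJob(new Path("/data/in"), new Path("/data/out"), // hypothetical paths
                TextInputFormat.class, PassThroughMapper.class, LongWritable.class, Text.class,
                TextOutputFormat.class, new Configuration());
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}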

From source file: com.cloudy.mapred.base.JobUtil.java

License: Apache License

/**
 * Create a map and reduce Hadoop job.  Does not set the name on the job.
 * @param inputPath The input {@link org.apache.hadoop.fs.Path}
 * @param outputPath The output {@link org.apache.hadoop.fs.Path}
 * @param inputFormat The {@link org.apache.hadoop.mapreduce.InputFormat}
 * @param mapper The {@link org.apache.hadoop.mapreduce.Mapper} class to use
 * @param mapperKey The {@link org.apache.hadoop.io.Writable} key class.  If the Mapper is a no-op,
 *                  this value may be null
 * @param mapperValue The {@link org.apache.hadoop.io.Writable} value class.  If the Mapper is a no-op,
 *                    this value may be null
 * @param reducer The {@link org.apache.hadoop.mapreduce.Reducer} to use
 * @param reducerKey The reducer key class.
 * @param reducerValue The reducer value class.
 * @param outputFormat The {@link org.apache.hadoop.mapreduce.OutputFormat}.
 * @param conf The {@link org.apache.hadoop.conf.Configuration} to use.
 * @return The {@link org.apache.hadoop.mapreduce.Job}.
 * @throws IOException if there is a problem with the IO.
 *
 * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class)
 * @see #prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class,
 * org.apache.hadoop.conf.Configuration)
 */
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException {

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    if (reducer.equals(Reducer.class)) {
        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
        }
        job.setJarByClass(mapper);
    } else {
        job.setJarByClass(reducer);
    }

    job.setInputFormatClass(inputFormat);
    jobConf.set("mapred.input.dir", inputPath.toString());

    job.setMapperClass(mapper);
    if (mapperKey != null) {
        job.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        job.setMapOutputValueClass(mapperValue);
    }

    //    jobConf.setBoolean("mapred.compress.map.output", true);

    job.setReducerClass(reducer);
    job.setOutputKeyClass(reducerKey);
    job.setOutputValueClass(reducerValue);

    job.setOutputFormatClass(outputFormat);
    jobConf.set("mapred.output.dir", outputPath.toString());

    return job;
}