Example usage for org.apache.hadoop.mapreduce.lib.output FileOutputFormat OUTDIR

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce.lib.output FileOutputFormat OUTDIR.

Prototype

public static final String OUTDIR

Document

The destination directory of the job's output; the constant holds the configuration key mapreduce.output.fileoutputformat.outputdir.
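
For orientation, here is a minimal, self-contained sketch (not taken from any of the projects listed under Usage) showing how this key relates to the usual FileOutputFormat.setOutputPath helper; the job name "outdir-example" and the output path are illustrative placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class OutDirExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job name is an illustrative placeholder
        Job job = Job.getInstance(conf, "outdir-example");

        // The usual way to set the output directory; internally this stores the
        // qualified path under the FileOutputFormat.OUTDIR configuration key
        FileOutputFormat.setOutputPath(job, new Path("/tmp/outdir-example"));

        // The same value can also be read (or set) directly through the key
        String outDir = job.getConfiguration().get(FileOutputFormat.OUTDIR);
        System.out.println("Output directory: " + outDir);
    }
}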

Usage

From source file:co.cask.cdap.data2.dataset2.lib.file.FileSetDataset.java

License:Apache License

@Override
public Map<String, String> getOutputFormatConfiguration() {
    if (isExternal) {
        throw new UnsupportedOperationException(
                "Output is not supported for external file set '" + spec.getName() + "'");
    }
    ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
    builder.putAll(FileSetProperties.getOutputProperties(spec.getProperties()));
    builder.putAll(FileSetProperties.getOutputProperties(runtimeArguments));
    if (outputLocation != null) {
        builder.put(FileOutputFormat.OUTDIR, getFileSystemPath(outputLocation));
    }
    return builder.build();
}

From source file:co.cask.cdap.data2.dataset2.lib.partitioned.TimePartitionedFileSetTest.java

License:Apache License

/**
 * Tests that the output file path is set correctly, based on the output partition time.
 */
@Test
public void testOutputPartitionPath() throws Exception {
    // test specifying output time
    Date date = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT).parse("1/1/15 8:42 pm");
    Map<String, String> args = Maps.newHashMap();
    TimePartitionedFileSetArguments.setOutputPartitionTime(args, date.getTime());
    TimeZone timeZone = Calendar.getInstance().getTimeZone();
    TimePartitionedFileSetArguments.setOutputPathFormat(args, "yyyy-MM-dd/HH_mm", timeZone.getID());
    TimePartitionedFileSet ds = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args);

    String outputPath = ds.getEmbeddedFileSet().getOutputLocation().toURI().getPath();
    Assert.assertTrue(outputPath.endsWith("2015-01-01/20_42"));

    Map<String, String> outputConfig = ds.getOutputFormatConfiguration();
    Assert.assertTrue(outputConfig.get(FileOutputFormat.OUTDIR).endsWith("2015-01-01/20_42"));

    // test specifying output time and partition key -> time should prevail
    PartitionKey key = PartitionKey.builder().addIntField("year", 2014).addIntField("month", 1)
            .addIntField("day", 1).addIntField("hour", 20).addIntField("minute", 54).build();
    TimePartitionedFileSet ds1 = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args);
    TimePartitionedFileSetArguments.setOutputPartitionKey(args, key);
    outputConfig = ds1.getOutputFormatConfiguration();
    Assert.assertTrue(outputConfig.get(FileOutputFormat.OUTDIR).endsWith("2015-01-01/20_42"));

    args.clear();
    TimePartitionedFileSetArguments.setOutputPartitionKey(args, key);
    TimePartitionedFileSet ds2 = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args);
    outputConfig = ds2.getOutputFormatConfiguration();
    Assert.assertTrue(outputConfig.get(FileOutputFormat.OUTDIR).endsWith("54"));

    args.clear();
    TimePartitionedFileSet ds3 = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args);
    try {
        ds3.getOutputFormatConfiguration();
        Assert.fail(
                "getOutputFormatConfiguration should have failed with neither output time nor partition key");
    } catch (DataSetException e) {
        // expected
    }
}

From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java

License:Apache License

/**
 * Set the {@link Path} of the output directory for the map-reduce job.
 *
 * @param job
 *            The job to modify
 * @param outputDir
 *            the {@link Path} of the output directory for the map-reduce
 *            job.
 */
public static void setOutputPath(Job job, Path outputDir) {
    try {
        outputDir = outputDir.getFileSystem(job.getConfiguration()).makeQualified(outputDir);
    } catch (IOException e) {
        // Throw the IOException as a RuntimeException to be compatible with
        // MR1
        throw new RuntimeException(e);
    }
    job.getConfiguration().set(FileOutputFormat.OUTDIR, outputDir.toString());
}

From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java

License:Apache License

/**
 * Get the {@link Path} to the output directory for the map-reduce job.
 *
 * @return the {@link Path} to the output directory for the map-reduce job.
 * @see FileOutputFormat#getWorkOutputPath(TaskInputOutputContext)
 */
public static Path getOutputPath(JobContext job) {
    String name = job.getConfiguration().get(FileOutputFormat.OUTDIR);
    return name == null ? null : new Path(name);
}

From source file:cz.seznam.euphoria.hadoop.output.HadoopTextFileSink.java

License:Apache License

/**
 * Constructs a data sink based on hadoop's {@link TextOutputFormat}.
 * The specified path is automatically set/overridden in the given hadoop
 * configuration.
 *
 * @param path the path to write data to
 * @param hadoopConfig the hadoop configuration to build on top of
 *
 * @throws NullPointerException if any of the parameters is {@code null}
 */
@SuppressWarnings("unchecked")
public HadoopTextFileSink(String path, Configuration hadoopConfig) {
    super((Class) TextOutputFormat.class, hadoopConfig);
    hadoopConfig.set(FileOutputFormat.OUTDIR, path);
}

From source file:cz.seznam.euphoria.hadoop.output.SequenceFileSink.java

License:Apache License

/**
 * Constructs a data sink based on hadoop's {@link SequenceFileOutputFormat}.
 * The specified path is automatically set/overridden in the given hadoop
 * configuration as well as the key and value types.
 *
 * @param keyType the class representing the type of the keys emitted
 * @param valueType the class representing the type of the values emitted
 * @param path the path to write data to
 * @param hadoopConfig the hadoop configuration to build on top of
 *
 * @throws NullPointerException if any of the parameters is {@code null}
 */
@SuppressWarnings("unchecked")
public SequenceFileSink(Class<K> keyType, Class<V> valueType, String path, Configuration hadoopConfig) {
    super((Class) SequenceFileOutputFormat.class, hadoopConfig);
    hadoopConfig.set(FileOutputFormat.OUTDIR, path);
    hadoopConfig.set(JobContext.OUTPUT_KEY_CLASS, keyType.getName());
    hadoopConfig.set(JobContext.OUTPUT_VALUE_CLASS, valueType.getName());
}

From source file:nl.tudelft.graphalytics.giraph.GiraphJob.java

License:Apache License

/**
 * Creates a new Giraph job configuration and loads it with generic options
 * such as input and output paths, the number of workers, and the worker
 * heap size. It sets the computation and I/O format classes based on
 * the return value of their respective hooks. The configure method is called
 * to allow for job-specific configuration. Finally, the Giraph job is
 * submitted and is executed (blocking).
 *
 * @param args ignored
 * @return zero iff the job completed successfully
 */
@Override
public final int run(String[] args) throws Exception {
    loadConfiguration();

    // Prepare the job configuration
    GiraphConfiguration configuration = new GiraphConfiguration(getConf());

    // Set the computation class
    configuration.setComputationClass(getComputationClass());

    // Prepare input paths
    Path vertexInputPath = new Path(inputPath + ".v");
    Path edgeInputPath = new Path(inputPath + ".e");

    // Set input paths
    GiraphFileInputFormat.addVertexInputPath(configuration, vertexInputPath);
    GiraphFileInputFormat.addEdgeInputPath(configuration, edgeInputPath);

    // Set vertex/edge input format class
    configuration.setVertexInputFormatClass(getVertexInputFormatClass());
    configuration.setEdgeInputFormatClass(getEdgeInputFormatClass());

    // Set output path and output format class
    configuration.set(FileOutputFormat.OUTDIR, outputPath);
    if (getVertexOutputFormatClass() != null) {
        configuration.setVertexOutputFormatClass(getVertexOutputFormatClass());
    } else {
        configuration.setEdgeOutputFormatClass(getEdgeOutputFormatClass());
    }

    // Set deployment-specific configuration from external configuration files
    configuration.setWorkerConfiguration(workerCount, workerCount, 100.0f);
    configuration.setZooKeeperConfiguration(zooKeeperAddress);
    configuration.setInt("mapreduce.map.memory.mb", workerMemory);
    configuration.set("mapreduce.map.java.opts", "-Xmx" + heapSize + "M");

    // Set algorithm-specific configuration
    configure(configuration);

    // Create the Giraph job
    org.apache.giraph.job.GiraphJob job = new org.apache.giraph.job.GiraphJob(configuration,
            "Graphalytics: " + getClass().getSimpleName());
    // Launch it
    LOG.debug("- Starting Giraph job");
    return job.run(false) ? 0 : -1;
}

From source file:org.apache.kylin.engine.mr.steps.ExtractDictionaryFromGlobalMapper.java

License:Apache License

@Override
protected void doCleanup(Context context) throws IOException, InterruptedException {
    FileSystem fs = FileSystem.get(context.getConfiguration());
    Path outputDirBase = new Path(context.getConfiguration().get(FileOutputFormat.OUTDIR));

    globalDicts = Lists.newArrayListWithExpectedSize(globalColumns.size());
    Map<TblColRef, Dictionary<String>> dictionaryMap = cubeSeg.buildDictionaryMap();
    for (int i = 0; i < globalColumns.size(); i++) {
        TblColRef colRef = globalColumns.get(i);
        globalDicts.add(dictionaryMap.get(colRef));
    }

    ShrunkenDictionary.StringValueSerializer strValueSerializer = new ShrunkenDictionary.StringValueSerializer();
    for (int i = 0; i < globalColumns.size(); i++) {
        List<String> colDistinctValues = Lists.newArrayList(globalColumnValues.get(i));
        if (colDistinctValues.size() == 0) {
            continue;
        }
        // sort values to accelerate the encoding process by reducing the swapping of global dictionary slices
        Collections.sort(colDistinctValues);

        ShrunkenDictionaryBuilder<String> dictBuilder = new ShrunkenDictionaryBuilder<>(globalDicts.get(i));
        for (String colValue : colDistinctValues) {
            dictBuilder.addValue(colValue);
        }
        Dictionary<String> shrunkenDict = dictBuilder.build(strValueSerializer);

        Path colDictDir = new Path(outputDirBase, globalColumns.get(i).getIdentity());
        if (!fs.exists(colDictDir)) {
            fs.mkdirs(colDictDir);
        }
        try (DataOutputStream dos = fs.create(new Path(colDictDir, splitKey))) {
            shrunkenDict.write(dos);
        }
    }
}

From source file:org.apache.kylin.engine.mr.steps.FilterRecommendCuboidDataMapper.java

License:Apache License

@Override
public void doCleanup(Context context) throws IOException, InterruptedException {
    mos.close();

    Path outputDirBase = new Path(context.getConfiguration().get(FileOutputFormat.OUTDIR), PathNameCuboidBase);
    FileSystem fs = FileSystem.get(context.getConfiguration());
    if (!fs.exists(outputDirBase)) {
        fs.mkdirs(outputDirBase);
        SequenceFile
                .createWriter(context.getConfiguration(),
                        SequenceFile.Writer.file(new Path(outputDirBase, "part-m-00000")),
                        SequenceFile.Writer.keyClass(Text.class), SequenceFile.Writer.valueClass(Text.class))
                .close();
    }
}

From source file:org.apache.pig.backend.hadoop.executionengine.tez.util.MRToTezHelper.java

License:Apache License

private static void populateMRSettingsToRetain() {

    // FileInputFormat
    mrSettingsToRetain.add(FileInputFormat.INPUT_DIR);
    mrSettingsToRetain.add(FileInputFormat.SPLIT_MAXSIZE);
    mrSettingsToRetain.add(FileInputFormat.SPLIT_MINSIZE);
    mrSettingsToRetain.add(FileInputFormat.PATHFILTER_CLASS);
    mrSettingsToRetain.add(FileInputFormat.NUM_INPUT_FILES);
    mrSettingsToRetain.add(FileInputFormat.INPUT_DIR_RECURSIVE);

    // FileOutputFormat
    mrSettingsToRetain.add(MRConfiguration.OUTPUT_BASENAME);
    mrSettingsToRetain.add(FileOutputFormat.COMPRESS);
    mrSettingsToRetain.add(FileOutputFormat.COMPRESS_CODEC);
    mrSettingsToRetain.add(FileOutputFormat.COMPRESS_TYPE);
    mrSettingsToRetain.add(FileOutputFormat.OUTDIR);
    mrSettingsToRetain.add(FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER);
}