List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputFormat OUTDIR
String OUTDIR
To view the source code for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.OUTDIR, click the Source Link.
From source file:co.cask.cdap.data2.dataset2.lib.file.FileSetDataset.java
License:Apache License
@Override public Map<String, String> getOutputFormatConfiguration() { if (isExternal) { throw new UnsupportedOperationException( "Output is not supported for external file set '" + spec.getName() + "'"); }//from www. j a v a2 s.com ImmutableMap.Builder<String, String> builder = ImmutableMap.builder(); builder.putAll(FileSetProperties.getOutputProperties(spec.getProperties())); builder.putAll(FileSetProperties.getOutputProperties(runtimeArguments)); if (outputLocation != null) { builder.put(FileOutputFormat.OUTDIR, getFileSystemPath(outputLocation)); } return builder.build(); }
From source file:co.cask.cdap.data2.dataset2.lib.partitioned.TimePartitionedFileSetTest.java
License:Apache License
/** * Tests that the output file path is set correctly, based on the output partition time. *///from w ww. j ava 2 s . com @Test public void testOutputPartitionPath() throws Exception { // test specifying output time Date date = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT).parse("1/1/15 8:42 pm"); Map<String, String> args = Maps.newHashMap(); TimePartitionedFileSetArguments.setOutputPartitionTime(args, date.getTime()); TimeZone timeZone = Calendar.getInstance().getTimeZone(); TimePartitionedFileSetArguments.setOutputPathFormat(args, "yyyy-MM-dd/HH_mm", timeZone.getID()); TimePartitionedFileSet ds = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args); String outputPath = ds.getEmbeddedFileSet().getOutputLocation().toURI().getPath(); Assert.assertTrue(outputPath.endsWith("2015-01-01/20_42")); Map<String, String> outputConfig = ds.getOutputFormatConfiguration(); Assert.assertTrue(outputConfig.get(FileOutputFormat.OUTDIR).endsWith("2015-01-01/20_42")); // test specifying output time and partition key -> time should prevail PartitionKey key = PartitionKey.builder().addIntField("year", 2014).addIntField("month", 1) .addIntField("day", 1).addIntField("hour", 20).addIntField("minute", 54).build(); TimePartitionedFileSet ds1 = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args); TimePartitionedFileSetArguments.setOutputPartitionKey(args, key); outputConfig = ds1.getOutputFormatConfiguration(); Assert.assertTrue(outputConfig.get(FileOutputFormat.OUTDIR).endsWith("2015-01-01/20_42")); args.clear(); TimePartitionedFileSetArguments.setOutputPartitionKey(args, key); TimePartitionedFileSet ds2 = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args); outputConfig = ds2.getOutputFormatConfiguration(); Assert.assertTrue(outputConfig.get(FileOutputFormat.OUTDIR).endsWith("54")); args.clear(); TimePartitionedFileSet ds3 = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args); try { ds3.getOutputFormatConfiguration(); Assert.fail( "getOutputFormatConfiguration should have failed 
with neither output time nor partition key"); } catch (DataSetException e) { // expected } }
From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License:Apache License
/** * Set the {@link Path} of the output directory for the map-reduce job. * * @param job/*from w ww . j a v a 2s . c o m*/ * The job to modify * @param outputDir * the {@link Path} of the output directory for the map-reduce * job. */ public static void setOutputPath(Job job, Path outputDir) { try { outputDir = outputDir.getFileSystem(job.getConfiguration()).makeQualified(outputDir); } catch (IOException e) { // Throw the IOException as a RuntimeException to be compatible with // MR1 throw new RuntimeException(e); } job.getConfiguration().set(FileOutputFormat.OUTDIR, outputDir.toString()); }
From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License:Apache License
/** * Get the {@link Path} to the output directory for the map-reduce job. * /* ww w . j a v a 2 s .c o m*/ * @return the {@link Path} to the output directory for the map-reduce job. * @see FileOutputFormat#getWorkOutputPath(TaskInputOutputContext) */ public static Path getOutputPath(JobContext job) { String name = job.getConfiguration().get(FileOutputFormat.OUTDIR); return name == null ? null : new Path(name); }
From source file:cz.seznam.euphoria.hadoop.output.HadoopTextFileSink.java
License:Apache License
/**
 * Constructs a data sink based on hadoop's {@link TextOutputFormat}.
 * The specified path is automatically set/overridden in the given hadoop
 * configuration under {@link FileOutputFormat#OUTDIR}.
 *
 * @param path the output directory path to write data to
 * @param hadoopConfig the hadoop configuration to build on top of; it is modified in place
 *
 * @throws NullPointerException if any of the parameters is {@code null}
 */
@SuppressWarnings("unchecked")
public HadoopTextFileSink(String path, Configuration hadoopConfig) {
  super((Class) TextOutputFormat.class, hadoopConfig);
  // Enforce the documented null contract explicitly instead of relying on whatever the
  // configuration object happens to do with a null value. The checks have to follow the
  // super(...) call because Java requires that call to be the first statement.
  java.util.Objects.requireNonNull(hadoopConfig, "hadoopConfig");
  java.util.Objects.requireNonNull(path, "path");
  hadoopConfig.set(FileOutputFormat.OUTDIR, path);
}
From source file:cz.seznam.euphoria.hadoop.output.SequenceFileSink.java
License:Apache License
/** * Constructs a data sink based on hadoop's {@link SequenceFileOutputFormat}. * The specified path is automatically set/overridden in the given hadoop * configuration as well as the key and value types. * * @param keyType the class representing the type of the keys emitted * @param valueType the class representing the type of the values emitted * @param path the path to read data from * @param hadoopConfig the hadoop configuration to build on top of * * @throws NullPointerException if any of the parameters is {@code null} *///from w w w.j a v a2s . c om @SuppressWarnings("unchecked") public SequenceFileSink(Class<K> keyType, Class<V> valueType, String path, Configuration hadoopConfig) { super((Class) SequenceFileOutputFormat.class, hadoopConfig); hadoopConfig.set(FileOutputFormat.OUTDIR, path); hadoopConfig.set(JobContext.OUTPUT_KEY_CLASS, keyType.getName()); hadoopConfig.set(JobContext.OUTPUT_VALUE_CLASS, valueType.getName()); }
From source file:nl.tudelft.graphalytics.giraph.GiraphJob.java
License:Apache License
/** * Creates a new Giraph job configuration and loads it with generic options * such as input and output paths, the number of workers, and the worker * heap size. It sets the computation and I/O format classes based on * the return value of their respective hooks. The configure method is called * to allow for job-specific configuration. Finally, the Giraph job is * submitted and is executed (blocking). * * @param args ignored//from w w w. j a v a 2 s .c o m * @return zero iff the job completed successfully */ @Override public final int run(String[] args) throws Exception { loadConfiguration(); // Prepare the job configuration GiraphConfiguration configuration = new GiraphConfiguration(getConf()); // Set the computation class configuration.setComputationClass(getComputationClass()); // Prepare input paths Path vertexInputPath = new Path(inputPath + ".v"); Path edgeInputPath = new Path(inputPath + ".e"); // Set input paths GiraphFileInputFormat.addVertexInputPath(configuration, vertexInputPath); GiraphFileInputFormat.addEdgeInputPath(configuration, edgeInputPath); // Set vertex/edge input format class configuration.setVertexInputFormatClass(getVertexInputFormatClass()); configuration.setEdgeInputFormatClass(getEdgeInputFormatClass()); // Set and output path and class configuration.set(FileOutputFormat.OUTDIR, outputPath); if (getVertexOutputFormatClass() != null) { configuration.setVertexOutputFormatClass(getVertexOutputFormatClass()); } else { configuration.setEdgeOutputFormatClass(getEdgeOutputFormatClass()); } // Set deployment-specific configuration from external configuration files configuration.setWorkerConfiguration(workerCount, workerCount, 100.0f); configuration.setZooKeeperConfiguration(zooKeeperAddress); configuration.setInt("mapreduce.map.memory.mb", workerMemory); configuration.set("mapreduce.map.java.opts", "-Xmx" + heapSize + "M"); // Set algorithm-specific configuration configure(configuration); // Create the Giraph job 
org.apache.giraph.job.GiraphJob job = new org.apache.giraph.job.GiraphJob(configuration, "Graphalytics: " + getClass().getSimpleName()); // Launch it LOG.debug("- Starting Giraph job"); return job.run(false) ? 0 : -1; }
From source file:org.apache.kylin.engine.mr.steps.ExtractDictionaryFromGlobalMapper.java
License:Apache License
@Override protected void doCleanup(Context context) throws IOException, InterruptedException { FileSystem fs = FileSystem.get(context.getConfiguration()); Path outputDirBase = new Path(context.getConfiguration().get(FileOutputFormat.OUTDIR)); globalDicts = Lists.newArrayListWithExpectedSize(globalColumns.size()); Map<TblColRef, Dictionary<String>> dictionaryMap = cubeSeg.buildDictionaryMap(); for (int i = 0; i < globalColumns.size(); i++) { TblColRef colRef = globalColumns.get(i); globalDicts.add(dictionaryMap.get(colRef)); }/*w ww .ja v a2s . c o m*/ ShrunkenDictionary.StringValueSerializer strValueSerializer = new ShrunkenDictionary.StringValueSerializer(); for (int i = 0; i < globalColumns.size(); i++) { List<String> colDistinctValues = Lists.newArrayList(globalColumnValues.get(i)); if (colDistinctValues.size() == 0) { continue; } // sort values to accelerate the encoding process by reducing the swapping of global dictionary slices Collections.sort(colDistinctValues); ShrunkenDictionaryBuilder<String> dictBuilder = new ShrunkenDictionaryBuilder<>(globalDicts.get(i)); for (String colValue : colDistinctValues) { dictBuilder.addValue(colValue); } Dictionary<String> shrunkenDict = dictBuilder.build(strValueSerializer); Path colDictDir = new Path(outputDirBase, globalColumns.get(i).getIdentity()); if (!fs.exists(colDictDir)) { fs.mkdirs(colDictDir); } try (DataOutputStream dos = fs.create(new Path(colDictDir, splitKey))) { shrunkenDict.write(dos); } } }
From source file:org.apache.kylin.engine.mr.steps.FilterRecommendCuboidDataMapper.java
License:Apache License
@Override public void doCleanup(Context context) throws IOException, InterruptedException { mos.close();//from w ww.jav a 2 s . c o m Path outputDirBase = new Path(context.getConfiguration().get(FileOutputFormat.OUTDIR), PathNameCuboidBase); FileSystem fs = FileSystem.get(context.getConfiguration()); if (!fs.exists(outputDirBase)) { fs.mkdirs(outputDirBase); SequenceFile .createWriter(context.getConfiguration(), SequenceFile.Writer.file(new Path(outputDirBase, "part-m-00000")), SequenceFile.Writer.keyClass(Text.class), SequenceFile.Writer.valueClass(Text.class)) .close(); } }
From source file:org.apache.pig.backend.hadoop.executionengine.tez.util.MRToTezHelper.java
License:Apache License
private static void populateMRSettingsToRetain() { // FileInputFormat mrSettingsToRetain.add(FileInputFormat.INPUT_DIR); mrSettingsToRetain.add(FileInputFormat.SPLIT_MAXSIZE); mrSettingsToRetain.add(FileInputFormat.SPLIT_MINSIZE); mrSettingsToRetain.add(FileInputFormat.PATHFILTER_CLASS); mrSettingsToRetain.add(FileInputFormat.NUM_INPUT_FILES); mrSettingsToRetain.add(FileInputFormat.INPUT_DIR_RECURSIVE); // FileOutputFormat mrSettingsToRetain.add(MRConfiguration.OUTPUT_BASENAME); mrSettingsToRetain.add(FileOutputFormat.COMPRESS); mrSettingsToRetain.add(FileOutputFormat.COMPRESS_CODEC); mrSettingsToRetain.add(FileOutputFormat.COMPRESS_TYPE); mrSettingsToRetain.add(FileOutputFormat.OUTDIR); mrSettingsToRetain.add(FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER); }