Example usage for org.apache.hadoop.mapreduce.lib.output FileOutputFormat getUniqueFile

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getUniqueFile.

Prototype

public synchronized static String getUniqueFile(TaskAttemptContext context, String name, String extension) 

Document

Generates a unique filename based on the task id, name, and extension.
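
To see what the method produces, here is a minimal sketch that is not taken from any of the projects listed below; the helper class, the output name "events", and the extension ".tsv" are illustrative. It combines the committer's work path with getUniqueFile to resolve a per-task output file. For a map task with partition id 3 the unique name is typically of the form "events-m-00003.tsv"; for the first reduce task it is "events-r-00000.tsv".

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public final class UniqueFileExample {

    private UniqueFileExample() {
    }

    /**
     * Resolves the file a task attempt should write to: the committer's task
     * work path plus a task-unique name such as "events-m-00003.tsv".
     * Writing under the work path lets the committer promote the file into
     * the final output directory when the task commits.
     */
    public static Path resolveTaskOutput(FileOutputFormat<?, ?> format, TaskAttemptContext context)
            throws IOException {
        // FileOutputFormat's default committer is a FileOutputCommitter, which exposes getWorkPath().
        FileOutputCommitter committer = (FileOutputCommitter) format.getOutputCommitter(context);
        String uniqueName = FileOutputFormat.getUniqueFile(context, "events", ".tsv");
        return new Path(committer.getWorkPath(), uniqueName);
    }
}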

Usage

From source file: com.asakusafw.runtime.stage.output.TemporaryOutputFormat.java

License: Apache License

/**
 * Creates a new {@link RecordWriter} to output temporary data.
 * @param <V> value type
 * @param context current context
 * @param name output name
 * @param dataType value type
 * @return the created writer
 * @throws IOException if failed to create a new {@link RecordWriter}
 * @throws InterruptedException if interrupted
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public <V> RecordWriter<NullWritable, V> createRecordWriter(TaskAttemptContext context, String name,
        Class<V> dataType) throws IOException, InterruptedException {
    if (context == null) {
        throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
    }
    if (name == null) {
        throw new IllegalArgumentException("name must not be null"); //$NON-NLS-1$
    }
    if (dataType == null) {
        throw new IllegalArgumentException("dataType must not be null"); //$NON-NLS-1$
    }
    CompressionCodec codec = null;
    Configuration conf = context.getConfiguration();
    if (FileOutputFormat.getCompressOutput(context)) {
        Class<?> codecClass = FileOutputFormat.getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }
    FileOutputCommitter committer = getOutputCommitter(context);
    final Path file = new Path(committer.getWorkPath(), FileOutputFormat.getUniqueFile(context, name, "")); //$NON-NLS-1$
    final ModelOutput<V> out = TemporaryStorage.openOutput(conf, dataType, file, codec);
    return new RecordWriter<NullWritable, V>() {

        @Override
        public void write(NullWritable key, V value) throws IOException {
            out.write(value);
        }

        @Override
        public void close(TaskAttemptContext ignored) throws IOException {
            out.close();
        }

        @Override
        public String toString() {
            return String.format("TemporaryOutput(%s)", file); //$NON-NLS-1$
        }
    };
}

From source file: com.linkedin.cubert.examples.Purge.java

License: Open Source License

@Override
public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props)
        throws IOException, InterruptedException {
    block = input.values().iterator().next();
    conf = PhaseContext.getConf();
    output = TupleFactory.getInstance().newTuple(3);
    purgeFileName = FileCache.get(filesToCache.get(0));

    if (purgeFileName == null) {
        throw new IOException("purgeFileName is null");
    }

    loadMembersToPurge(purgeFileName);

    String columnName = JsonUtils.getText(json.get("args"), "purgeColumnName");
    setColumnName(columnName);

    // Create temp file
    Path root = null;
    String filename = null;
    tempFileName = null;

    if (PhaseContext.isMapper()) {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getMapContext());
        filename = FileOutputFormat.getUniqueFile(PhaseContext.getMapContext(), "tempFileForPurge", "");
    } else {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getRedContext());
        filename = FileOutputFormat.getUniqueFile(PhaseContext.getRedContext(), "tempFileForPurge", "");
    }

    tempFileName = root + "/" + filename;
}

From source file: com.linkedin.cubert.operator.TeeOperator.java

License: Open Source License

@Override
public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props)
        throws IOException, InterruptedException {
    block = input.values().iterator().next();
    String prefix = JsonUtils.getText(json, "prefix");

    BlockSchema teeSchema = new BlockSchema(json.get("teeSchema"));

    if (json.has("generate") && !json.get("generate").isNull()) {
        ObjectNode generateJson = JsonUtils.createObjectNode("name", "GENERATE", "input", json.get("input"),
                "output", json.get("input"), "outputTuple", json.get("generate"));

        generateOperator = new GenerateOperator();

        BlockProperties generateProps = new BlockProperties("teeGenerate", teeSchema, props);
        generateOperator.setInput(input, generateJson, generateProps);
    }

    Configuration conf = PhaseContext.getConf();

    Path root = null;
    String filename = null;

    if (PhaseContext.isMapper()) {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getMapContext());
        filename = FileOutputFormat.getUniqueFile(PhaseContext.getMapContext(), prefix, "");
    } else {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getRedContext());
        filename = FileOutputFormat.getUniqueFile(PhaseContext.getRedContext(), prefix, "");
    }

    writer = openedWriters.get(prefix);

    if (writer == null) {
        writer = StorageFactory.get(JsonUtils.getText(json, "type")).getTeeWriter();
        writer.open(conf, json, teeSchema, root, filename);
        openedWriters.put(prefix, writer);
    }

    if (json.has("filter") && json.get("filter") != null && !json.get("filter").isNull()) {
        JsonNode filterJson = json.get("filter");
        filterTree = new FunctionTree(block);
        try {
            filterTree.addFunctionTree(filterJson);
        } catch (PreconditionException e) {
            throw new RuntimeException(e);
        }

    }
}

From source file: org.apache.crunch.io.avro.trevni.TrevniRecordWriter.java

License: Apache License

public TrevniRecordWriter(TaskAttemptContext context) throws IOException {
    schema = initSchema(context);
    meta = filterMetadata(context.getConfiguration());
    writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());

    Path outputPath = FileOutputFormat.getOutputPath(context);

    String dir = FileOutputFormat.getUniqueFile(context, "part", "");
    dirPath = new Path(outputPath.toString() + "/" + dir);
    fs = dirPath.getFileSystem(context.getConfiguration());
    fs.mkdirs(dirPath);

    blockSize = fs.getDefaultBlockSize();
}

From source file: org.apache.gora.mapreduce.GoraOutputFormat.java

License: Apache License

private void setOutputPath(DataStore<K, T> store, TaskAttemptContext context) {
    if (store instanceof FileBackedDataStore) {
        FileBackedDataStore<K, T> fileStore = (FileBackedDataStore<K, T>) store;
        String uniqueName = FileOutputFormat.getUniqueFile(context, "part", "");

        //if file store output is not set, then get the output from FileOutputFormat
        if (fileStore.getOutputPath() == null) {
            fileStore.setOutputPath(FileOutputFormat.getOutputPath(context).toString());
        }

        //set the unique name of the data file
        String path = fileStore.getOutputPath();
        fileStore.setOutputPath(path + Path.SEPARATOR + uniqueName);
    }
}

From source file: org.apache.hcatalog.mapreduce.FileRecordWriterContainer.java

License: Apache License

@Override
public void write(WritableComparable<?> key, HCatRecord value) throws IOException, InterruptedException {

    org.apache.hadoop.mapred.RecordWriter localWriter;
    ObjectInspector localObjectInspector;
    SerDe localSerDe;
    OutputJobInfo localJobInfo = null;

    if (dynamicPartitioningUsed) {
        // calculate which writer to use from the remaining values - this needs to be done before we delete cols
        List<String> dynamicPartValues = new ArrayList<String>();
        for (Integer colToAppend : dynamicPartCols) {
            dynamicPartValues.add(value.get(colToAppend).toString());
        }

        String dynKey = dynamicPartValues.toString();
        if (!baseDynamicWriters.containsKey(dynKey)) {
            if ((maxDynamicPartitions != -1) && (baseDynamicWriters.size() > maxDynamicPartitions)) {
                throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
                        "Number of dynamic partitions being created "
                                + "exceeds configured max allowable partitions[" + maxDynamicPartitions
                                + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
                                + "] if needed.");
            }

            org.apache.hadoop.mapred.TaskAttemptContext currTaskContext = HCatMapRedUtil
                    .createTaskAttemptContext(context);
            configureDynamicStorageHandler(currTaskContext, dynamicPartValues);
            localJobInfo = HCatBaseOutputFormat.getJobInfo(currTaskContext);

            //setup serDe
            SerDe currSerDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
                    currTaskContext.getJobConf());
            try {
                InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), localJobInfo);
            } catch (SerDeException e) {
                throw new IOException("Failed to initialize SerDe", e);
            }

            //create base OutputFormat
            org.apache.hadoop.mapred.OutputFormat baseOF = ReflectionUtils
                    .newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf());

            //We are skipping calling checkOutputSpecs() for each partition
            //As it can throw a FileAlreadyExistsException when more than one mapper is writing to a partition
            //See HCATALOG-490, also to avoid contacting the namenode for each new FileOutputFormat instance
            //In general this should be ok for most FileOutputFormat implementations
            //but may become an issue for cases when the method is used to perform other setup tasks

            //get Output Committer
            org.apache.hadoop.mapred.OutputCommitter baseOutputCommitter = currTaskContext.getJobConf()
                    .getOutputCommitter();
            //create currJobContext the latest so it gets all the config changes
            org.apache.hadoop.mapred.JobContext currJobContext = HCatMapRedUtil
                    .createJobContext(currTaskContext);
            //setupJob()
            baseOutputCommitter.setupJob(currJobContext);
            //recreate to refresh jobConf of currTask context
            currTaskContext = HCatMapRedUtil.createTaskAttemptContext(currJobContext.getJobConf(),
                    currTaskContext.getTaskAttemptID(), currTaskContext.getProgressible());
            //set temp location
            currTaskContext.getConfiguration().set("mapred.work.output.dir",
                    new FileOutputCommitter(new Path(localJobInfo.getLocation()), currTaskContext).getWorkPath()
                            .toString());
            //setupTask()
            baseOutputCommitter.setupTask(currTaskContext);

            Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir"));
            Path childPath = new Path(parentDir, FileOutputFormat.getUniqueFile(currTaskContext, "part", ""));

            org.apache.hadoop.mapred.RecordWriter baseRecordWriter = baseOF.getRecordWriter(
                    parentDir.getFileSystem(currTaskContext.getConfiguration()), currTaskContext.getJobConf(),
                    childPath.toString(), InternalUtil.createReporter(currTaskContext));

            baseDynamicWriters.put(dynKey, baseRecordWriter);
            baseDynamicSerDe.put(dynKey, currSerDe);
            baseDynamicCommitters.put(dynKey, baseOutputCommitter);
            dynamicContexts.put(dynKey, currTaskContext);
            dynamicObjectInspectors.put(dynKey,
                    InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()));
            dynamicOutputJobInfo.put(dynKey, HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey)));
        }

        localJobInfo = dynamicOutputJobInfo.get(dynKey);
        localWriter = baseDynamicWriters.get(dynKey);
        localSerDe = baseDynamicSerDe.get(dynKey);
        localObjectInspector = dynamicObjectInspectors.get(dynKey);
    } else {
        localJobInfo = jobInfo;
        localWriter = getBaseRecordWriter();
        localSerDe = serDe;
        localObjectInspector = objectInspector;
    }

    for (Integer colToDel : partColsToDel) {
        value.remove(colToDel);
    }

    //The key given by user is ignored
    try {
        localWriter.write(NullWritable.get(), localSerDe.serialize(value.getAll(), localObjectInspector));
    } catch (SerDeException e) {
        throw new IOException("Failed to serialize object", e);
    }
}

From source file: org.apache.hive.hcatalog.mapreduce.DynamicPartitionFileRecordWriterContainer.java

License: Apache License

@Override
protected LocalFileWriter getLocalFileWriter(HCatRecord value) throws IOException, HCatException {
    OutputJobInfo localJobInfo = null;
    // Calculate which writer to use from the remaining values - this needs to
    // be done before we delete cols.
    List<String> dynamicPartValues = new ArrayList<String>();
    for (Integer colToAppend : dynamicPartCols) {
        Object partitionValue = value.get(colToAppend);
        dynamicPartValues
                .add(partitionValue == null ? HIVE_DEFAULT_PARTITION_VALUE : partitionValue.toString());
    }

    String dynKey = dynamicPartValues.toString();
    if (!baseDynamicWriters.containsKey(dynKey)) {
        if ((maxDynamicPartitions != -1) && (baseDynamicWriters.size() > maxDynamicPartitions)) {
            throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
                    "Number of dynamic partitions being created "
                            + "exceeds configured max allowable partitions[" + maxDynamicPartitions
                            + "], increase parameter [" + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
                            + "] if needed.");
        }

        org.apache.hadoop.mapred.TaskAttemptContext currTaskContext = HCatMapRedUtil
                .createTaskAttemptContext(context);
        configureDynamicStorageHandler(currTaskContext, dynamicPartValues);
        localJobInfo = HCatBaseOutputFormat.getJobInfo(currTaskContext.getConfiguration());

        // Setup serDe.
        SerDe currSerDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(),
                currTaskContext.getJobConf());
        try {
            InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), localJobInfo);
        } catch (SerDeException e) {
            throw new IOException("Failed to initialize SerDe", e);
        }

        // create base OutputFormat
        org.apache.hadoop.mapred.OutputFormat baseOF = ReflectionUtils
                .newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf());

        // We are skipping calling checkOutputSpecs() for each partition
        // As it can throw a FileAlreadyExistsException when more than one
        // mapper is writing to a partition.
        // See HCATALOG-490, also to avoid contacting the namenode for each new
        // FileOutputFormat instance.
        // In general this should be ok for most FileOutputFormat implementations
        // but may become an issue for cases when the method is used to perform
        // other setup tasks.

        // Get Output Committer
        org.apache.hadoop.mapred.OutputCommitter baseOutputCommitter = currTaskContext.getJobConf()
                .getOutputCommitter();

        // Create currJobContext the latest so it gets all the config changes
        org.apache.hadoop.mapred.JobContext currJobContext = HCatMapRedUtil.createJobContext(currTaskContext);

        // Set up job.
        baseOutputCommitter.setupJob(currJobContext);

        // Recreate to refresh jobConf of currTask context.
        currTaskContext = HCatMapRedUtil.createTaskAttemptContext(currJobContext.getJobConf(),
                currTaskContext.getTaskAttemptID(), currTaskContext.getProgressible());

        // Set temp location.
        currTaskContext.getConfiguration().set("mapred.work.output.dir",
                new FileOutputCommitter(new Path(localJobInfo.getLocation()), currTaskContext).getWorkPath()
                        .toString());

        // Set up task.
        baseOutputCommitter.setupTask(currTaskContext);

        Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir"));
        Path childPath = new Path(parentDir, FileOutputFormat.getUniqueFile(currTaskContext,
                currTaskContext.getConfiguration().get("mapreduce.output.basename", "part"), ""));

        RecordWriter baseRecordWriter = baseOF.getRecordWriter(
                parentDir.getFileSystem(currTaskContext.getConfiguration()), currTaskContext.getJobConf(),
                childPath.toString(), InternalUtil.createReporter(currTaskContext));

        baseDynamicWriters.put(dynKey, baseRecordWriter);
        baseDynamicSerDe.put(dynKey, currSerDe);
        baseDynamicCommitters.put(dynKey, baseOutputCommitter);
        dynamicContexts.put(dynKey, currTaskContext);
        dynamicObjectInspectors.put(dynKey,
                InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()));
        dynamicOutputJobInfo.put(dynKey,
                HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey).getConfiguration()));
    }

    return new LocalFileWriter(baseDynamicWriters.get(dynKey), dynamicObjectInspectors.get(dynKey),
            baseDynamicSerDe.get(dynKey), dynamicOutputJobInfo.get(dynKey));
}

From source file: org.apache.trevni.avro.mapreduce.AvroTrevniRecordWriterBase.java

License: Apache License

/**
 * Constructor.
 * @param context The TaskAttemptContext to supply the writer with information from the job configuration
 */
public AvroTrevniRecordWriterBase(TaskAttemptContext context) throws IOException {

    schema = initSchema(context);
    meta = filterMetadata(context.getConfiguration());
    writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());

    Path outputPath = FileOutputFormat.getOutputPath(context);

    String dir = FileOutputFormat.getUniqueFile(context, "part", "");
    dirPath = new Path(outputPath.toString() + "/" + dir);
    fs = dirPath.getFileSystem(context.getConfiguration());
    fs.mkdirs(dirPath);

    blockSize = fs.getDefaultBlockSize();
}