Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
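
A minimal sketch of the typical call site: a custom RecordReader pulling a job setting out of the TaskAttemptContext during initialize(). The class and the "example.encoding" property below are hypothetical, used only to illustrate the pattern.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class ConfiguredRecordReader extends RecordReader<LongWritable, Text> {
    private String encoding;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The context carries the job's Configuration into the task.
        Configuration conf = context.getConfiguration();
        // "example.encoding" is a made-up property; the second argument is the default.
        encoding = conf.get("example.encoding", "UTF-8");
    }

    @Override
    public boolean nextKeyValue() {
        return false; // a real reader would advance through the split here
    }

    @Override
    public LongWritable getCurrentKey() {
        return null;
    }

    @Override
    public Text getCurrentValue() {
        return null;
    }

    @Override
    public float getProgress() {
        return 1.0f;
    }

    @Override
    public void close() {
    }
}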

Usage

From source file: com.marklogic.mapreduce.NodeInputFormat.java

License: Apache License

@Override
public RecordReader<NodePath, MarkLogicNode> createRecordReader(InputSplit arg0, TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new NodeReader(context.getConfiguration());
}

From source file: com.marklogic.mapreduce.NodeOutputFormat.java

License: Apache License

@Override
public RecordWriter<NodePath, MarkLogicNode> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    TextArrayWritable hosts = getHosts(conf);
    String host = InternalUtilities.getHost(hosts);
    return new NodeWriter(conf, host);
}

From source file: com.marklogic.mapreduce.PropertyOutputFormat.java

License: Apache License

@Override
public RecordWriter<DocumentURI, MarkLogicNode> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    TextArrayWritable hosts = getHosts(conf);

    String host = InternalUtilities.getHost(hosts);
    return new PropertyWriter(conf, host);
}

From source file: com.marklogic.mapreduce.ValueInputFormat.java

License: Apache License

@Override
public RecordReader<LongWritable, VALUEIN> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new ValueReader<VALUEIN>(context.getConfiguration());
}

From source file: com.mcd.gdw.daas.mapreduce.ZipFileRecordReader.java

License: Apache License

/**
* Initialise and open the ZIP file from the FileSystem
*/
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {

    this.taskAttemptContext = taskAttemptContext;
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);
    zipfilename = path.getName();
    // Open the stream
    fsin = fs.open(path);
    zip = new ZipInputStream(fsin);

    skipFilesonSize = conf.get("skipFilesonSize");

    if (conf.get("MAX_FILE_SIZE") != null) {
        MAX_FILE_SIZE = Long.parseLong(conf.get("MAX_FILE_SIZE"));
    }

}
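
Both keys above are read as plain strings, so a driver would set them before job submission. A hedged driver-side sketch (the helper class and values are assumptions; only the key names come from the reader above):

import org.apache.hadoop.conf.Configuration;

public class ZipJobSetup {
    static void configure(Configuration conf) {
        // Any non-null value activates the size-based skip logic in the reader.
        conf.set("skipFilesonSize", "true");
        // Parsed with Long.parseLong, so this must be a numeric string (100 MB here).
        conf.set("MAX_FILE_SIZE", "104857600");
    }
}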

From source file: com.metamx.milano.hadoop.MilanoProtoFileOutputFormat.java

License: Apache License

/**
 * Retrieve a RecordWriter for this OutputFormat. Three config properties are supported:
 * com.metamx.milano.hadoop.filePrefix -- A string to prefix the written file names with.
 * com.metamx.milano.hadoop.filePath   -- A string appended to the output path; this lets you specify a subdirectory in which to put the files.
 * com.metamx.milano.proto.descriptor.base64 -- A string holding a base64-encoded DescriptorProto, converted to bytes.
 * This is overridden if the metadata has already been set.
 *
 * @param job The {@link TaskAttemptContext} to use. See above for specific options.
 *
 * @return A {@link RecordWriter}
 *
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public RecordWriter<K, Message> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    log.debug("Retrieving record writer");
    Configuration conf = job.getConfiguration();

    String prefix = conf.get("com.metamx.milano.hadoop.filePrefix", "");
    String path = conf.get("com.metamx.milano.hadoop.filePath", ".");

    if (metadata == null) {
        String descriptorBytes = conf.get("com.metamx.milano.proto.descriptor.base64");
        if (descriptorBytes != null) {
            metadata = MilanoTool.withBase64(descriptorBytes).getMetadata();
        }
    }

    String filename = "";
    if (!prefix.equals("")) {
        filename = filename.concat(prefix + "_");
    }
    filename = filename.concat(job.getTaskAttemptID().getTaskID().toString());
    Path directory = new Path(((FileOutputCommitter) getOutputCommitter(job)).getWorkPath(), path);

    Path file = new Path(directory, filename);
    FileSystem fs = file.getFileSystem(conf);

    final OutputStream outputStream = fs.create(file);

    return new RecordWriter<K, Message>() {
        private MilanoProtoFile.Writer writer = MilanoProtoFile.createWriter(outputStream, metadata);

        @Override
        public void write(K key, Message value) throws IOException, InterruptedException {
            writer.write(value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            writer.flush();
            writer.close();
            log.debug("Closed Writer");
        }
    };
}
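
The three properties documented above are ordinary Configuration strings, so a driver can set them before submitting the job. A hedged sketch (the helper class and values are assumptions; the property names come from the javadoc above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class MilanoJobSetup {
    // encodedDescriptor is assumed to hold a base64-encoded DescriptorProto.
    static void configure(Job job, String encodedDescriptor) {
        Configuration conf = job.getConfiguration();
        conf.set("com.metamx.milano.hadoop.filePrefix", "events");  // file name prefix
        conf.set("com.metamx.milano.hadoop.filePath", "by-type");   // subdirectory under the work path
        // Ignored if the output format's metadata has already been set.
        conf.set("com.metamx.milano.proto.descriptor.base64", encodedDescriptor);
    }
}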

From source file: com.metamx.milano.hadoop.MilanoProtoFileOutputFormatTests.java

License: Apache License

@Test
public void testBuildAndReadProtoFile() throws Exception {
    MilanoProtoFileOutputFormat outputFormat = new MilanoProtoFileOutputFormat();

    MilanoTypeMetadata.TypeMetadata.Builder metadata = MilanoTool
            .with(Testing.TestItem.getDescriptor().getName(), Testing.getDescriptor()).getMetadata()
            .toBuilder();

    metadata.addFileMetadata(MilanoTypeMetadata.FileMetadata.newBuilder().setKey("Key 1")
            .setValue(ByteString.copyFromUtf8("Value 1")));

    metadata.addFileMetadata(MilanoTypeMetadata.FileMetadata.newBuilder().setKey("Key 2")
            .setValue(ByteString.copyFromUtf8("Value 2")));

    outputFormat.setMetadata(metadata.build());

    TaskAttemptContext context = protoTestObjects.getContext();
    Configuration conf = context.getConfiguration();

    @SuppressWarnings("unchecked")
    RecordWriter<String, Message> writer = outputFormat.getRecordWriter(context);

    for (int i = 0; i < protoTestObjects.getTestItems().size(); i++) {
        writer.write("dummy", protoTestObjects.getTestItem(i));
    }

    writer.close(protoTestObjects.getContext());
}

From source file: com.metamx.milano.hadoop.MilanoProtoFileRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    Configuration conf = context.getConfiguration();
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    //    fs.setVerifyChecksum(false);

    // Passing a null builder to createReader works as long as the file has metadata.
    in = MilanoProtoFile.createReader(fs.open(path, DEFAULT_BUFFER_SIZE), builder, extensionRegistry,
            fileSplit.getLength());

    // This should never be null.
    metadata = in.getMetadata();
    assert metadata != null;

    // We keep statistics on how much has been read to be able to report progress.
    start = in.getBytesRead();
    end = fileSplit.getLength();
    more = start < end;
}
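
Given the start/end byte counters kept above, a plausible getProgress() for this reader might look like the following sketch (an assumption, not the project's actual code; it relies on the in, start, and end fields shown in initialize()):

@Override
public float getProgress() throws IOException {
    // Fraction of the split consumed, based on the counters from initialize().
    if (end == start) {
        return 1.0f;
    }
    return Math.min(1.0f, (in.getBytesRead() - start) / (float) (end - start));
}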

From source file: com.mongodb.hadoop.BSONFileOutputFormat.java

License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext context) throws IOException {
    // Open data output stream

    Path outPath = getDefaultWorkFile(context, ".bson");
    LOG.info("output going into " + outPath);

    FileSystem fs = outPath.getFileSystem(context.getConfiguration());
    FSDataOutputStream outFile = fs.create(outPath);

    FSDataOutputStream splitFile = null;
    if (MongoConfigUtil.getBSONOutputBuildSplits(context.getConfiguration())) {
        Path splitPath = new Path(outPath.getParent(), "." + outPath.getName() + ".splits");
        splitFile = fs.create(splitPath);
    }

    long splitSize = BSONSplitter.getSplitSize(context.getConfiguration(), null);
    return new BSONFileRecordWriter<K, V>(outFile, splitFile, splitSize);
}

From source file: com.mongodb.hadoop.input.BSONFileRecordReader.java

License: Apache License

@Override
public void initialize(final InputSplit inputSplit, final TaskAttemptContext context)
        throws IOException, InterruptedException {
    fileSplit = (FileSplit) inputSplit;
    final Configuration configuration = context.getConfiguration();
    if (LOG.isDebugEnabled()) {
        LOG.debug("reading split " + fileSplit);
    }
    Path file = fileSplit.getPath();
    FileSystem fs = file.getFileSystem(configuration);
    in = fs.open(file, 16 * 1024 * 1024);
    in.seek(fileSplit.getStart());

    if (MongoConfigUtil.getLazyBSON(configuration)) {
        callback = new LazyBSONCallback();
        decoder = new LazyBSONDecoder();
    } else {
        callback = new BasicBSONCallback();
        decoder = new BasicBSONDecoder();
    }
}