Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
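
A minimal sketch of the typical call site: a custom RecordReader pulling a job setting out of the TaskAttemptContext during initialize(). The class and the "example.encoding" property below are hypothetical, used only to illustrate the pattern.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class ConfiguredRecordReader extends RecordReader<LongWritable, Text> {
    private String encoding;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The context carries the job's Configuration into the task.
        Configuration conf = context.getConfiguration();
        // "example.encoding" is a made-up property; the second argument is the default.
        encoding = conf.get("example.encoding", "UTF-8");
    }

    @Override
    public boolean nextKeyValue() {
        return false; // a real reader would advance through the split here
    }

    @Override
    public LongWritable getCurrentKey() {
        return null;
    }

    @Override
    public Text getCurrentValue() {
        return null;
    }

    @Override
    public float getProgress() {
        return 1.0f;
    }

    @Override
    public void close() {
    }
}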

Usage

From source file: com.marklogic.mapreduce.NodeInputFormat.java

License: Apache License

@Override
public RecordReader<NodePath, MarkLogicNode> createRecordReader(InputSplit arg0, TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new NodeReader(context.getConfiguration());
}

From source file: com.marklogic.mapreduce.NodeOutputFormat.java

License: Apache License

@Override
public RecordWriter<NodePath, MarkLogicNode> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    TextArrayWritable hosts = getHosts(conf);
    String host = InternalUtilities.getHost(hosts);
    return new NodeWriter(conf, host);
}

From source file: com.marklogic.mapreduce.PropertyOutputFormat.java

License: Apache License

@Override
public RecordWriter<DocumentURI, MarkLogicNode> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    TextArrayWritable hosts = getHosts(conf);

    String host = InternalUtilities.getHost(hosts);
    return new PropertyWriter(conf, host);
}

From source file: com.marklogic.mapreduce.ValueInputFormat.java

License: Apache License

@Override
public RecordReader<LongWritable, VALUEIN> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new ValueReader<VALUEIN>(context.getConfiguration());
}

From source file: com.mcd.gdw.daas.mapreduce.ZipFileRecordReader.java

License: Apache License

/**
* Initialise and open the ZIP file from the FileSystem
*/
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {

    this.taskAttemptContext = taskAttemptContext;
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);
    zipfilename = path.getName();
    // Open the stream
    fsin = fs.open(path);
    zip = new ZipInputStream(fsin);

    skipFilesonSize = conf.get("skipFilesonSize");

    if (conf.get("MAX_FILE_SIZE") != null) {
        MAX_FILE_SIZE = Long.parseLong(conf.get("MAX_FILE_SIZE"));
    }

}
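
Both keys above are read as plain strings, so a driver would set them before job submission. A hedged driver-side sketch (the helper class and values are assumptions; only the key names come from the reader above):

import org.apache.hadoop.conf.Configuration;

public class ZipJobSetup {
    static void configure(Configuration conf) {
        // Any non-null value activates the size-based skip logic in the reader.
        conf.set("skipFilesonSize", "true");
        // Parsed with Long.parseLong, so this must be a numeric string (100 MB here).
        conf.set("MAX_FILE_SIZE", "104857600");
    }
}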

From source file: com.metamx.milano.hadoop.MilanoProtoFileOutputFormat.java

License: Apache License

/**
 * Retrieve a RecordWriter for this OutputFormat. Three config properties are supported:
 * com.metamx.milano.hadoop.filePrefix -- A string to prefix the written file names with.
 * com.metamx.milano.hadoop.filePath   -- A string appended to the output path; this lets you specify a subdirectory in which to put the files.
 * com.metamx.milano.proto.descriptor.base64 -- A string holding a base64-encoded DescriptorProto, converted to bytes.
 * This is overridden if the metadata has already been set.
 *
 * @param job The {@link TaskAttemptContext} to use. See above for specific options.
 *
 * @return A {@link RecordWriter}
 *
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public RecordWriter<K, Message> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    log.debug("Retrieving record writer");
    Configuration conf = job.getConfiguration();

    String prefix = conf.get("com.metamx.milano.hadoop.filePrefix", "");
    String path = conf.get("com.metamx.milano.hadoop.filePath", ".");

    if (metadata == null) {
        String descriptorBytes = conf.get("com.metamx.milano.proto.descriptor.base64");
        if (descriptorBytes != null) {
            metadata = MilanoTool.withBase64(descriptorBytes).getMetadata();
        }
    }

    String filename = "";
    if (!prefix.equals("")) {
        filename = filename.concat(prefix + "_");
    }
    filename = filename.concat(job.getTaskAttemptID().getTaskID().toString());
    Path directory = new Path(((FileOutputCommitter) getOutputCommitter(job)).getWorkPath(), path);

    Path file = new Path(directory, filename);
    FileSystem fs = file.getFileSystem(conf);

    final OutputStream outputStream = fs.create(file);

    return new RecordWriter<K, Message>() {
        private MilanoProtoFile.Writer writer = MilanoProtoFile.createWriter(outputStream, metadata);

        @Override
        public void write(K key, Message value) throws IOException, InterruptedException {
            writer.write(value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            writer.flush();
            writer.close();
            log.debug("Closed Writer");
        }
    };
}
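
The three properties documented above are ordinary Configuration strings, so a driver can set them before submitting the job. A hedged sketch (the helper class and values are assumptions; the property names come from the javadoc above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class MilanoJobSetup {
    // encodedDescriptor is assumed to hold a base64-encoded DescriptorProto.
    static void configure(Job job, String encodedDescriptor) {
        Configuration conf = job.getConfiguration();
        conf.set("com.metamx.milano.hadoop.filePrefix", "events");  // file name prefix
        conf.set("com.metamx.milano.hadoop.filePath", "by-type");   // subdirectory under the work path
        // Ignored if the output format's metadata has already been set.
        conf.set("com.metamx.milano.proto.descriptor.base64", encodedDescriptor);
    }
}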

From source file: com.metamx.milano.hadoop.MilanoProtoFileOutputFormatTests.java

License: Apache License

@Test
public void testBuildAndReadProtoFile() throws Exception {
    MilanoProtoFileOutputFormat outputFormat = new MilanoProtoFileOutputFormat();

    MilanoTypeMetadata.TypeMetadata.Builder metadata = MilanoTool
            .with(Testing.TestItem.getDescriptor().getName(), Testing.getDescriptor()).getMetadata()
            .toBuilder();

    metadata.addFileMetadata(MilanoTypeMetadata.FileMetadata.newBuilder().setKey("Key 1")
            .setValue(ByteString.copyFromUtf8("Value 1")));

    metadata.addFileMetadata(MilanoTypeMetadata.FileMetadata.newBuilder().setKey("Key 2")
            .setValue(ByteString.copyFromUtf8("Value 2")));

    outputFormat.setMetadata(metadata.build());

    TaskAttemptContext context = protoTestObjects.getContext();
    Configuration conf = context.getConfiguration();

    @SuppressWarnings("unchecked")
    RecordWriter<String, Message> writer = outputFormat.getRecordWriter(context);

    for (int i = 0; i < protoTestObjects.getTestItems().size(); i++) {
        writer.write("dummy", protoTestObjects.getTestItem(i));
    }

    writer.close(protoTestObjects.getContext());
}

From source file: com.metamx.milano.hadoop.MilanoProtoFileRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    Configuration conf = context.getConfiguration();
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    //    fs.setVerifyChecksum(false);

    // Passing a null builder to createReader works as long as the file has metadata.
    in = MilanoProtoFile.createReader(fs.open(path, DEFAULT_BUFFER_SIZE), builder, extensionRegistry,
            fileSplit.getLength());

    // This should never be null.
    metadata = in.getMetadata();
    assert metadata != null;

    // We keep statistics on how much has been read to be able to report progress.
    start = in.getBytesRead();
    end = fileSplit.getLength();
    more = start < end;
}
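
Given the start/end byte counters kept above, a plausible getProgress() for this reader might look like the following sketch (an assumption, not the project's actual code; it relies on the in, start, and end fields shown in initialize()):

@Override
public float getProgress() throws IOException {
    // Fraction of the split consumed, based on the counters from initialize().
    if (end == start) {
        return 1.0f;
    }
    return Math.min(1.0f, (in.getBytesRead() - start) / (float) (end - start));
}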

From source file: com.mongodb.hadoop.BSONFileOutputFormat.java

License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext context) throws IOException {
    // Open data output stream

    Path outPath = getDefaultWorkFile(context, ".bson");
    LOG.info("output going into " + outPath);

    FileSystem fs = outPath.getFileSystem(context.getConfiguration());
    FSDataOutputStream outFile = fs.create(outPath);

    FSDataOutputStream splitFile = null;
    if (MongoConfigUtil.getBSONOutputBuildSplits(context.getConfiguration())) {
        Path splitPath = new Path(outPath.getParent(), "." + outPath.getName() + ".splits");
        splitFile = fs.create(splitPath);
    }

    long splitSize = BSONSplitter.getSplitSize(context.getConfiguration(), null);
    return new BSONFileRecordWriter<K, V>(outFile, splitFile, splitSize);
}

From source file: com.mongodb.hadoop.input.BSONFileRecordReader.java

License: Apache License

@Override
public void initialize(final InputSplit inputSplit, final TaskAttemptContext context)
        throws IOException, InterruptedException {
    fileSplit = (FileSplit) inputSplit;
    final Configuration configuration = context.getConfiguration();
    if (LOG.isDebugEnabled()) {
        LOG.debug("reading split " + fileSplit);
    }
    Path file = fileSplit.getPath();
    FileSystem fs = file.getFileSystem(configuration);
    in = fs.open(file, 16 * 1024 * 1024);
    in.seek(fileSplit.getStart());

    if (MongoConfigUtil.getLazyBSON(configuration)) {
        callback = new LazyBSONCallback();
        decoder = new LazyBSONDecoder();
    } else {
        callback = new BasicBSONCallback();
        decoder = new BasicBSONDecoder();
    }
}