Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Source Link

Document

Return the configuration for the job.
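
The method is typically called from RecordReader.initialize() or from an InputFormat/OutputFormat factory method to read job settings. A minimal sketch of the pattern, assuming a custom reader; the property key my.app.buffer.size is hypothetical and used only for illustration:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Fetch the job configuration from the task attempt context.
    Configuration conf = context.getConfiguration();

    // Read a job property; "my.app.buffer.size" is a hypothetical key.
    int bufferSize = conf.getInt("my.app.buffer.size", 4096);
}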

Usage

From source file: eu.scape_project.pt.mets.hadoop.MetsRecordReader.java

License: Apache License

/**
 * Opens the file backing the split and positions the stream at the start
 * of the split.
 *
 * @param genericSplit the input split to read
 * @param context the task attempt context supplying the job configuration
 * @throws IOException
 * @throws InterruptedException
 */
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration jobConf = context.getConfiguration();

    FileSplit split = (FileSplit) genericSplit;

    // open the file and seek to the start of the split
    start = split.getStart();
    LOG.debug("start = " + start);
    end = start + split.getLength();
    LOG.debug("end = " + end);

    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(jobConf);
    fsin = fs.open(split.getPath());

    xml = new XmlFSUtil(fsin, end, tag);

    //xml.readDeclaration();
    //xml.readRootTag();
    xml.readDeclarationOrRootTag();

    fsin.seek(start);
}
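
Note the split.getPath().getFileSystem(jobConf) idiom, which recurs in several of the readers below: the Configuration obtained from the context determines which FileSystem implementation (local, HDFS, and so on) backs the path.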

From source file: fire.util.fileformats.combineimagefileinputformat.CombineFileImageRecordReader.java

License: Apache License

public CombineFileImageRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
        throws IOException {
    this.path = split.getPath(index);
    fs = this.path.getFileSystem(context.getConfiguration());
}

From source file: fire.util.fileformats.combinetextfileinputformat.CombineFileLineRecordReader.java

License: Apache License

public CombineFileLineRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
        throws IOException {

    this.path = split.getPath(index);
    fs = this.path.getFileSystem(context.getConfiguration());
    this.startOffset = split.getOffset(index);
    this.end = startOffset + split.getLength(index);
    boolean skipFirstLine = false;

    //open the file
    fileIn = fs.open(path);
    if (startOffset != 0) {
        skipFirstLine = true;
        --startOffset;
        fileIn.seek(startOffset);
    }
    reader = new LineReader(fileIn);
    if (skipFirstLine) { // skip first line and re-establish "startOffset".
        startOffset += reader.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, end - startOffset));
    }
    this.pos = startOffset;
}

From source file: fire.util.fileformats.pdf.PdfRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    this.fileSplit = (FileSplit) split;
    this.conf = context.getConfiguration();
}

From source file: fire.util.fileformats.tika.TikaRecordReader.java

License: Apache License

public TikaRecordReader(CombineFileSplit split, TaskAttemptContext context) throws IOException {
    this.paths = split.getPaths();
    this.fs = FileSystem.get(context.getConfiguration());
    this.split = split;
}

From source file: format.OverlapLengthInputFormat.java

License: Apache License

@Override
public RecordReader<LongWritable, BytesWritable> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    int recordLength = getRecordLength(context.getConfiguration());
    int overlapLength = getOverlapLength(context.getConfiguration());
    if (recordLength <= 0) {
        throw new IOException("Fixed record length " + recordLength
                + " is invalid. It should be set to a value greater than zero.");
    }
    return new OverlapLengthRecordReader(recordLength, overlapLength);
}
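
The getRecordLength() and getOverlapLength() helpers are not shown in the snippet; presumably they read integer properties from the Configuration. A hedged sketch of what such helpers usually look like; the property keys here are assumptions, not the format's actual names:

import org.apache.hadoop.conf.Configuration;

// Hypothetical configuration keys; the real names used by
// OverlapLengthInputFormat are not shown in the snippet above.
public static final String RECORD_LENGTH_KEY = "overlaplengthinputformat.record.length";
public static final String OVERLAP_LENGTH_KEY = "overlaplengthinputformat.overlap.length";

public static int getRecordLength(Configuration conf) {
    // A default of -1 makes the validation in createRecordReader() fail
    // when the job never set the property.
    return conf.getInt(RECORD_LENGTH_KEY, -1);
}

public static int getOverlapLength(Configuration conf) {
    return conf.getInt(OVERLAP_LENGTH_KEY, 0);
}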

From source file: format.OverlapRecordReader.java

License: BSD License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    //Configuration job = HadoopUtils.getConfiguration(context);
    Configuration job = context.getConfiguration();
    maxLineLen = job.getInt(MAX_LINE_LEN_CONF, Integer.MAX_VALUE);

    FileSystem fs = file.getFileSystem(job);
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    if (codec == null) {
        throw new IOException("Codec for file " + file + " not found, cannot run");
    }

    // open the file and seek to the start of the split
    fileIn = fs.open(split.getPath());

    // creates input stream and also reads the file header
    in = new LineReader(codec.createInputStream(fileIn), job);

    if (start != 0) {
        fileIn.seek(start);

        // read and ignore the first line
        in.readLine(new Text());
        start = fileIn.getPos();
    }

    this.pos = start;
}

From source file: format.WikipediaPageInputFormat.java

License: Apache License

@Override
public RecordReader<LongWritable, WikipediaPage> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    context.setStatus(split.toString());
    return new WikipediaPageRecordReader(context.getConfiguration());
}

From source file: formats.test.TestJsonCustomVertexOutputFormat.java

License: MIT License

/**
 * Compare the JSON output produced by the VertexWriter with the output of
 * a direct call to Gson.toJson().
 * 
 * @param toWrite
 *            Used to populate Vertex in Giraph, and to produce json string
 *            for comparison.
 * @throws IOException
 * @throws InterruptedException
 */
private void testWorker(VertexType toWrite) throws IOException, InterruptedException {
    TaskAttemptContext tac = mock(TaskAttemptContext.class);
    when(tac.getConfiguration()).thenReturn(conf);

    Vertex vertex = mock(Vertex.class);
    when(vertex.getId()).thenReturn(new LongWritable(toWrite.getId()));
    Coordinate coordinateToWrite = toWrite.getValues().getCoordinate();
    double weightToWrite = toWrite.getValues().getWeight();
    when(vertex.getValue()).thenReturn(new VertexValuesWritable(coordinateToWrite, weightToWrite));

    List<Edge<LongWritable, EdgeValuesWritable>> edges = Lists
            .newArrayListWithCapacity(toWrite.getEdges().size());
    for (int i = 0; i < toWrite.getEdges().size(); i++) {
        EdgeType edgeType = toWrite.getEdges().get(i);
        edges.add(EdgeFactory.create(new LongWritable(edgeType.getTargetId()),
                new EdgeValuesWritable(edgeType.getWeight())));
    }

    when(vertex.getEdges()).thenReturn(edges);

    final RecordWriter<Text, Text> tw = mock(RecordWriter.class);
    JsonCustomVertexWriter writer = new JsonCustomVertexWriter() {
        @Override
        protected RecordWriter<Text, Text> createLineRecordWriter(TaskAttemptContext context)
                throws IOException, InterruptedException {
            return tw;
        }
    };

    writer.setConf(conf);
    writer.initialize(tac);
    writer.writeVertex(vertex);

    Text expected = new Text(new Gson().toJson(toWrite));
    verify(tw).write(expected, null);
}
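
The key line for this page is when(tac.getConfiguration()).thenReturn(conf), which lets the writer under test see a controlled Configuration. Reduced to its essentials, the stubbing pattern looks like this (a minimal sketch; the property key is hypothetical):

import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.junit.Test;

@Test
public void returnsStubbedConfiguration() {
    Configuration conf = new Configuration();
    conf.set("my.test.property", "value"); // hypothetical key

    // Any code that calls context.getConfiguration() now receives conf.
    TaskAttemptContext tac = mock(TaskAttemptContext.class);
    when(tac.getConfiguration()).thenReturn(conf);

    assertEquals("value", tac.getConfiguration().get("my.test.property"));
}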

From source file: fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.ExpressionOutputFormat.java

License: LGPL

@Override
public RecordWriter<Text, LongWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();
    boolean isCompressed = getCompressOutput(context);

    CompressionCodec codec = null;
    String extension = "";

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }

    // Get the output file path
    final Path file = getDefaultWorkFile(context, extension);

    final FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {

        FSDataOutputStream fileOut = fs.create(file, false);
        return new ExpressionRecordWriter(context, fileOut);
    } else {

        FSDataOutputStream fileOut = fs.create(file, false);
        return new ExpressionRecordWriter(context, new DataOutputStream(codec.createOutputStream(fileOut)));
    }
}
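
getCompressOutput(context) and getOutputCompressorClass(context, GzipCodec.class) read settings that the submitting job stores through FileOutputFormat. A minimal job-side sketch of how those values end up in the configuration this method later reads; the job name is illustrative:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public static Job configureCompressedOutput() throws IOException {
    Job job = Job.getInstance(new Configuration(), "expression");

    // Store the compression settings in the job configuration; the
    // OutputFormat above reads them back via context.getConfiguration().
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    return job;
}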