Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Source Link

Document

Return the configuration for the job.

Usage

From source file:eu.scape_project.pt.mets.hadoop.MetsRecordReader.java

License:Apache License

/**
 * Prepares this reader for a split: records the split's byte range, opens the
 * split's file, lets the XML helper process the declaration or root tag, and
 * then positions the stream at the start of the split.
 *
 * @param genericSplit the split to read; it is cast to {@code FileSplit}
 * @param context task context from which the job configuration is obtained
 * @throws IOException propagated from the file system, stream or XML helper calls
 * @throws InterruptedException declared by the method signature
 */
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    final Configuration configuration = context.getConfiguration();
    final FileSplit fileSplit = (FileSplit) genericSplit;

    // Byte range [start, end) covered by this split.
    start = fileSplit.getStart();
    LOG.debug("start = " + start);
    end = start + fileSplit.getLength();
    LOG.debug("end = " + end);

    // Open the split's file on the file system that owns its path.
    final Path splitPath = fileSplit.getPath();
    final FileSystem fileSystem = splitPath.getFileSystem(configuration);
    fsin = fileSystem.open(splitPath);

    // The helper runs on the freshly opened stream before any seek;
    // presumably it consumes the XML declaration/root tag -- confirm in XmlFSUtil.
    xml = new XmlFSUtil(fsin, end, tag);
    xml.readDeclarationOrRootTag();

    // Only now move to the beginning of the split.
    fsin.seek(start);
}

From source file:fire.util.fileformats.combineimagefileinputformat.CombineFileImageRecordReader.java

License:Apache License

/**
 * Creates a reader for one file of a combined split.
 *
 * @param split combined split that holds the file
 * @param context task context from which the job configuration is obtained
 * @param index position of the file inside {@code split}
 * @throws IOException propagated from resolving the file system of the path
 */
public CombineFileImageRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
        throws IOException {
    // Path of the index-th file in the combined split.
    path = split.getPath(index);
    // File system that owns that path, resolved with the job configuration.
    this.fs = path.getFileSystem(context.getConfiguration());
}

From source file:fire.util.fileformats.combinetextfileinputformat.CombineFileLineRecordReader.java

License:Apache License

/**
 * Creates a line reader for one file chunk of a combined split.
 *
 * <p>The chunk's offset and length are taken from the split. When the offset
 * is not zero, the stream is positioned one byte before it and the first line
 * read from there is discarded; the number of bytes consumed is added back so
 * that {@code startOffset} points just past that line.
 *
 * @param split combined split that holds the chunk
 * @param context task context from which the job configuration is obtained
 * @param index position of the chunk inside {@code split}
 * @throws IOException propagated from opening, seeking or reading the file
 */
public CombineFileLineRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
        throws IOException {
    path = split.getPath(index);
    fs = path.getFileSystem(context.getConfiguration());
    startOffset = split.getOffset(index);
    end = startOffset + split.getLength(index);

    // Open the file.
    fileIn = fs.open(path);
    if (startOffset == 0) {
        // Chunk starts at the beginning of the file: nothing to skip.
        reader = new LineReader(fileIn);
    } else {
        // Back up one byte, then skip the first line and re-establish "startOffset".
        --startOffset;
        fileIn.seek(startOffset);
        reader = new LineReader(fileIn);
        final int maxBytesToConsume = (int) Math.min((long) Integer.MAX_VALUE, end - startOffset);
        startOffset += reader.readLine(new Text(), 0, maxBytesToConsume);
    }
    pos = startOffset;
}

From source file:fire.util.fileformats.pdf.PdfRecordReader.java

License:Apache License

/**
 * Remembers the split (cast to {@code FileSplit}) and the job configuration
 * in fields. No file is opened here.
 *
 * @param split the split assigned to this reader
 * @param context task context from which the job configuration is obtained
 * @throws IOException declared by the method signature
 * @throws InterruptedException declared by the method signature
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    fileSplit = (FileSplit) split;
    conf = context.getConfiguration();
}

From source file:fire.util.fileformats.tika.TikaRecordReader.java

License:Apache License

/**
 * Creates a reader over all files of a combined split.
 *
 * @param split combined split whose paths will be read
 * @param context task context from which the job configuration is obtained
 * @throws IOException propagated from obtaining the file system
 */
public TikaRecordReader(CombineFileSplit split, TaskAttemptContext context) throws IOException {
    this.split = split;
    this.paths = split.getPaths();
    // NOTE(review): this is the file system selected by the configuration, not one
    // resolved per path -- confirm all paths of the split live on that file system.
    this.fs = FileSystem.get(context.getConfiguration());
}

From source file:format.OverlapLengthInputFormat.java

License:Apache License

/**
 * Builds an {@code OverlapLengthRecordReader} from the record length and
 * overlap length found in the job configuration.
 *
 * @param split the split to read (not used in this method)
 * @param context task context from which the job configuration is obtained
 * @return a new reader configured with the two lengths
 * @throws IOException if the configured record length is not greater than zero
 * @throws InterruptedException declared by the method signature
 */
@Override
public RecordReader<LongWritable, BytesWritable> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    final int fixedLength = getRecordLength(context.getConfiguration());
    final int overlap = getOverlapLength(context.getConfiguration());
    if (fixedLength > 0) {
        return new OverlapLengthRecordReader(fixedLength, overlap);
    }
    throw new IOException("Fixed record length " + fixedLength
            + " is invalid.  It should be set to a value greater than zero");
}

From source file:format.OverlapRecordReader.java

License:BSD License

/**
 * Prepares this reader for a split: records the split's byte range, reads the
 * maximum line length from the configuration, opens the file through the
 * compression codec registered for it, and, when the split does not start at
 * offset zero, seeks to the split start and discards the first line.
 *
 * @param genericSplit the split to read; it is cast to {@code FileSplit}
 * @param context task context from which the job configuration is obtained
 * @throws IOException if no codec is found for the file, or propagated from
 *         the file system and stream calls
 * @throws InterruptedException declared by the method signature
 */
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    final FileSplit fileSplit = (FileSplit) genericSplit;
    start = fileSplit.getStart();
    end = start + fileSplit.getLength();
    final Path splitPath = fileSplit.getPath();

    final Configuration conf = context.getConfiguration();
    maxLineLen = conf.getInt(MAX_LINE_LEN_CONF, Integer.MAX_VALUE);

    final FileSystem fileSystem = splitPath.getFileSystem(conf);
    // A codec is mandatory here: files without a matching codec are rejected.
    final CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(splitPath);
    if (codec == null) {
        throw new IOException("Codec for file " + splitPath + " not found, cannot run");
    }

    // Open the raw file first ...
    fileIn = fileSystem.open(splitPath);

    // ... then wrap it with the codec stream. Per the original author's note this
    // also reads the file header, which is why it happens before any seek.
    in = new LineReader(codec.createInputStream(fileIn), conf);

    if (start != 0) {
        fileIn.seek(start);

        // Read and ignore the first line, then take the resulting raw position as the new start.
        in.readLine(new Text());
        start = fileIn.getPos();
    }

    pos = start;
}

From source file:format.WikipediaPageInputFormat.java

License:Apache License

/**
 * Reports the split's string form as the task status and returns a new
 * {@code WikipediaPageRecordReader} built from the job configuration.
 *
 * @param split the split to read; only its {@code toString()} is used here
 * @param context task context that receives the status and supplies the configuration
 * @return a new record reader
 * @throws IOException declared by the method signature
 * @throws InterruptedException declared by the method signature
 */
@Override
public RecordReader<LongWritable, WikipediaPage> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    final String splitDescription = split.toString();
    context.setStatus(splitDescription);
    return new WikipediaPageRecordReader(context.getConfiguration());
}

From source file:formats.test.TestJsonCustomVertexOutputFormat.java

License:MIT License

/**
 * Compare the json output by VertexWriter and by direct call of
 * Gson.toJson()/* w ww.jav a  2s  .c o m*/
 * 
 * @param toWrite
 *            Used to populate Vertex in Giraph, and to produce json string
 *            for comparison.
 * @throws IOException
 * @throws InterruptedException
 */
private void testWorker(VertexType toWrite) throws IOException, InterruptedException {
    TaskAttemptContext tac = mock(TaskAttemptContext.class);
    when(tac.getConfiguration()).thenReturn(conf);

    Vertex vertex = mock(Vertex.class);
    when(vertex.getId()).thenReturn(new LongWritable(toWrite.getId()));
    Coordinate coordinateToWrite = toWrite.getValues().getCoordinate();
    double weightToWrite = toWrite.getValues().getWeight();
    when(vertex.getValue()).thenReturn(new VertexValuesWritable(coordinateToWrite, weightToWrite));

    List<Edge<LongWritable, EdgeValuesWritable>> edges = Lists
            .newArrayListWithCapacity(toWrite.getEdges().size());
    for (int i = 0; i < toWrite.getEdges().size(); i++) {
        EdgeType edgeType = toWrite.getEdges().get(i);
        edges.add(EdgeFactory.create(new LongWritable(edgeType.getTargetId()),
                new EdgeValuesWritable(edgeType.getWeight())));
    }

    when(vertex.getEdges()).thenReturn(edges);

    final RecordWriter<Text, Text> tw = mock(RecordWriter.class);
    JsonCustomVertexWriter writer = new JsonCustomVertexWriter() {
        @Override
        protected RecordWriter<Text, Text> createLineRecordWriter(TaskAttemptContext context)
                throws IOException, InterruptedException {
            return tw;
        }
    };

    writer.setConf(conf);
    writer.initialize(tac);
    writer.writeVertex(vertex);

    Text expected = new Text(new Gson().toJson(toWrite));
    verify(tw).write(expected, null);
}

From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.ExpressionOutputFormat.java

License:LGPL

/**
 * Creates the record writer for this task's output file.
 *
 * <p>When output compression is disabled, the writer wraps the plain file
 * stream and the work file has no extension. Otherwise the configured codec
 * (defaulting to {@code GzipCodec}) is instantiated, its default extension is
 * used for the work file, and the writer wraps the codec's output stream.
 *
 * @param context task context supplying the configuration and output settings
 * @return a new {@code ExpressionRecordWriter}
 * @throws IOException propagated from resolving the file system or creating the file
 * @throws InterruptedException declared by the method signature
 */
@Override
public RecordWriter<Text, LongWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    final Configuration conf = context.getConfiguration();

    // Uncompressed output: empty extension, raw file stream.
    if (!getCompressOutput(context)) {
        final Path plainFile = getDefaultWorkFile(context, "");
        final FileSystem plainFs = plainFile.getFileSystem(conf);
        final FSDataOutputStream plainOut = plainFs.create(plainFile, false);
        return new ExpressionRecordWriter(context, plainOut);
    }

    // Compressed output: instantiate the codec, then name the file after its extension.
    final Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
    final CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);

    final Path compressedFile = getDefaultWorkFile(context, codec.getDefaultExtension());
    final FileSystem compressedFs = compressedFile.getFileSystem(conf);
    final FSDataOutputStream rawOut = compressedFs.create(compressedFile, false);
    return new ExpressionRecordWriter(context, new DataOutputStream(codec.createOutputStream(rawOut)));
}