Usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext#getConfiguration()
/**
 * Returns the {@code Configuration} held by this context.
 *
 * @return the configuration object for this context
 */
public Configuration getConfiguration();
From source file:eu.scape_project.pt.mets.hadoop.MetsRecordReader.java
License:Apache License
/** * * @param genericSplit//from ww w. j a va 2s.c o m * @param context * @throws IOException * @throws InterruptedException */ public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException { Configuration jobConf = context.getConfiguration(); FileSplit split = (FileSplit) genericSplit; // open the file and seek to the start of the split start = split.getStart(); LOG.debug("start = " + start); end = start + split.getLength(); LOG.debug("end = " + end); Path file = split.getPath(); FileSystem fs = file.getFileSystem(jobConf); fsin = fs.open(split.getPath()); xml = new XmlFSUtil(fsin, end, tag); //xml.readDeclaration(); //xml.readRootTag(); xml.readDeclarationOrRootTag(); fsin.seek(start); }
From source file:fire.util.fileformats.combineimagefileinputformat.CombineFileImageRecordReader.java
License:Apache License
/**
 * Creates a reader for a single file of a combined split.
 *
 * @param split the combined split containing several files
 * @param context task context supplying the configuration used to resolve
 *        the file system
 * @param index position of the file inside {@code split} that this reader
 *        is responsible for
 * @throws IOException if the file system for the path cannot be obtained
 */
public CombineFileImageRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
        throws IOException {
    // Path of the index-th file in the combined split.
    path = split.getPath(index);
    // File system that owns that path, resolved with the task configuration.
    this.fs = path.getFileSystem(context.getConfiguration());
}
From source file:fire.util.fileformats.combinetextfileinputformat.CombineFileLineRecordReader.java
License:Apache License
/**
 * Creates a line reader for a single file of a combined split and positions
 * it at the first line to be returned.
 *
 * <p>If anything fails after the file has been opened, the stream is closed
 * before the exception propagates; a failed constructor leaves no object
 * behind on which a caller could invoke a close method.
 *
 * @param split the combined split containing several files
 * @param context task context supplying the configuration used to resolve
 *        the file system
 * @param index position of the file inside {@code split} handled by this
 *        reader
 * @throws IOException if the file cannot be opened, positioned or read
 */
public CombineFileLineRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
        throws IOException {
    this.path = split.getPath(index);
    fs = this.path.getFileSystem(context.getConfiguration());
    this.startOffset = split.getOffset(index);
    this.end = startOffset + split.getLength(index);

    // Open the file; from here on the stream must not be leaked on failure.
    fileIn = fs.open(path);
    boolean initialized = false;
    try {
        boolean skipFirstLine = false;
        if (startOffset != 0) {
            // Not at the beginning of the file: step back one byte and drop
            // the first line read from there (presumably so that a line
            // starting exactly at the offset is not lost - confirm).
            skipFirstLine = true;
            --startOffset;
            fileIn.seek(startOffset);
        }
        reader = new LineReader(fileIn);
        if (skipFirstLine) {
            // Skip the first line and re-establish "startOffset" by adding
            // the value returned by readLine.
            startOffset += reader.readLine(new Text(), 0,
                    (int) Math.min((long) Integer.MAX_VALUE, end - startOffset));
        }
        this.pos = startOffset;
        initialized = true;
    } finally {
        if (!initialized) {
            try {
                fileIn.close();
            } catch (IOException ignored) {
                // Deliberately ignored so the original failure is the one
                // that reaches the caller.
            }
        }
    }
}
From source file:fire.util.fileformats.pdf.PdfRecordReader.java
License:Apache License
/**
 * Stores the split and the task configuration for later use by this reader.
 *
 * @param split the split to read; cast to {@code FileSplit}
 * @param context task context from which the configuration is taken
 * @throws IOException as declared by the overridden method
 * @throws InterruptedException as declared by the overridden method
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Keep the split as a file split.
    fileSplit = (FileSplit) split;
    // Keep the configuration carried by the task context.
    conf = context.getConfiguration();
}
From source file:fire.util.fileformats.tika.TikaRecordReader.java
License:Apache License
/**
 * Creates a reader over all files of a combined split.
 *
 * @param split the combined split whose paths will be read
 * @param context task context supplying the configuration used to obtain
 *        the file system
 * @throws IOException if the file system cannot be obtained
 */
public TikaRecordReader(CombineFileSplit split, TaskAttemptContext context) throws IOException {
    this.split = split;
    // All paths contained in the combined split.
    paths = split.getPaths();
    // NOTE(review): FileSystem.get(conf) is resolved from the configuration
    // alone, not from the split's paths - confirm that all paths live on
    // that file system.
    fs = FileSystem.get(context.getConfiguration());
}
From source file:format.OverlapLengthInputFormat.java
License:Apache License
/**
 * Builds an {@code OverlapLengthRecordReader} from the record length and
 * overlap length found in the task configuration.
 *
 * @param split the split the reader will be used for (not used here)
 * @param context task context supplying the configuration
 * @return a new reader configured with the record and overlap lengths
 * @throws IOException if the configured record length is zero or negative
 * @throws InterruptedException as declared by the overridden method
 */
@Override
public RecordReader<LongWritable, BytesWritable> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    final int fixedLength = getRecordLength(context.getConfiguration());
    final int overlap = getOverlapLength(context.getConfiguration());

    // Only a strictly positive record length is usable.
    if (fixedLength > 0) {
        return new OverlapLengthRecordReader(fixedLength, overlap);
    }
    throw new IOException("Fixed record length " + fixedLength
            + " is invalid. It should be set to a value greater than zero");
}
From source file:format.OverlapRecordReader.java
License:BSD License
@Override public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException { FileSplit split = (FileSplit) genericSplit; start = split.getStart();//w w w .j av a2s .c o m end = start + split.getLength(); final Path file = split.getPath(); //Configuration job = HadoopUtils.getConfiguration(context); Configuration job = context.getConfiguration(); maxLineLen = job.getInt(MAX_LINE_LEN_CONF, Integer.MAX_VALUE); FileSystem fs = file.getFileSystem(job); CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file); if (codec == null) { throw new IOException("Codec for file " + file + " not found, cannot run"); } // open the file and seek to the start of the split fileIn = fs.open(split.getPath()); // creates input stream and also reads the file header in = new LineReader(codec.createInputStream(fileIn), job); if (start != 0) { fileIn.seek(start); // read and ignore the first line in.readLine(new Text()); start = fileIn.getPos(); } this.pos = start; }
From source file:format.WikipediaPageInputFormat.java
License:Apache License
/**
 * Reports the split as the task status and returns a new
 * {@code WikipediaPageRecordReader} built from the task configuration.
 *
 * @param split the split about to be read; its string form becomes the
 *        task status
 * @param context task context that receives the status and supplies the
 *        configuration
 * @return a new record reader for Wikipedia pages
 * @throws IOException as declared by the overridden method
 * @throws InterruptedException as declared by the overridden method
 */
@Override
public RecordReader<LongWritable, WikipediaPage> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    final String status = split.toString();
    context.setStatus(status);

    final RecordReader<LongWritable, WikipediaPage> pageReader =
            new WikipediaPageRecordReader(context.getConfiguration());
    return pageReader;
}
From source file:formats.test.TestJsonCustomVertexOutputFormat.java
License:MIT License
/** * Compare the json output by VertexWriter and by direct call of * Gson.toJson()/* w ww.jav a 2s .c o m*/ * * @param toWrite * Used to populate Vertex in Giraph, and to produce json string * for comparison. * @throws IOException * @throws InterruptedException */ private void testWorker(VertexType toWrite) throws IOException, InterruptedException { TaskAttemptContext tac = mock(TaskAttemptContext.class); when(tac.getConfiguration()).thenReturn(conf); Vertex vertex = mock(Vertex.class); when(vertex.getId()).thenReturn(new LongWritable(toWrite.getId())); Coordinate coordinateToWrite = toWrite.getValues().getCoordinate(); double weightToWrite = toWrite.getValues().getWeight(); when(vertex.getValue()).thenReturn(new VertexValuesWritable(coordinateToWrite, weightToWrite)); List<Edge<LongWritable, EdgeValuesWritable>> edges = Lists .newArrayListWithCapacity(toWrite.getEdges().size()); for (int i = 0; i < toWrite.getEdges().size(); i++) { EdgeType edgeType = toWrite.getEdges().get(i); edges.add(EdgeFactory.create(new LongWritable(edgeType.getTargetId()), new EdgeValuesWritable(edgeType.getWeight()))); } when(vertex.getEdges()).thenReturn(edges); final RecordWriter<Text, Text> tw = mock(RecordWriter.class); JsonCustomVertexWriter writer = new JsonCustomVertexWriter() { @Override protected RecordWriter<Text, Text> createLineRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { return tw; } }; writer.setConf(conf); writer.initialize(tac); writer.writeVertex(vertex); Text expected = new Text(new Gson().toJson(toWrite)); verify(tw).write(expected, null); }
From source file:fr.ens.biologie.genomique.eoulsan.bio.io.hadoop.ExpressionOutputFormat.java
License:LGPL
@Override public RecordWriter<Text, LongWritable> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); boolean isCompressed = getCompressOutput(context); CompressionCodec codec = null;/*from www . ja v a2 s .com*/ String extension = ""; if (isCompressed) { Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class); codec = ReflectionUtils.newInstance(codecClass, conf); extension = codec.getDefaultExtension(); } // Get the output file path final Path file = getDefaultWorkFile(context, extension); final FileSystem fs = file.getFileSystem(conf); if (!isCompressed) { FSDataOutputStream fileOut = fs.create(file, false); return new ExpressionRecordWriter(context, fileOut); } else { FSDataOutputStream fileOut = fs.create(file, false); return new ExpressionRecordWriter(context, new DataOutputStream(codec.createOutputStream(fileOut))); } }