List of usage examples for org.apache.hadoop.mapreduce.lib.input.CombineFileSplit#getPath(int)
public Path getPath(int i)
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.AbstractGFRecordReader.java
License:Apache License
/**
 * Prepares this reader to iterate over all files bundled in a combined split.
 *
 * <p>The paths, start offsets and lengths of the split are handed to
 * {@code HDFSSplitIterator.newInstance} together with the file system
 * resolved from the first path of the split.
 *
 * @param split   the input split; cast to {@link CombineFileSplit} without a type check
 * @param context the task attempt context that supplies the configuration
 * @throws IOException          declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    final CombineFileSplit combined = (CombineFileSplit) split;

    final Path[] files = combined.getPaths();
    final long[] offsets = combined.getStartOffsets();
    final long[] lengths = combined.getLengths();

    final Configuration jobConf = context.getConfiguration();
    // Only the first path is consulted to pick the file system.
    final FileSystem fileSystem = combined.getPath(0).getFileSystem(jobConf);

    this.splitIterator =
            HDFSSplitIterator.newInstance(fileSystem, files, offsets, lengths, 0L, 0L);
}
From source file:com.marcolotz.lung.io.inputFormat.MultipleFilesRecordReader.java
License:Creative Commons License
/** * Implementation detail: This constructor is built to be called via * reflection from within CombineFileRecordReader. * /*from w w w.j av a 2 s . c o m*/ * @param fileSplit * The CombineFileSplit that this will read from. * @param context * The context for this task. * @param pathToProcess * The path index from the CombineFileSplit to process in this * record. */ public MultipleFilesRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context, Integer pathToProcess) { isProcessed = false; mFileToRead = fileSplit.getPath(pathToProcess); mFileLength = fileSplit.getLength(pathToProcess); mConf = context.getConfiguration(); /* never used in production, just for code integrity */ assert 0 == fileSplit.getOffset(pathToProcess); if (LOG.isDebugEnabled()) { LOG.debug("FileToRead is: " + mFileToRead.toString()); LOG.debug("Processing path " + pathToProcess + " out of " + fileSplit.getNumPaths()); try { FileSystem fs = FileSystem.get(mConf); /* never used in production, just for code integrity */ assert fs.getFileStatus(mFileToRead).getLen() == mFileLength; } catch (IOException ioe) { LOG.debug("Problem in file length"); } } fileContent = new BytesWritable(); }
From source file:com.moz.fiji.mapreduce.input.impl.WholeFileRecordReader.java
License:Apache License
/** * Implementation detail: This constructor is built to be called via * reflection from within CombineFileRecordReader. * * @param fileSplit The CombineFileSplit that this will read from. * @param context The context for this task. * @param pathToProcess The path index from the CombineFileSplit to process in this record. *//*from w w w .java 2s.c o m*/ public WholeFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context, Integer pathToProcess) { mProcessed = false; mFileToRead = fileSplit.getPath(pathToProcess); mFileLength = fileSplit.getLength(pathToProcess); mConf = context.getConfiguration(); assert 0 == fileSplit.getOffset(pathToProcess); if (LOG.isDebugEnabled()) { LOG.debug("FileToRead is: " + mFileToRead.toString()); LOG.debug("Processing path " + pathToProcess + " out of " + fileSplit.getNumPaths()); try { final FileSystem fs = mFileToRead.getFileSystem(mConf); assert fs.getFileStatus(mFileToRead).getLen() == mFileLength; } catch (IOException ioe) { // oh well, I was just testing. } } mFileName = new Text(); mFileText = new Text(); }
From source file:edu.gslis.streamcorpus.ThriftRecordReader.java
License:Apache License
public ThriftRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index) throws IOException { this.path = split.getPath(index); fs = this.path.getFileSystem(context.getConfiguration()); this.startOffset = split.getOffset(index); this.end = startOffset + split.getLength(index); this.pos = startOffset; in = fs.open(path);/* w ww .ja v a 2s. c o m*/ if (path.toUri().toString().endsWith("xz")) tp = new TBinaryProtocol.Factory().getProtocol(new TIOStreamTransport(new XZInputStream(in))); else tp = new TBinaryProtocol.Factory().getProtocol(new TIOStreamTransport(in)); }
From source file:edu.umn.cs.spatialHadoop.mapreduce.SpatialInputFormat3.java
License:Open Source License
@Override public RecordReader<K, Iterable<V>> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { Path path;/*from w w w. jav a2 s . c o m*/ String extension; if (split instanceof FileSplit) { FileSplit fsplit = (FileSplit) split; extension = FileUtil.getExtensionWithoutCompression(path = fsplit.getPath()); } else if (split instanceof CombineFileSplit) { CombineFileSplit csplit = (CombineFileSplit) split; extension = FileUtil.getExtensionWithoutCompression(path = csplit.getPath(0)); } else { throw new RuntimeException("Cannot process plits of type " + split.getClass()); } // If this extension is for a compression, skip it and take the previous // extension if (extension.equals("hdf")) { // HDF File. Create HDFRecordReader return (RecordReader) new HDFRecordReader(); } if (extension.equals("rtree")) { // File is locally indexed as RTree return (RecordReader) new RTreeRecordReader3<V>(); } // For backward compatibility, check if the file is RTree indexed from // its signature Configuration conf = context != null ? context.getConfiguration() : new Configuration(); if (SpatialSite.isRTree(path.getFileSystem(conf), path)) { return (RecordReader) new RTreeRecordReader3<V>(); } // Check if a custom record reader is configured with this extension Class<?> recordReaderClass = conf.getClass("SpatialInputFormat." + extension + ".recordreader", SpatialRecordReader3.class); try { return (RecordReader<K, Iterable<V>>) recordReaderClass.newInstance(); } catch (InstantiationException e) { } catch (IllegalAccessException e) { } // Use the default SpatialRecordReader if none of the above worked return (RecordReader) new SpatialRecordReader3<V>(); }
From source file:edu.umn.cs.spatialHadoop.nasa.HDFRecordReader.java
License:Open Source License
public void initialize(InputSplit split, Configuration conf) throws IOException { this.conf = conf; String datasetName = conf.get("dataset"); if (datasetName == null) throw new RuntimeException("Dataset name should be provided"); if (split instanceof CombineFileSplit) { CombineFileSplit csplits = (CombineFileSplit) split; splits = new Vector<FileSplit>(csplits.getNumPaths()); for (int i = 0; i < csplits.getNumPaths(); i++) { FileSplit fsplit = new FileSplit(csplits.getPath(i), csplits.getOffset(i), csplits.getLength(i), csplits.getLocations()); splits.add(fsplit);// w w w . j a v a2 s .co m } this.initialize(splits.remove(splits.size() - 1), conf); return; } inFile = ((FileSplit) split).getPath(); fs = inFile.getFileSystem(conf); if (fs instanceof HTTPFileSystem) { // For performance reasons, we don't open HDF files from HTTP inFile = new Path(FileUtil.copyFile(conf, inFile)); fs = FileSystem.getLocal(conf); this.deleteOnEnd = true; } hdfFile = new HDFFile(fs.open(inFile)); // Retrieve meta data String archiveMetadata = (String) hdfFile.findHeaderByName("ArchiveMetadata.0").getEntryAt(0); String coreMetadata = (String) hdfFile.findHeaderByName("CoreMetadata.0").getEntryAt(0); nasaDataset = new NASADataset(coreMetadata, archiveMetadata); // Retrieve the data array DDVGroup dataGroup = hdfFile.findGroupByName(datasetName); boolean fillValueFound = false; int resolution = 0; // Retrieve metadata int fillValuee = 0; for (DataDescriptor dd : dataGroup.getContents()) { if (dd instanceof DDVDataHeader) { DDVDataHeader vheader = (DDVDataHeader) dd; if (vheader.getName().equals("_FillValue")) { Object fillValue = vheader.getEntryAt(0); if (fillValue instanceof Integer) fillValuee = (Integer) fillValue; else if (fillValue instanceof Short) fillValuee = (Short) fillValue; else if (fillValue instanceof Byte) fillValuee = (Byte) fillValue; else throw new RuntimeException("Unsupported type: " + fillValue.getClass()); fillValueFound = true; } else if 
(vheader.getName().equals("valid_range")) { Object minValue = vheader.getEntryAt(0); if (minValue instanceof Integer) nasaDataset.minValue = (Integer) minValue; else if (minValue instanceof Byte) nasaDataset.minValue = (Byte) minValue; Object maxValue = vheader.getEntryAt(1); if (maxValue instanceof Integer) nasaDataset.maxValue = (Integer) maxValue; else if (maxValue instanceof Byte) nasaDataset.maxValue = (Byte) maxValue; } } } // Retrieve data for (DataDescriptor dd : dataGroup.getContents()) { if (dd instanceof DDNumericDataGroup) { DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd; valueSize = numericDataGroup.getDataSize(); resolution = numericDataGroup.getDimensions()[0]; unparsedDataArray = new byte[valueSize * resolution * resolution]; if (fillValueFound) { fillValueBytes = new byte[valueSize]; HDFConstants.writeAt(fillValueBytes, 0, fillValuee, valueSize); for (int i = 0; i < unparsedDataArray.length; i++) unparsedDataArray[i] = fillValueBytes[i % valueSize]; } numericDataGroup.getAsByteArray(unparsedDataArray, 0, unparsedDataArray.length); } } nasaDataset.resolution = resolution; if (!fillValueFound) { skipFillValue = false; } else { skipFillValue = conf.getBoolean("skipfill", true); // Whether we need to recover fill values or not boolean recoverFillValues = conf.getBoolean("recoverholes", true); if (recoverFillValues) recoverFillValues(conf); } this.nasaShape = (S) OperationsParams.getShape(conf, "shape", new NASARectangle()); this.nasaShape.setTimestamp(nasaDataset.time); this.value = new NASAIterator(); }
From source file:edu.umn.cs.sthadoop.mapreduce.SpatioTemporalInputFormat.java
License:Open Source License
@Override public RecordReader<K, Iterable<V>> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { Path path;//from www .ja v a 2 s .c o m String extension; if (split instanceof FileSplit) { FileSplit fsplit = (FileSplit) split; extension = FileUtil.getExtensionWithoutCompression(path = fsplit.getPath()); } else if (split instanceof CombineFileSplit) { CombineFileSplit csplit = (CombineFileSplit) split; extension = FileUtil.getExtensionWithoutCompression(path = csplit.getPath(0)); } else { throw new RuntimeException("Cannot process plits of type " + split.getClass()); } // If this extension is for a compression, skip it and take the previous // extension if (extension.equals("hdf")) { // HDF File. Create HDFRecordReader return (RecordReader) new HDFRecordReader(); } if (extension.equals("rtree")) { // File is locally indexed as RTree return (RecordReader) new RTreeRecordReader3<V>(); } // For backward compatibility, check if the file is RTree indexed from // its signature Configuration conf = context != null ? context.getConfiguration() : new Configuration(); if (SpatialSite.isRTree(path.getFileSystem(conf), path)) { return (RecordReader) new RTreeRecordReader3<V>(); } // Check if a custom record reader is configured with this extension Class<?> recordReaderClass = conf.getClass("SpatialInputFormat." + extension + ".recordreader", SpatioTemporalRecordReader.class); try { return (RecordReader<K, Iterable<V>>) recordReaderClass.newInstance(); } catch (InstantiationException e) { } catch (IllegalAccessException e) { } // Use the default SpatioTemporalRecordReader if none of the above worked return (RecordReader) new SpatioTemporalRecordReader<V>(); }
From source file:fire.util.fileformats.combineimagefileinputformat.CombineFileImageRecordReader.java
License:Apache License
/**
 * Creates a reader bound to one file of a combined split.
 *
 * <p>Stores the path found at {@code index} and the file system that path
 * resolves to under the task configuration.
 *
 * @param split   the combined split containing the file
 * @param context the task context supplying the configuration
 * @param index   index of the file within {@code split}
 * @throws IOException if the file system cannot be resolved
 */
public CombineFileImageRecordReader(
        CombineFileSplit split,
        TaskAttemptContext context,
        Integer index) throws IOException {
    this.path = split.getPath(index);
    this.fs = this.path.getFileSystem(context.getConfiguration());
}
From source file:fire.util.fileformats.combinetextfileinputformat.CombineFileLineRecordReader.java
License:Apache License
/**
 * Opens one file of a combined split for line-by-line reading.
 *
 * <p>The start and end offsets are taken from the split entry at
 * {@code index}. When the start offset is not zero, the stream is positioned
 * one byte before it and the first line read from there is discarded;
 * {@code startOffset} is then advanced by the number of bytes consumed.
 *
 * @param split   the combined split containing the file
 * @param context the task context supplying the configuration
 * @param index   index of the file within {@code split}
 * @throws IOException if the file cannot be opened, positioned or read
 */
public CombineFileLineRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
        throws IOException {
    this.path = split.getPath(index);
    fs = this.path.getFileSystem(context.getConfiguration());
    this.startOffset = split.getOffset(index);
    this.end = startOffset + split.getLength(index);
    boolean skipFirstLine = false;

    // open the file
    fileIn = fs.open(path);
    // If seeking or reading fails, the constructor throws and the caller never
    // gets an object to close, so the stream has to be released here.
    boolean positioned = false;
    try {
        if (startOffset != 0) {
            skipFirstLine = true;
            --startOffset;
            fileIn.seek(startOffset);
        }
        reader = new LineReader(fileIn);
        if (skipFirstLine) {
            // skip first line and re-establish "startOffset".
            startOffset += reader.readLine(new Text(), 0,
                    (int) Math.min((long) Integer.MAX_VALUE, end - startOffset));
        }
        positioned = true;
    } finally {
        if (!positioned) {
            try {
                fileIn.close();
            } catch (IOException ignored) {
                // The original failure is already propagating; do not mask it.
            }
        }
    }
    this.pos = startOffset;
}
From source file:gobblin.compaction.mapreduce.avro.AvroKeyCombineFileRecordReader.java
License:Apache License
/**
 * Determines the Avro schema to use for one file of a combined split.
 *
 * <p>The input key schema from the job configuration is returned when it is
 * set; otherwise the schema is obtained from the data file at {@code idx}.
 *
 * @param split the combined split containing the file
 * @param cx    the task context supplying the configuration
 * @param idx   index of the file within {@code split}
 * @return the configured input key schema, or the schema read from the data file
 * @throws IOException if the file system or the data file cannot be accessed
 */
private static Schema getSchema(CombineFileSplit split, TaskAttemptContext cx, Integer idx)
        throws IOException {
    final Schema configured = AvroJob.getInputKeySchema(cx.getConfiguration());
    if (configured == null) {
        // Nothing configured on the job: fall back to the file's own schema.
        final Path dataFile = split.getPath(idx);
        final FileSystem fileSystem = dataFile.getFileSystem(cx.getConfiguration());
        return AvroUtils.getSchemaFromDataFile(dataFile, fileSystem);
    }
    return configured;
}