List of usage examples for org.apache.hadoop.mapreduce.lib.input.CombineFileSplit.getLength(int)
public long getLength(int i)
From source file:com.marcolotz.lung.io.inputFormat.MultipleFilesRecordReader.java
License:Creative Commons License
/**
 * Creates a reader for a single file of a {@link CombineFileSplit}.
 *
 * <p>Implementation detail: This constructor is built to be called via
 * reflection from within CombineFileRecordReader.
 *
 * @param fileSplit
 *            The CombineFileSplit that this will read from.
 * @param context
 *            The context for this task.
 * @param pathToProcess
 *            The path index from the CombineFileSplit to process in this
 *            record.
 */
public MultipleFilesRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context,
    Integer pathToProcess) {
  isProcessed = false;
  mFileToRead = fileSplit.getPath(pathToProcess);
  mFileLength = fileSplit.getLength(pathToProcess);
  mConf = context.getConfiguration();

  /* never used in production, just for code integrity */
  assert 0 == fileSplit.getOffset(pathToProcess);

  if (LOG.isDebugEnabled()) {
    LOG.debug("FileToRead is: " + mFileToRead.toString());
    LOG.debug("Processing path " + pathToProcess + " out of " + fileSplit.getNumPaths());

    try {
      // Ask the path for its own file system. FileSystem.get(conf) would
      // return the configured default file system, which is the wrong one
      // whenever the file lives on a different scheme or authority.
      FileSystem fs = mFileToRead.getFileSystem(mConf);

      /* never used in production, just for code integrity */
      assert fs.getFileStatus(mFileToRead).getLen() == mFileLength;
    } catch (IOException ioe) {
      // Best-effort sanity check only: report the cause and carry on.
      LOG.debug("Problem in file length", ioe);
    }
  }

  fileContent = new BytesWritable();
}
From source file:com.moz.fiji.mapreduce.input.impl.WholeFileRecordReader.java
License:Apache License
/** * Implementation detail: This constructor is built to be called via * reflection from within CombineFileRecordReader. * * @param fileSplit The CombineFileSplit that this will read from. * @param context The context for this task. * @param pathToProcess The path index from the CombineFileSplit to process in this record. *//*from w w w.j a va 2s . com*/ public WholeFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context, Integer pathToProcess) { mProcessed = false; mFileToRead = fileSplit.getPath(pathToProcess); mFileLength = fileSplit.getLength(pathToProcess); mConf = context.getConfiguration(); assert 0 == fileSplit.getOffset(pathToProcess); if (LOG.isDebugEnabled()) { LOG.debug("FileToRead is: " + mFileToRead.toString()); LOG.debug("Processing path " + pathToProcess + " out of " + fileSplit.getNumPaths()); try { final FileSystem fs = mFileToRead.getFileSystem(mConf); assert fs.getFileStatus(mFileToRead).getLen() == mFileLength; } catch (IOException ioe) { // oh well, I was just testing. } } mFileName = new Text(); mFileText = new Text(); }
From source file:edu.gslis.streamcorpus.ThriftRecordReader.java
License:Apache License
public ThriftRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index) throws IOException { this.path = split.getPath(index); fs = this.path.getFileSystem(context.getConfiguration()); this.startOffset = split.getOffset(index); this.end = startOffset + split.getLength(index); this.pos = startOffset; in = fs.open(path);/* w w w .ja v a 2s .c o m*/ if (path.toUri().toString().endsWith("xz")) tp = new TBinaryProtocol.Factory().getProtocol(new TIOStreamTransport(new XZInputStream(in))); else tp = new TBinaryProtocol.Factory().getProtocol(new TIOStreamTransport(in)); }
From source file:edu.umn.cs.spatialHadoop.nasa.HDFRecordReader.java
License:Open Source License
public void initialize(InputSplit split, Configuration conf) throws IOException { this.conf = conf; String datasetName = conf.get("dataset"); if (datasetName == null) throw new RuntimeException("Dataset name should be provided"); if (split instanceof CombineFileSplit) { CombineFileSplit csplits = (CombineFileSplit) split; splits = new Vector<FileSplit>(csplits.getNumPaths()); for (int i = 0; i < csplits.getNumPaths(); i++) { FileSplit fsplit = new FileSplit(csplits.getPath(i), csplits.getOffset(i), csplits.getLength(i), csplits.getLocations()); splits.add(fsplit);/*w w w . ja v a2 s .c om*/ } this.initialize(splits.remove(splits.size() - 1), conf); return; } inFile = ((FileSplit) split).getPath(); fs = inFile.getFileSystem(conf); if (fs instanceof HTTPFileSystem) { // For performance reasons, we don't open HDF files from HTTP inFile = new Path(FileUtil.copyFile(conf, inFile)); fs = FileSystem.getLocal(conf); this.deleteOnEnd = true; } hdfFile = new HDFFile(fs.open(inFile)); // Retrieve meta data String archiveMetadata = (String) hdfFile.findHeaderByName("ArchiveMetadata.0").getEntryAt(0); String coreMetadata = (String) hdfFile.findHeaderByName("CoreMetadata.0").getEntryAt(0); nasaDataset = new NASADataset(coreMetadata, archiveMetadata); // Retrieve the data array DDVGroup dataGroup = hdfFile.findGroupByName(datasetName); boolean fillValueFound = false; int resolution = 0; // Retrieve metadata int fillValuee = 0; for (DataDescriptor dd : dataGroup.getContents()) { if (dd instanceof DDVDataHeader) { DDVDataHeader vheader = (DDVDataHeader) dd; if (vheader.getName().equals("_FillValue")) { Object fillValue = vheader.getEntryAt(0); if (fillValue instanceof Integer) fillValuee = (Integer) fillValue; else if (fillValue instanceof Short) fillValuee = (Short) fillValue; else if (fillValue instanceof Byte) fillValuee = (Byte) fillValue; else throw new RuntimeException("Unsupported type: " + fillValue.getClass()); fillValueFound = true; } else if 
(vheader.getName().equals("valid_range")) { Object minValue = vheader.getEntryAt(0); if (minValue instanceof Integer) nasaDataset.minValue = (Integer) minValue; else if (minValue instanceof Byte) nasaDataset.minValue = (Byte) minValue; Object maxValue = vheader.getEntryAt(1); if (maxValue instanceof Integer) nasaDataset.maxValue = (Integer) maxValue; else if (maxValue instanceof Byte) nasaDataset.maxValue = (Byte) maxValue; } } } // Retrieve data for (DataDescriptor dd : dataGroup.getContents()) { if (dd instanceof DDNumericDataGroup) { DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd; valueSize = numericDataGroup.getDataSize(); resolution = numericDataGroup.getDimensions()[0]; unparsedDataArray = new byte[valueSize * resolution * resolution]; if (fillValueFound) { fillValueBytes = new byte[valueSize]; HDFConstants.writeAt(fillValueBytes, 0, fillValuee, valueSize); for (int i = 0; i < unparsedDataArray.length; i++) unparsedDataArray[i] = fillValueBytes[i % valueSize]; } numericDataGroup.getAsByteArray(unparsedDataArray, 0, unparsedDataArray.length); } } nasaDataset.resolution = resolution; if (!fillValueFound) { skipFillValue = false; } else { skipFillValue = conf.getBoolean("skipfill", true); // Whether we need to recover fill values or not boolean recoverFillValues = conf.getBoolean("recoverholes", true); if (recoverFillValues) recoverFillValues(conf); } this.nasaShape = (S) OperationsParams.getShape(conf, "shape", new NASARectangle()); this.nasaShape.setTimestamp(nasaDataset.time); this.value = new NASAIterator(); }
From source file:fire.util.fileformats.combinetextfileinputformat.CombineFileLineRecordReader.java
License:Apache License
/**
 * Opens one file of a combined split and positions a line reader at the first
 * line to be returned.
 *
 * <p>When the chunk does not start at offset zero, reading begins one byte before
 * the chunk and the first line read is discarded; {@code startOffset} is then
 * advanced by the number of bytes that line consumed.
 *
 * @param split the combined split containing the file to read
 * @param context the task context supplying the configuration
 * @param index index of the file inside {@code split}
 * @throws IOException if the file cannot be opened, positioned, or read
 */
public CombineFileLineRecordReader(CombineFileSplit split, TaskAttemptContext context,
    Integer index) throws IOException {
  this.path = split.getPath(index);
  fs = this.path.getFileSystem(context.getConfiguration());
  this.startOffset = split.getOffset(index);
  this.end = startOffset + split.getLength(index);
  boolean skipFirstLine = false;

  // open the file
  fileIn = fs.open(path);

  // If positioning or the initial read fails, the constructor aborts and the
  // caller never receives an instance to close, so release the stream here.
  boolean initialized = false;
  try {
    if (startOffset != 0) {
      skipFirstLine = true;
      --startOffset;
      fileIn.seek(startOffset);
    }
    reader = new LineReader(fileIn);
    if (skipFirstLine) {
      // skip first line and re-establish "startOffset".
      startOffset += reader.readLine(new Text(), 0,
          (int) Math.min((long) Integer.MAX_VALUE, end - startOffset));
    }
    this.pos = startOffset;
    initialized = true;
  } finally {
    if (!initialized) {
      try {
        fileIn.close();
      } catch (IOException ignored) {
        // The original failure is already propagating; do not mask it.
      }
    }
  }
}
From source file:org.apache.mahout.text.WholeFileRecordReader.java
License:Apache License
/**
 * Creates a reader for the file at position {@code idx} of a combined split.
 *
 * <p>The selected path, offset and length are repackaged as a standalone
 * {@link FileSplit} that carries the locations of the whole combined split.
 *
 * @param fileSplit the combined split containing the file
 * @param taskAttemptContext the task context supplying the configuration
 * @param idx index of the file inside {@code fileSplit}
 * @throws IOException if the split locations cannot be obtained
 */
public WholeFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext taskAttemptContext,
    Integer idx) throws IOException {
  this.fileSplit = new FileSplit(
      fileSplit.getPath(idx),
      fileSplit.getOffset(idx),
      fileSplit.getLength(idx),
      fileSplit.getLocations());

  this.configuration = taskAttemptContext.getConfiguration();
  this.index = new IntWritable(idx);

  // The filter class name is looked up under the first entry of the option array.
  this.fileFilterClassName = this.configuration.get(FILE_FILTER_CLASS_OPTION[0]);
}
From source file:org.kiji.mapreduce.input.impl.WholeFileRecordReader.java
License:Apache License
/** * Implementation detail: This constructor is built to be called via * reflection from within CombineFileRecordReader. * * @param fileSplit The CombineFileSplit that this will read from. * @param context The context for this task. * @param pathToProcess The path index from the CombineFileSplit to process in this record. *///from ww w.j a v a 2 s.c o m public WholeFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context, Integer pathToProcess) { mProcessed = false; mFileToRead = fileSplit.getPath(pathToProcess); mFileLength = fileSplit.getLength(pathToProcess); mConf = context.getConfiguration(); assert 0 == fileSplit.getOffset(pathToProcess); if (LOG.isDebugEnabled()) { LOG.debug("FileToRead is: " + mFileToRead.toString()); LOG.debug("Processing path " + pathToProcess + " out of " + fileSplit.getNumPaths()); try { FileSystem fs = FileSystem.get(mConf); assert fs.getFileStatus(mFileToRead).getLen() == mFileLength; } catch (IOException ioe) { // oh well, I was just testing. } } mFileName = new Text(); mFileText = new Text(); }
From source file:org.kitesdk.data.spi.filesystem.AbstractCombineFileRecordReader.java
License:Apache License
/**
 * Closes any existing delegate, then creates and initializes a new delegate reader
 * for the file at index {@code idx} of the given combined split.
 *
 * @param split the split to read; must be a {@link CombineFileSplit}
 * @param context the task context handed on to the delegate reader
 * @throws IOException if closing the old delegate or initializing the new one fails
 * @throws InterruptedException if creating or initializing the delegate is interrupted
 * @throws DatasetOperationException if {@code split} is not a {@link CombineFileSplit}
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  if (delegate != null) {
    delegate.close();
  }

  // Reject anything that is not a combined split before going further.
  if (!(split instanceof CombineFileSplit)) {
    throw new DatasetOperationException("Split is not a CombineFileSplit: %s:%s",
        split.getClass().getCanonicalName(), split);
  }

  final CombineFileSplit combined = (CombineFileSplit) split;
  final FileSplit single = new FileSplit(
      combined.getPath(idx),
      combined.getOffset(idx),
      combined.getLength(idx),
      combined.getLocations());

  delegate = getInputFormat().createRecordReader(single, context);
  delegate.initialize(single, context);
}