List of usage examples for org.apache.hadoop.mapreduce.lib.input.CombineFileSplit#getLocations()
public String[] getLocations() throws IOException
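getLocations() returns the hosts on which the blocks backing the combined split reside; the MapReduce scheduler uses this information to place tasks near their data. As a point of reference before the project examples below, here is a minimal sketch of a defensive call site (the SplitLocationInspector class and its printLocations method are illustrative assumptions, not taken from any of the source files listed):

import java.io.IOException;

import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;

public class SplitLocationInspector {
    // Print each host that stores part of the combined split's data.
    // getLocations() can return null or an empty array, so guard before indexing.
    public static void printLocations(CombineFileSplit split) throws IOException {
        String[] locations = split.getLocations();
        if (locations == null || locations.length == 0) {
            System.out.println("no location information recorded for this split");
            return;
        }
        for (String host : locations) {
            System.out.println("split data resident on: " + host);
        }
    }
}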
From source file:com.alexholmes.hadooputils.combine.common.mapreduce.SplitMetricsCombineInputFormat.java
License:Apache License
public static String extractLocation(CombineFileSplit split) throws IOException {
    if (split.getLocations() == null || split.getLocations().length == 0) {
        return null;
    }
    return split.getLocations()[0];
}
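Note the defensive style: getLocations() may return null or an empty array for splits without recorded block locations, so the method returns null rather than indexing blindly, and reports only the first host as a representative location, presumably enough for the split metrics this input format collects.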
From source file:edu.umn.cs.spatialHadoop.nasa.HDFRecordReader.java
License:Open Source License
public void initialize(InputSplit split, Configuration conf) throws IOException {
    this.conf = conf;
    String datasetName = conf.get("dataset");
    if (datasetName == null)
        throw new RuntimeException("Dataset name should be provided");
    if (split instanceof CombineFileSplit) {
        CombineFileSplit csplits = (CombineFileSplit) split;
        splits = new Vector<FileSplit>(csplits.getNumPaths());
        for (int i = 0; i < csplits.getNumPaths(); i++) {
            FileSplit fsplit = new FileSplit(csplits.getPath(i), csplits.getOffset(i),
                    csplits.getLength(i), csplits.getLocations());
            splits.add(fsplit);
        }
        this.initialize(splits.remove(splits.size() - 1), conf);
        return;
    }
    inFile = ((FileSplit) split).getPath();
    fs = inFile.getFileSystem(conf);
    if (fs instanceof HTTPFileSystem) {
        // For performance reasons, we don't open HDF files over HTTP
        inFile = new Path(FileUtil.copyFile(conf, inFile));
        fs = FileSystem.getLocal(conf);
        this.deleteOnEnd = true;
    }
    hdfFile = new HDFFile(fs.open(inFile));
    // Retrieve metadata
    String archiveMetadata = (String) hdfFile.findHeaderByName("ArchiveMetadata.0").getEntryAt(0);
    String coreMetadata = (String) hdfFile.findHeaderByName("CoreMetadata.0").getEntryAt(0);
    nasaDataset = new NASADataset(coreMetadata, archiveMetadata);
    // Retrieve the data array
    DDVGroup dataGroup = hdfFile.findGroupByName(datasetName);
    boolean fillValueFound = false;
    int resolution = 0;
    // Retrieve the fill value and valid range from the dataset headers
    int fillValuee = 0;
    for (DataDescriptor dd : dataGroup.getContents()) {
        if (dd instanceof DDVDataHeader) {
            DDVDataHeader vheader = (DDVDataHeader) dd;
            if (vheader.getName().equals("_FillValue")) {
                Object fillValue = vheader.getEntryAt(0);
                if (fillValue instanceof Integer)
                    fillValuee = (Integer) fillValue;
                else if (fillValue instanceof Short)
                    fillValuee = (Short) fillValue;
                else if (fillValue instanceof Byte)
                    fillValuee = (Byte) fillValue;
                else
                    throw new RuntimeException("Unsupported type: " + fillValue.getClass());
                fillValueFound = true;
            } else if (vheader.getName().equals("valid_range")) {
                Object minValue = vheader.getEntryAt(0);
                if (minValue instanceof Integer)
                    nasaDataset.minValue = (Integer) minValue;
                else if (minValue instanceof Byte)
                    nasaDataset.minValue = (Byte) minValue;
                Object maxValue = vheader.getEntryAt(1);
                if (maxValue instanceof Integer)
                    nasaDataset.maxValue = (Integer) maxValue;
                else if (maxValue instanceof Byte)
                    nasaDataset.maxValue = (Byte) maxValue;
            }
        }
    }
    // Retrieve the data
    for (DataDescriptor dd : dataGroup.getContents()) {
        if (dd instanceof DDNumericDataGroup) {
            DDNumericDataGroup numericDataGroup = (DDNumericDataGroup) dd;
            valueSize = numericDataGroup.getDataSize();
            resolution = numericDataGroup.getDimensions()[0];
            unparsedDataArray = new byte[valueSize * resolution * resolution];
            if (fillValueFound) {
                fillValueBytes = new byte[valueSize];
                HDFConstants.writeAt(fillValueBytes, 0, fillValuee, valueSize);
                for (int i = 0; i < unparsedDataArray.length; i++)
                    unparsedDataArray[i] = fillValueBytes[i % valueSize];
            }
            numericDataGroup.getAsByteArray(unparsedDataArray, 0, unparsedDataArray.length);
        }
    }
    nasaDataset.resolution = resolution;
    if (!fillValueFound) {
        skipFillValue = false;
    } else {
        skipFillValue = conf.getBoolean("skipfill", true);
        // Whether we need to recover fill values or not
        boolean recoverFillValues = conf.getBoolean("recoverholes", true);
        if (recoverFillValues)
            recoverFillValues(conf);
    }
    this.nasaShape = (S) OperationsParams.getShape(conf, "shape", new NASARectangle());
    this.nasaShape.setTimestamp(nasaDataset.time);
    this.value = new NASAIterator();
}
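The CombineFileSplit branch above shows a common conversion pattern: CombineFileSplit carries per-file paths, offsets, and lengths but only a single combined location array, so each reconstructed per-file FileSplit is tagged with the same getLocations() result for the whole combined split.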
From source file:gobblin.compaction.mapreduce.avro.AvroKeyRecursiveCombineFileInputFormat.java
License:Apache License
/**
 * Cap the number of locations in each split at SPLIT_MAX_NUM_LOCATIONS if it is
 * larger (MAPREDUCE-5186).
 */
private static List<InputSplit> cleanSplits(List<InputSplit> splits) throws IOException {
    if (VersionInfo.getVersion().compareTo("2.3.0") >= 0) {
        // This issue was fixed in 2.3.0; on newer versions there is no need to clean up splits
        return splits;
    }
    List<InputSplit> cleanedSplits = Lists.newArrayList();
    for (int i = 0; i < splits.size(); i++) {
        CombineFileSplit oldSplit = (CombineFileSplit) splits.get(i);
        String[] locations = oldSplit.getLocations();
        Preconditions.checkNotNull(locations, "CombineFileSplit.getLocations() returned null");
        if (locations.length > SPLIT_MAX_NUM_LOCATIONS) {
            locations = Arrays.copyOf(locations, SPLIT_MAX_NUM_LOCATIONS);
        }
        cleanedSplits.add(new CombineFileSplit(oldSplit.getPaths(), oldSplit.getStartOffsets(),
                oldSplit.getLengths(), locations));
    }
    return cleanedSplits;
}
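For context: combining many files can give a split more distinct hosts than mapreduce.job.max.split.locations allows, and before the MAPREDUCE-5186 fix that could fail the job at split-serialization time; the fix shipped in 2.3.0 truncates the list with a warning instead, which is why this workaround only runs on older versions. One caveat worth noting: compareTo here is a lexicographic string comparison, which misorders versions such as 2.10.0 against 2.3.0.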
From source file:org.apache.mahout.text.WholeFileRecordReader.java
License:Apache License
public WholeFileRecordReader(CombineFileSplit fileSplit, TaskAttemptContext taskAttemptContext, Integer idx)
        throws IOException {
    this.fileSplit = new FileSplit(fileSplit.getPath(idx), fileSplit.getOffset(idx), fileSplit.getLength(idx),
            fileSplit.getLocations());
    this.configuration = taskAttemptContext.getConfiguration();
    this.index = new IntWritable(idx);
    this.fileFilterClassName = this.configuration.get(FILE_FILTER_CLASS_OPTION[0]);
}
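This constructor signature, (CombineFileSplit, TaskAttemptContext, Integer), matches the one CombineFileRecordReader instantiates reflectively for each file in a combined split; the Integer identifies which of the split's paths this reader handles, while the single-file FileSplit again reuses the combined split's full getLocations() array.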
From source file:org.kitesdk.data.spi.filesystem.AbstractCombineFileRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    if (delegate != null) {
        delegate.close();
    }
    if (split instanceof CombineFileSplit) {
        CombineFileSplit combineSplit = (CombineFileSplit) split;
        FileSplit fileSplit = new FileSplit(combineSplit.getPath(idx), combineSplit.getOffset(idx),
                combineSplit.getLength(idx), combineSplit.getLocations());
        delegate = getInputFormat().createRecordReader(fileSplit, context);
        delegate.initialize(fileSplit, context);
    } else {
        throw new DatasetOperationException("Split is not a CombineFileSplit: %s:%s",
                split.getClass().getCanonicalName(), split);
    }
}
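Here the per-file FileSplit built from getLocations() is handed straight to the delegate RecordReader. Reusing the combined split's full location array is harmless at this point: a record reader typically consults only the path, offset, and length, while the locations matter to the scheduler, which has already placed the task by the time initialize() runs.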