List of usage examples for org.apache.hadoop.mapreduce.InputSplit.getLength()
/**
 * Returns the length of this split.
 *
 * <p>NOTE(review): presumably the size in bytes; callers typically add the value to a byte
 * offset or sum it into a total input size. Confirm against the implementing class.
 *
 * @return the length of the split
 * @throws IOException declared so that implementations may fail while determining the length
 * @throws InterruptedException declared so that implementations may be interrupted
 */
public abstract long getLength() throws IOException, InterruptedException;
From source file:edu.umn.cs.spatialHadoop.mapreduce.SpatialRecordReader3.java
License:Open Source License
public void initialize(InputSplit split, Configuration conf) throws IOException, InterruptedException { FileSplit fsplit = (FileSplit) split; if (compressionCodecFactory == null) compressionCodecFactory = new CompressionCodecFactory(conf); LOG.info("Open a SpatialRecordReader to split: " + split); this.path = fsplit.getPath(); this.start = fsplit.getStart(); this.end = this.start + split.getLength(); this.fs = this.path.getFileSystem(conf); this.directIn = fs.open(this.path); codec = compressionCodecFactory.getCodec(this.path); if (codec != null) { // Input is compressed, create a decompressor to decompress it decompressor = CodecPool.getDecompressor(codec); if (codec instanceof SplittableCompressionCodec) { // A splittable compression codec, can seek to the desired input pos final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream( directIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK); in = cIn;/*w w w.j a va 2 s. c om*/ start = cIn.getAdjustedStart(); end = cIn.getAdjustedEnd(); // take pos from compressed stream as we adjusted both start and end // to match with the compressed file progressPosition = cIn; } else { // Non-splittable input, need to start from the beginning CompressionInputStream cIn = codec.createInputStream(directIn, decompressor); in = cIn; progressPosition = cIn; } } else { // Non-compressed file, seek to the desired position and use this stream // to get the progress and position directIn.seek(start); in = directIn; progressPosition = directIn; } this.stockShape = (V) OperationsParams.getShape(conf, "shape"); this.tempLine = new Text(); this.lineReader = new LineReader(in); bytesRead = 0; if (this.start != 0) { // Skip until first end-of-line reached bytesRead += lineReader.readLine(tempLine); } if (conf.get(SpatialInputFormat3.InputQueryRange) != null) { // Retrieve the input query range to apply on all records this.inputQueryRange = OperationsParams.getShape(conf, 
SpatialInputFormat3.InputQueryRange); this.inputQueryMBR = this.inputQueryRange.getMBR(); } // Check if there is an associated global index to read cell boundaries GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, path.getParent()); if (gindex == null) { cellMBR = new Partition(); cellMBR.filename = path.getName(); cellMBR.invalidate(); } else { // Set from the associated partition in the global index for (Partition p : gindex) { if (p.filename.equals(this.path.getName())) cellMBR = p; } } this.value = new ShapeIterator<V>(); value.setShape(stockShape); }
From source file:edu.umn.cs.spatialHadoop.OperationsParams.java
License:Open Source License
/** * Checks whether the operation should work in local or MapReduce mode. If * the job explicitly specifies whether to run in local or MapReduce mode, * the specified option is returned. Otherwise, it automatically detects * whether to use local or MapReduce based on the input size. * // w w w . j a v a2s . c o m * @return <code>true</code> to run in local mode, <code>false</code> to run * in MapReduce mode. * @throws IOException If the underlying job fails with an IOException * @throws InterruptedException If the underlying job was interrupted */ public static boolean isLocal(Configuration jobConf, Path... input) throws IOException, InterruptedException { final boolean LocalProcessing = true; final boolean MapReduceProcessing = false; // Whatever is explicitly set has the highest priority if (jobConf.get("local") != null) return jobConf.getBoolean("local", false); // If any of the input files are hidden, use local processing for (Path inputFile : input) { if (!SpatialSite.NonHiddenFileFilter.accept(inputFile)) return LocalProcessing; } if (input.length > MaxSplitsForLocalProcessing) { LOG.info("Too many files. Using MapReduce"); return MapReduceProcessing; } Job job = new Job(jobConf); // To ensure we don't change the original SpatialInputFormat3.setInputPaths(job, input); SpatialInputFormat3<Partition, Shape> inputFormat = new SpatialInputFormat3<Partition, Shape>(); try { List<InputSplit> splits = inputFormat.getSplits(job); if (splits.size() > MaxSplitsForLocalProcessing) return MapReduceProcessing; long totalSize = 0; for (InputSplit split : splits) totalSize += split.getLength(); if (totalSize > MaxSizeForLocalProcessing) { LOG.info("Input size is too large. Using MapReduce"); return MapReduceProcessing; } LOG.info("Input size is small enough to use local machine"); return LocalProcessing; } catch (IOException e) { LOG.warn("Cannot get splits for input"); return MapReduceProcessing; } }
From source file:eu.scape_project.pt.mets.hadoop.MetsInputFormat.java
License:Apache License
@Override public RecordReader<Text, DTO> createRecordReader(InputSplit split, TaskAttemptContext context) { try {//from w w w . j a v a 2 s . c om LOG.debug("split.length = " + split.getLength()); LOG.debug("split.string = " + split.toString()); } catch (IOException ex) { LOG.error(ex.getMessage()); } catch (InterruptedException ex) { LOG.error(ex.getMessage()); } String tag = context.getConfiguration().get(MetsInputFormat.TAG); return new MetsRecordReader(tag); }
From source file:ml.shifu.guagua.mapreduce.GuaguaInputFormat.java
License:Apache License
/**
 * Groups input splits into combined splits, targeting {@code maxCombinedSplitSize} per group and
 * combining splits that report a common location.
 *
 * <p>Steps, as implemented below:
 * <ol>
 * <li>Zero-length splits are counted and skipped. A split whose length is at least
 * {@code maxCombinedSplitSize} becomes a group of its own. Every other split is registered on
 * one {@code Node} per distinct location it reports.</li>
 * <li>If every input split was empty, a single group holding the first input split is added. If
 * exactly one small split was registered, it becomes its own group.</li>
 * <li>Otherwise each node packs its splits greedily; a packed group is kept only when its total
 * length exceeds half of {@code maxCombinedSplitSize}.</li>
 * <li>Splits still attached to a node afterwards are de-duplicated and packed in encounter
 * order. The final partial group is merged into the first existing group that has room for it,
 * or appended as a new group.</li>
 * </ol>
 *
 * @param oneInputSplits the splits to combine
 * @param maxCombinedSplitSize the target size of a combined group
 * @return the groups of splits; each inner list is one combined split
 * @throws IOException propagated from the splits' {@code getLength()} / {@code getLocations()}
 * @throws InterruptedException propagated from the splits' {@code getLength()} /
 *         {@code getLocations()}
 */
public static List<List<InputSplit>> getCombineGuaguaSplits(List<InputSplit> oneInputSplits,
        long maxCombinedSplitSize) throws IOException, InterruptedException {
    List<Node> nodes = new ArrayList<Node>();
    Map<String, Node> nodeMap = new HashMap<String, Node>();
    List<List<InputSplit>> result = new ArrayList<List<InputSplit>>();
    // Total length of each group in result, used when squeezing in the last leftover piece
    List<Long> resultLengths = new ArrayList<Long>();
    long comparableSplitId = 0;

    // size counts the non-empty splits smaller than maxCombinedSplitSize
    int size = 0, nSplits = oneInputSplits.size();
    InputSplit lastSplit = null;
    int emptyCnt = 0;
    for (InputSplit split : oneInputSplits) {
        if (split.getLength() == 0) {
            emptyCnt++;
            continue;
        }
        if (split.getLength() >= maxCombinedSplitSize) {
            // Already large enough: emit it as a group of its own
            comparableSplitId++;
            List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
            combinedSplits.add(split);
            result.add(combinedSplits);
            resultLengths.add(split.getLength());
        } else {
            ComparableSplit csplit = new ComparableSplit(split, comparableSplitId++);
            String[] locations = split.getLocations();
            // sort the locations to stabilize the number of maps: PIG-1757
            Arrays.sort(locations);
            // Register the split once per distinct location, creating nodes on demand
            HashSet<String> locationSeen = new HashSet<String>();
            for (String location : locations) {
                if (!locationSeen.contains(location)) {
                    Node node = nodeMap.get(location);
                    if (node == null) {
                        node = new Node();
                        nodes.add(node);
                        nodeMap.put(location, node);
                    }
                    node.add(csplit);
                    csplit.add(node);
                    locationSeen.add(location);
                }
            }
            lastSplit = split;
            size++;
        }
    }

    if (nSplits > 0 && emptyCnt == nSplits) {
        // if all splits are empty, add a single empty split as currently an empty directory is
        // not properly handled somewhere
        List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
        combinedSplits.add(oneInputSplits.get(0));
        result.add(combinedSplits);
    } else if (size == 1) {
        // Only one small split: nothing to combine it with
        List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
        combinedSplits.add(lastSplit);
        result.add(combinedSplits);
    } else if (size > 1) {
        // combine small splits
        Collections.sort(nodes, nodeComparator);
        DummySplit dummy = new DummySplit();
        // dummy is used to search for next split of suitable size to be combined
        ComparableSplit dummyComparableSplit = new ComparableSplit(dummy, -1);
        for (Node node : nodes) {
            // sort the splits on this node in descending order
            node.sort();
            long totalSize = 0;
            List<ComparableSplit> splits = node.getSplits();
            int idx;
            int lenSplits;
            List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
            List<ComparableSplit> combinedComparableSplits = new ArrayList<ComparableSplit>();
            while (!splits.isEmpty()) {
                // Start a group with the first split of the node
                combinedSplits.add(splits.get(0).getSplit());
                combinedComparableSplits.add(splits.get(0));
                int startIdx = 1;
                lenSplits = splits.size();
                totalSize += splits.get(0).getSplit().getLength();
                long spaceLeft = maxCombinedSplitSize - totalSize;
                dummy.setLength(spaceLeft);
                // The arithmetic below treats the result as a "not found" insertion point.
                // NOTE(review): assumes the dummy never compares equal to a real split
                // (presumably guaranteed by its id of -1) - confirm in ComparableSplit.
                idx = Collections.binarySearch(node.getSplits().subList(startIdx, lenSplits),
                        dummyComparableSplit);
                idx = -idx - 1 + startIdx;
                while (idx < lenSplits) {
                    long thisLen = splits.get(idx).getSplit().getLength();
                    combinedSplits.add(splits.get(idx).getSplit());
                    combinedComparableSplits.add(splits.get(idx));
                    totalSize += thisLen;
                    spaceLeft -= thisLen;
                    if (spaceLeft <= 0)
                        break;
                    // find next combinable chunk
                    startIdx = idx + 1;
                    if (startIdx >= lenSplits)
                        break;
                    dummy.setLength(spaceLeft);
                    idx = Collections.binarySearch(node.getSplits().subList(startIdx, lenSplits),
                            dummyComparableSplit);
                    idx = -idx - 1 + startIdx;
                }
                if (totalSize > maxCombinedSplitSize / 2) {
                    // Group is more than half full: keep it and continue with what is left
                    result.add(combinedSplits);
                    resultLengths.add(totalSize);
                    // NOTE(review): presumably detaches these splits from every node they are
                    // registered on - confirm in removeSplits
                    removeSplits(combinedComparableSplits);
                    totalSize = 0;
                    combinedSplits = new ArrayList<InputSplit>();
                    combinedComparableSplits.clear();
                    splits = node.getSplits();
                } else {
                    // Too small to keep: every split of the node must have been tried; the
                    // splits stay on the node and are handled as leftovers below
                    if (combinedSplits.size() != lenSplits)
                        throw new AssertionError("Combined split logic error!");
                    break;
                }
            }
        }
        // handle leftovers
        List<ComparableSplit> leftoverSplits = new ArrayList<ComparableSplit>();
        Set<InputSplit> seen = new HashSet<InputSplit>();
        for (Node node : nodes) {
            for (ComparableSplit split : node.getSplits()) {
                if (!seen.contains(split.getSplit())) {
                    // remove duplicates. The set has to be on the raw input split not the
                    // comparable input split as the latter overrides the compareTo method
                    // so its equality semantics is changed and not what we want here
                    seen.add(split.getSplit());
                    leftoverSplits.add(split);
                }
            }
        }

        if (!leftoverSplits.isEmpty()) {
            long totalSize = 0;
            List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
            List<ComparableSplit> combinedComparableSplits = new ArrayList<ComparableSplit>();

            int splitLen = leftoverSplits.size();
            for (int i = 0; i < splitLen; i++) {
                ComparableSplit split = leftoverSplits.get(i);
                long thisLen = split.getSplit().getLength();
                if (totalSize + thisLen >= maxCombinedSplitSize) {
                    // Adding this split would reach the limit: flush the current group first
                    removeSplits(combinedComparableSplits);
                    result.add(combinedSplits);
                    resultLengths.add(totalSize);
                    combinedSplits = new ArrayList<InputSplit>();
                    combinedComparableSplits.clear();
                    totalSize = 0;
                }
                combinedSplits.add(split.getSplit());
                combinedComparableSplits.add(split);
                totalSize += split.getSplit().getLength();
                if (i == splitLen - 1) {
                    // last piece: it could be very small, try to see it can be squeezed into any existing splits
                    for (int j = 0; j < result.size(); j++) {
                        if (resultLengths.get(j) + totalSize <= maxCombinedSplitSize) {
                            List<InputSplit> isList = result.get(j);
                            for (InputSplit csplit : combinedSplits) {
                                isList.add(csplit);
                            }
                            removeSplits(combinedComparableSplits);
                            combinedSplits.clear();
                            break;
                        }
                    }
                    if (!combinedSplits.isEmpty()) {
                        // last piece can not be squeezed in, create a new combined split for them.
                        removeSplits(combinedComparableSplits);
                        result.add(combinedSplits);
                    }
                }
            }
        }
    }
    LOG.info("Total input paths (combined) to process : {}", result.size());
    return result;
}
From source file:ml.shifu.guagua.yarn.GuaguaSplitWriter.java
License:Apache License
private static SplitMetaInfo[] writeOldSplits(org.apache.hadoop.mapred.InputSplit[] splits, FSDataOutputStream out, Configuration conf) throws IOException { SplitMetaInfo[] info = new SplitMetaInfo[splits.length]; if (splits.length != 0) { int i = 0; long offset = out.getPos(); for (org.apache.hadoop.mapred.InputSplit split : splits) { long prevLen = out.getPos(); Text.writeString(out, split.getClass().getName()); split.write(out);// w w w . j av a 2 s . c om long currLen = out.getPos(); String[] locations = split.getLocations(); final int max_loc = conf.getInt(MAX_SPLIT_LOCATIONS, 10); if (locations.length > max_loc) { LOG.warn("Max block location exceeded for split: " + split + " splitsize: " + locations.length + " maxsize: " + max_loc); locations = Arrays.copyOf(locations, max_loc); } info[i++] = new JobSplit.SplitMetaInfo(locations, offset, split.getLength()); offset += currLen - prevLen; } } return info; }
From source file:ml.shifu.guagua.yarn.util.InputSplitUtils.java
License:Apache License
/**
 * Groups input splits into combined splits, targeting {@code maxCombinedSplitSize} per group and
 * combining splits that report a common location.
 *
 * <p>Steps, as implemented below:
 * <ol>
 * <li>Zero-length splits are counted and skipped. A split whose length is at least
 * {@code maxCombinedSplitSize} becomes a group of its own. Every other split is registered on
 * one {@code Node} per distinct location it reports.</li>
 * <li>If every input split was empty, a single group holding the first input split is added. If
 * exactly one small split was registered, it becomes its own group.</li>
 * <li>Otherwise each node packs its splits greedily; a packed group is kept only when its total
 * length exceeds half of {@code maxCombinedSplitSize}.</li>
 * <li>Splits still attached to a node afterwards are de-duplicated and packed in encounter
 * order. The final partial group is merged into the first existing group that has room for it,
 * or appended as a new group.</li>
 * </ol>
 *
 * @param oneInputSplits the splits to combine
 * @param maxCombinedSplitSize the target size of a combined group
 * @return the groups of splits; each inner list is one combined split
 * @throws IOException propagated from the splits' {@code getLength()} / {@code getLocations()}
 * @throws InterruptedException propagated from the splits' {@code getLength()} /
 *         {@code getLocations()}
 */
public static List<List<InputSplit>> getCombineGuaguaSplits(List<InputSplit> oneInputSplits,
        long maxCombinedSplitSize) throws IOException, InterruptedException {
    ArrayList<Node> nodes = new ArrayList<Node>();
    HashMap<String, Node> nodeMap = new HashMap<String, Node>();
    List<List<InputSplit>> result = new ArrayList<List<InputSplit>>();
    // Total length of each group in result, used when squeezing in the last leftover piece
    List<Long> resultLengths = new ArrayList<Long>();
    long comparableSplitId = 0;

    // size counts the non-empty splits smaller than maxCombinedSplitSize
    int size = 0, nSplits = oneInputSplits.size();
    InputSplit lastSplit = null;
    int emptyCnt = 0;
    for (InputSplit split : oneInputSplits) {
        if (split.getLength() == 0) {
            emptyCnt++;
            continue;
        }
        if (split.getLength() >= maxCombinedSplitSize) {
            // Already large enough: emit it as a group of its own
            comparableSplitId++;
            ArrayList<InputSplit> combinedSplits = new ArrayList<InputSplit>();
            combinedSplits.add(split);
            result.add(combinedSplits);
            resultLengths.add(split.getLength());
        } else {
            ComparableSplit csplit = new ComparableSplit(split, comparableSplitId++);
            String[] locations = split.getLocations();
            // sort the locations to stabilize the number of maps: PIG-1757
            Arrays.sort(locations);
            // Register the split once per distinct location, creating nodes on demand
            HashSet<String> locationSeen = new HashSet<String>();
            for (String location : locations) {
                if (!locationSeen.contains(location)) {
                    Node node = nodeMap.get(location);
                    if (node == null) {
                        node = new Node();
                        nodes.add(node);
                        nodeMap.put(location, node);
                    }
                    node.add(csplit);
                    csplit.add(node);
                    locationSeen.add(location);
                }
            }
            lastSplit = split;
            size++;
        }
    }

    if (nSplits > 0 && emptyCnt == nSplits) {
        // if all splits are empty, add a single empty split as currently an empty directory is
        // not properly handled somewhere
        ArrayList<InputSplit> combinedSplits = new ArrayList<InputSplit>();
        combinedSplits.add(oneInputSplits.get(0));
        result.add(combinedSplits);
    } else if (size == 1) {
        // Only one small split: nothing to combine it with
        ArrayList<InputSplit> combinedSplits = new ArrayList<InputSplit>();
        combinedSplits.add(lastSplit);
        result.add(combinedSplits);
    } else if (size > 1) {
        // combine small splits
        Collections.sort(nodes, nodeComparator);
        DummySplit dummy = new DummySplit();
        // dummy is used to search for next split of suitable size to be combined
        ComparableSplit dummyComparableSplit = new ComparableSplit(dummy, -1);
        for (Node node : nodes) {
            // sort the splits on this node in descending order
            node.sort();
            long totalSize = 0;
            ArrayList<ComparableSplit> splits = node.getSplits();
            int idx;
            int lenSplits;
            ArrayList<InputSplit> combinedSplits = new ArrayList<InputSplit>();
            ArrayList<ComparableSplit> combinedComparableSplits = new ArrayList<ComparableSplit>();
            while (!splits.isEmpty()) {
                // Start a group with the first split of the node
                combinedSplits.add(splits.get(0).getSplit());
                combinedComparableSplits.add(splits.get(0));
                int startIdx = 1;
                lenSplits = splits.size();
                totalSize += splits.get(0).getSplit().getLength();
                long spaceLeft = maxCombinedSplitSize - totalSize;
                dummy.setLength(spaceLeft);
                // The arithmetic below treats the result as a "not found" insertion point.
                // NOTE(review): assumes the dummy never compares equal to a real split
                // (presumably guaranteed by its id of -1) - confirm in ComparableSplit.
                idx = Collections.binarySearch(node.getSplits().subList(startIdx, lenSplits),
                        dummyComparableSplit);
                idx = -idx - 1 + startIdx;
                while (idx < lenSplits) {
                    long thisLen = splits.get(idx).getSplit().getLength();
                    combinedSplits.add(splits.get(idx).getSplit());
                    combinedComparableSplits.add(splits.get(idx));
                    totalSize += thisLen;
                    spaceLeft -= thisLen;
                    if (spaceLeft <= 0)
                        break;
                    // find next combinable chunk
                    startIdx = idx + 1;
                    if (startIdx >= lenSplits)
                        break;
                    dummy.setLength(spaceLeft);
                    idx = Collections.binarySearch(node.getSplits().subList(startIdx, lenSplits),
                            dummyComparableSplit);
                    idx = -idx - 1 + startIdx;
                }
                if (totalSize > maxCombinedSplitSize / 2) {
                    // Group is more than half full: keep it and continue with what is left
                    result.add(combinedSplits);
                    resultLengths.add(totalSize);
                    // NOTE(review): presumably detaches these splits from every node they are
                    // registered on - confirm in removeSplits
                    removeSplits(combinedComparableSplits);
                    totalSize = 0;
                    combinedSplits = new ArrayList<InputSplit>();
                    combinedComparableSplits.clear();
                    splits = node.getSplits();
                } else {
                    // Too small to keep: every split of the node must have been tried; the
                    // splits stay on the node and are handled as leftovers below
                    if (combinedSplits.size() != lenSplits)
                        throw new AssertionError("Combined split logic error!");
                    break;
                }
            }
        }
        // handle leftovers
        ArrayList<ComparableSplit> leftoverSplits = new ArrayList<ComparableSplit>();
        HashSet<InputSplit> seen = new HashSet<InputSplit>();
        for (Node node : nodes) {
            for (ComparableSplit split : node.getSplits()) {
                if (!seen.contains(split.getSplit())) {
                    // remove duplicates. The set has to be on the raw input split not the
                    // comparable input split as the latter overrides the compareTo method
                    // so its equality semantics is changed and not what we want here
                    seen.add(split.getSplit());
                    leftoverSplits.add(split);
                }
            }
        }

        if (!leftoverSplits.isEmpty()) {
            long totalSize = 0;
            ArrayList<InputSplit> combinedSplits = new ArrayList<InputSplit>();
            ArrayList<ComparableSplit> combinedComparableSplits = new ArrayList<ComparableSplit>();

            int splitLen = leftoverSplits.size();
            for (int i = 0; i < splitLen; i++) {
                ComparableSplit split = leftoverSplits.get(i);
                long thisLen = split.getSplit().getLength();
                if (totalSize + thisLen >= maxCombinedSplitSize) {
                    // Adding this split would reach the limit: flush the current group first
                    removeSplits(combinedComparableSplits);
                    result.add(combinedSplits);
                    resultLengths.add(totalSize);
                    combinedSplits = new ArrayList<InputSplit>();
                    combinedComparableSplits.clear();
                    totalSize = 0;
                }
                combinedSplits.add(split.getSplit());
                combinedComparableSplits.add(split);
                totalSize += split.getSplit().getLength();
                if (i == splitLen - 1) {
                    // last piece: it could be very small, try to see it can be squeezed into any existing splits
                    for (int j = 0; j < result.size(); j++) {
                        if (resultLengths.get(j) + totalSize <= maxCombinedSplitSize) {
                            List<InputSplit> isList = result.get(j);
                            for (InputSplit csplit : combinedSplits) {
                                isList.add(csplit);
                            }
                            removeSplits(combinedComparableSplits);
                            combinedSplits.clear();
                            break;
                        }
                    }
                    if (!combinedSplits.isEmpty()) {
                        // last piece can not be squeezed in, create a new combined split for them.
                        removeSplits(combinedComparableSplits);
                        result.add(combinedSplits);
                    }
                }
            }
        }
    }
    LOG.info("Total input paths (combined) to process : {}", result.size());
    return result;
}
From source file:ml.shifu.shifu.core.mr.input.CombineInputFormat.java
License:Apache License
/**
 * Groups input splits into combined splits, targeting {@code maxCombinedSplitSize} per group and
 * combining splits that report a common location.
 *
 * <p>Steps, as implemented below:
 * <ol>
 * <li>Zero-length splits are counted and skipped. A split whose length is at least
 * {@code maxCombinedSplitSize} becomes a group of its own. Every other split is registered on
 * one {@code Node} per distinct location it reports.</li>
 * <li>If every input split was empty, a single group holding the first input split is added. If
 * exactly one small split was registered, it becomes its own group.</li>
 * <li>Otherwise each node packs its splits greedily; a packed group is kept only when its total
 * length exceeds half of {@code maxCombinedSplitSize}.</li>
 * <li>Splits still attached to a node afterwards are de-duplicated and packed in encounter
 * order. The final partial group is merged into the first existing group that has room for it,
 * or appended as a new group.</li>
 * </ol>
 *
 * @param oneInputSplits the splits to combine
 * @param maxCombinedSplitSize the target size of a combined group
 * @return the groups of splits; each inner list is one combined split
 * @throws IOException propagated from the splits' {@code getLength()} / {@code getLocations()}
 * @throws InterruptedException propagated from the splits' {@code getLength()} /
 *         {@code getLocations()}
 */
public static List<List<InputSplit>> getCombineVarSelectSplits(List<InputSplit> oneInputSplits,
        long maxCombinedSplitSize) throws IOException, InterruptedException {
    List<Node> nodes = new ArrayList<Node>();
    Map<String, Node> nodeMap = new HashMap<String, Node>();
    List<List<InputSplit>> result = new ArrayList<List<InputSplit>>();
    // Total length of each group in result, used when squeezing in the last leftover piece
    List<Long> resultLengths = new ArrayList<Long>();
    long comparableSplitId = 0;

    // size counts the non-empty splits smaller than maxCombinedSplitSize
    int size = 0, nSplits = oneInputSplits.size();
    InputSplit lastSplit = null;
    int emptyCnt = 0;
    for (InputSplit split : oneInputSplits) {
        if (split.getLength() == 0) {
            emptyCnt++;
            continue;
        }
        if (split.getLength() >= maxCombinedSplitSize) {
            // Already large enough: emit it as a group of its own
            comparableSplitId++;
            List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
            combinedSplits.add(split);
            result.add(combinedSplits);
            resultLengths.add(split.getLength());
        } else {
            ComparableSplit csplit = new ComparableSplit(split, comparableSplitId++);
            String[] locations = split.getLocations();
            // sort the locations to stabilize the number of maps: PIG-1757
            Arrays.sort(locations);
            // Register the split once per distinct location, creating nodes on demand
            HashSet<String> locationSeen = new HashSet<String>();
            for (String location : locations) {
                if (!locationSeen.contains(location)) {
                    Node node = nodeMap.get(location);
                    if (node == null) {
                        node = new Node();
                        nodes.add(node);
                        nodeMap.put(location, node);
                    }
                    node.add(csplit);
                    csplit.add(node);
                    locationSeen.add(location);
                }
            }
            lastSplit = split;
            size++;
        }
    }

    if (nSplits > 0 && emptyCnt == nSplits) {
        // if all splits are empty, add a single empty split as currently an empty directory is
        // not properly handled somewhere
        List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
        combinedSplits.add(oneInputSplits.get(0));
        result.add(combinedSplits);
    } else if (size == 1) {
        // Only one small split: nothing to combine it with
        List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
        combinedSplits.add(lastSplit);
        result.add(combinedSplits);
    } else if (size > 1) {
        // combine small splits
        Collections.sort(nodes, nodeComparator);
        DummySplit dummy = new DummySplit();
        // dummy is used to search for next split of suitable size to be combined
        ComparableSplit dummyComparableSplit = new ComparableSplit(dummy, -1);
        for (Node node : nodes) {
            // sort the splits on this node in descending order
            node.sort();
            long totalSize = 0;
            List<ComparableSplit> splits = node.getSplits();
            int idx;
            int lenSplits;
            List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
            List<ComparableSplit> combinedComparableSplits = new ArrayList<ComparableSplit>();
            while (!splits.isEmpty()) {
                // Start a group with the first split of the node
                combinedSplits.add(splits.get(0).getSplit());
                combinedComparableSplits.add(splits.get(0));
                int startIdx = 1;
                lenSplits = splits.size();
                totalSize += splits.get(0).getSplit().getLength();
                long spaceLeft = maxCombinedSplitSize - totalSize;
                dummy.setLength(spaceLeft);
                // The arithmetic below treats the result as a "not found" insertion point.
                // NOTE(review): assumes the dummy never compares equal to a real split
                // (presumably guaranteed by its id of -1) - confirm in ComparableSplit.
                idx = Collections.binarySearch(node.getSplits().subList(startIdx, lenSplits),
                        dummyComparableSplit);
                idx = -idx - 1 + startIdx;
                while (idx < lenSplits) {
                    long thisLen = splits.get(idx).getSplit().getLength();
                    combinedSplits.add(splits.get(idx).getSplit());
                    combinedComparableSplits.add(splits.get(idx));
                    totalSize += thisLen;
                    spaceLeft -= thisLen;
                    if (spaceLeft <= 0)
                        break;
                    // find next combinable chunk
                    startIdx = idx + 1;
                    if (startIdx >= lenSplits)
                        break;
                    dummy.setLength(spaceLeft);
                    idx = Collections.binarySearch(node.getSplits().subList(startIdx, lenSplits),
                            dummyComparableSplit);
                    idx = -idx - 1 + startIdx;
                }
                if (totalSize > maxCombinedSplitSize / 2) {
                    // Group is more than half full: keep it and continue with what is left
                    result.add(combinedSplits);
                    resultLengths.add(totalSize);
                    // NOTE(review): presumably detaches these splits from every node they are
                    // registered on - confirm in removeSplits
                    removeSplits(combinedComparableSplits);
                    totalSize = 0;
                    combinedSplits = new ArrayList<InputSplit>();
                    combinedComparableSplits.clear();
                    splits = node.getSplits();
                } else {
                    // Too small to keep: every split of the node must have been tried; the
                    // splits stay on the node and are handled as leftovers below
                    if (combinedSplits.size() != lenSplits)
                        throw new AssertionError("Combined split logic error!");
                    break;
                }
            }
        }
        // handle leftovers
        List<ComparableSplit> leftoverSplits = new ArrayList<ComparableSplit>();
        Set<InputSplit> seen = new HashSet<InputSplit>();
        for (Node node : nodes) {
            for (ComparableSplit split : node.getSplits()) {
                if (!seen.contains(split.getSplit())) {
                    // remove duplicates. The set has to be on the raw input split not the
                    // comparable input split as the latter overrides the compareTo method
                    // so its equality semantics is changed and not what we want here
                    seen.add(split.getSplit());
                    leftoverSplits.add(split);
                }
            }
        }

        if (!leftoverSplits.isEmpty()) {
            long totalSize = 0;
            List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
            List<ComparableSplit> combinedComparableSplits = new ArrayList<ComparableSplit>();

            int splitLen = leftoverSplits.size();
            for (int i = 0; i < splitLen; i++) {
                ComparableSplit split = leftoverSplits.get(i);
                long thisLen = split.getSplit().getLength();
                if (totalSize + thisLen >= maxCombinedSplitSize) {
                    // Adding this split would reach the limit: flush the current group first
                    removeSplits(combinedComparableSplits);
                    result.add(combinedSplits);
                    resultLengths.add(totalSize);
                    combinedSplits = new ArrayList<InputSplit>();
                    combinedComparableSplits.clear();
                    totalSize = 0;
                }
                combinedSplits.add(split.getSplit());
                combinedComparableSplits.add(split);
                totalSize += split.getSplit().getLength();
                if (i == splitLen - 1) {
                    // last piece: it could be very small, try to see it can be squeezed into any existing splits
                    for (int j = 0; j < result.size(); j++) {
                        if (resultLengths.get(j) + totalSize <= maxCombinedSplitSize) {
                            List<InputSplit> isList = result.get(j);
                            for (InputSplit csplit : combinedSplits) {
                                isList.add(csplit);
                            }
                            removeSplits(combinedComparableSplits);
                            combinedSplits.clear();
                            break;
                        }
                    }
                    if (!combinedSplits.isEmpty()) {
                        // last piece can not be squeezed in, create a new combined split for them.
                        removeSplits(combinedComparableSplits);
                        result.add(combinedSplits);
                    }
                }
            }
        }
    }
    LOG.info("Total input paths (combined) to process : {}", result.size());
    return result;
}
From source file:nl.surfsara.warcutils.WarcSequenceFileRecordReader.java
License:Apache License
@Override public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException { FileSplit split = (FileSplit) inputSplit; Configuration conf = context.getConfiguration(); final Path path = split.getPath(); Option optPath = SequenceFile.Reader.file(path); in = new SequenceFile.Reader(conf, optPath); this.end = split.getStart() + inputSplit.getLength(); if (split.getStart() > in.getPosition()) { in.sync(split.getStart());//from w w w.ja v a2s .c o m } start = in.getPosition(); done = start >= end; }
From source file:org.apache.carbondata.hadoop.CarbonMultiBlockSplit.java
License:Apache License
/**
 * Returns the combined length of this multi-block split, i.e. the sum of the lengths of all
 * splits in {@code splitList}.
 *
 * @return the sum of the contained splits' lengths; {@code 0} when there are none
 * @throws IOException if a contained split fails to report its length
 * @throws InterruptedException if a contained split is interrupted while reporting its length
 */
@Override
public long getLength() throws IOException, InterruptedException {
    long combinedLength = 0;
    for (InputSplit block : splitList) {
        combinedLength = combinedLength + block.getLength();
    }
    return combinedLength;
}
From source file:org.apache.hawq.pxf.plugins.hdfs.ParquetDataFragmenter.java
License:Apache License
private List<InputSplit> getSplits(Path path) throws IOException { ParquetInputFormat<Group> parquetInputFormat = new ParquetInputFormat<Group>(); ParquetInputFormat.setInputPaths(job, path); List<InputSplit> splits = parquetInputFormat.getSplits(job); ArrayList<InputSplit> result = new ArrayList<InputSplit>(); if (splits != null) { for (InputSplit split : splits) { try { if (split.getLength() > 0) { result.add(split);//from www.j a v a 2 s. com } } catch (InterruptedException e) { throw new RuntimeException("Unable to read split's length", e); } } } return result; }