List of usage examples for org.apache.hadoop.mapreduce.InputSplit#getLength()
public abstract long getLength() throws IOException, InterruptedException;
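getLength() reports the size of a split in bytes (or another unit of work for non-file splits), and is what callers use to weight, order, and total splits. Before the real-world examples below, here is a minimal sketch of the typical call pattern; the TextInputFormat/FileInputFormat job setup is illustrative only and not taken from any of the sources that follow.

// A minimal sketch: sum getLength() over the splits an InputFormat produces.
// The Job/TextInputFormat setup is illustrative; any InputFormat works the same way.
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class SplitLengthDemo {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        FileInputFormat.addInputPath(job, new Path(args[0]));
        List<InputSplit> splits = new TextInputFormat().getSplits(job);
        long total = 0;
        for (InputSplit split : splits) {
            total += split.getLength(); // size of this split in bytes
        }
        System.out.println(splits.size() + " splits, " + total + " bytes total");
    }
}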
From source file: com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java
License: Apache License
@SuppressWarnings({ "rawtypes", "unchecked" }) private void runMap(Job job, KeyValueSorter<?, ?> sorter) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = job.getConfiguration(); InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf); List<InputSplit> splits = input.getSplits(job); int serial = 1; for (InputSplit split : splits) { TaskAttemptID id = newTaskAttemptId(newMapTaskId(job.getJobID(), serial++), 0); Mapper<?, ?, ?, ?> mapper = ReflectionUtils.newInstance(job.getMapperClass(), conf); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("starting mapper: {0}@{1} ({2}bytes)", //$NON-NLS-1$ mapper.getClass().getName(), id, split.getLength())); }/*w ww.jav a 2s .co m*/ TaskAttemptContext context = newTaskAttemptContext(conf, id); // we always obtain a new OutputFormat object / OutputFormat.getOutputCommiter() may be cached OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf); OutputCommitter committer = output.getOutputCommitter(context); committer.setupTask(context); boolean succeed = false; try (RecordReader<?, ?> reader = input.createRecordReader(split, newTaskAttemptContext(conf, id))) { RecordWriter<?, ?> writer; if (sorter != null) { writer = new ShuffleWriter(sorter); } else { writer = output.getRecordWriter(newTaskAttemptContext(conf, id)); } try { Mapper.Context c = newMapperContext(conf, id, reader, writer, committer, split); reader.initialize(split, c); mapper.run(c); } finally { writer.close(newTaskAttemptContext(conf, id)); } doCommitTask(context, committer); succeed = true; } finally { if (succeed == false) { doAbortTask(context, committer); } } } }
From source file: com.basho.riak.hadoop.RiakInputFormatTest.java
License: Apache License
@Test
public void getSplits() throws Exception {
    final List<BucketKey> bks = new LinkedList<BucketKey>();
    for (int i = 0; i < 100001; i++) {
        bks.add(new BucketKey(BUCKET, KEY + i));
    }
    RiakLocation[] locations = new RiakLocation[] {
            new RiakLocation("host1", 8091), new RiakLocation("host2", 8091),
            new RiakLocation("host3", 8091), new RiakLocation("host4", 8091) };
    List<InputSplit> splits = RiakInputFormat.getSplits(bks, locations, 999);
    assertEquals("Expected 101 splits", 101, splits.size());
    int _999SplitCnt = 0;
    int _101SplitCnt = 0;
    int otherSplitCnt = 0;
    for (InputSplit is : splits) {
        long length = is.getLength();
        if (length == 999) {
            _999SplitCnt++;
        } else if (length == 101) {
            _101SplitCnt++;
        } else {
            otherSplitCnt++;
        }
    }
    assertEquals("Should be 100 splits of 999 keys", 100, _999SplitCnt);
    assertEquals("Should be 1 split of 101 keys", 1, _101SplitCnt);
    assertEquals("Should be 0 splits with neither 999 nor 101 keys", 0, otherSplitCnt);
}
From source file: com.basho.riak.hadoop.RiakRecordReader.java
License: Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    try {
        RiakInputSplit inputSplit = (RiakInputSplit) split;
        keys = new ConcurrentLinkedQueue<BucketKey>(inputSplit.getInputs());
        initialSize = split.getLength();
        client = ClientFactory.getClient(inputSplit.getLocation());
    } catch (RiakException e) {
        throw new IOException(e);
    }
}
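Here getLength() seeds initialSize, a natural denominator for progress reporting as keys are drained from the queue. A hypothetical companion getProgress(), assuming the keys and initialSize fields from the snippet above (not taken from the actual source):

// Hypothetical companion to the initializer above: reports the fraction of
// keys consumed so far, guarding against a zero-length split.
@Override
public float getProgress() throws IOException, InterruptedException {
    if (initialSize == 0) {
        return 1.0f;
    }
    return 1.0f - ((float) keys.size() / initialSize);
}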
From source file: com.flipkart.fdp.migration.distcp.core.MirrorFileInputFormat.java
License: Apache License
private void sortSplits(List<InputSplit> splits) {
    Collections.sort(splits, new Comparator<InputSplit>() {
        @Override
        public int compare(InputSplit f0, InputSplit f1) {
            try {
                if (f1.getLength() > f0.getLength())
                    return 1;
                if (f1.getLength() < f0.getLength())
                    return -1;
                return 0;
            } catch (Exception e) {
                return 0;
            }
        }
    });
}
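The comparator above orders splits largest-first, a common trick so the longest-running splits get scheduled earliest. On Java 8+ the same descending sort can be written more compactly; swallowing getLength()'s checked exceptions inside the comparator is carried over from the original, and this rewrite is a sketch rather than the project's code:

// Hypothetical Java 8 equivalent of sortSplits: descending by length,
// treating any failure to read a length as "equal".
splits.sort((f0, f1) -> {
    try {
        return Long.compare(f1.getLength(), f0.getLength());
    } catch (IOException | InterruptedException e) {
        return 0;
    }
});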
From source file: com.google.cloud.hadoop.util.HadoopToStringUtil.java
License: Open Source License
public static String toString(InputSplit input) throws IOException, InterruptedException {
    if (input == null) {
        return "null";
    }
    String result = "InputSplit::";
    result += " length:" + input.getLength();
    result += " locations: " + Arrays.toString(input.getLocations());
    result += " toString(): " + input.toString();
    return result;
}
From source file: com.kylinolap.job.hadoop.AbstractHadoopJob.java
License: Apache License
protected double getTotalMapInputMB()
        throws ClassNotFoundException, IOException, InterruptedException, JobException {
    if (job == null) {
        throw new JobException("Job is null");
    }
    long mapInputBytes = 0;
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
    for (InputSplit split : input.getSplits(job)) {
        mapInputBytes += split.getLength();
    }
    if (mapInputBytes == 0) {
        throw new IllegalArgumentException("Map input splits are 0 bytes, something is wrong!");
    }
    double totalMapInputMB = (double) mapInputBytes / 1024 / 1024;
    return totalMapInputMB;
}
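Summing getLength() over every split gives the job's total input in bytes; dividing by 1024 twice converts to megabytes. For example, splits totaling 268435456 bytes yield 268435456 / 1024 / 1024 = 256.0 MB.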
From source file: com.marklogic.contentpump.AggregateXMLReader.java
License: Apache License
protected void initStreamReader(InputSplit inSplit) throws IOException, InterruptedException {
    start = 0;
    end = inSplit.getLength();
    overflow = false;
    setFile(((FileSplit) inSplit).getPath());
    configFileNameAsCollection(conf, file);
    fInputStream = fs.open(file);
    try {
        xmlSR = f.createXMLStreamReader(fInputStream, encoding);
    } catch (XMLStreamException e) {
        LOG.error(e.getMessage(), e);
    }
    if (useAutomaticId) {
        idGen = new IdGenerator(file.toUri().getPath() + "-" + ((FileSplit) inSplit).getStart());
    }
}
From source file: com.marklogic.contentpump.CombineDocumentReader.java
License: Apache License
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    initConfig(context);
    iterator = new FileIterator(((CombineDocumentSplit) inSplit).getSplits().iterator(), context);
    bytesTotal = inSplit.getLength();
    this.context = context;
    batchSize = conf.getInt(MarkLogicConstants.BATCH_SIZE, MarkLogicConstants.DEFAULT_BATCH_SIZE);
}
From source file: com.marklogic.contentpump.CombineDocumentSplit.java
License: Apache License
public CombineDocumentSplit(List<FileSplit> splits) throws IOException, InterruptedException {
    this.splits = splits;
    locations = new HashSet<String>();
    for (InputSplit split : splits) {
        length += split.getLength();
        for (String loc : split.getLocations()) {
            if (!locations.contains(loc)) {
                locations.add(loc);
            }
        }
    }
}
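The constructor accumulates the child splits' lengths and locations, so a composite split like this would typically report those aggregates back through the InputSplit contract. A minimal sketch of such accessors, assuming the length and locations fields populated above (the actual class may differ):

// Hypothetical accessors for the composite split: expose the totals
// accumulated in the constructor above.
@Override
public long getLength() throws IOException, InterruptedException {
    return length; // sum of all child FileSplit lengths
}

@Override
public String[] getLocations() throws IOException, InterruptedException {
    return locations.toArray(new String[0]);
}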
From source file: com.marklogic.contentpump.CompressedAggXMLReader.java
License: Apache License
protected void initStreamReader(InputSplit inSplit) throws IOException, InterruptedException {
    setFile(((FileSplit) inSplit).getPath());
    FSDataInputStream fileIn = fs.open(file);
    String codecString = conf.get(ConfigConstants.CONF_INPUT_COMPRESSION_CODEC,
            CompressionCodec.ZIP.toString());
    if (codecString.equalsIgnoreCase(CompressionCodec.ZIP.toString())) {
        zipIn = new ZipInputStream(fileIn);
        codec = CompressionCodec.ZIP;
        while (true) {
            try {
                currZipEntry = ((ZipInputStream) zipIn).getNextEntry();
                if (currZipEntry == null) {
                    break;
                }
                if (currZipEntry.getSize() != 0) {
                    subId = currZipEntry.getName();
                    break;
                }
            } catch (IllegalArgumentException e) {
                LOG.warn("Skipped a zip entry in : " + file.toUri() + ", reason: " + e.getMessage());
            }
        }
        if (currZipEntry == null) { // no entry in zip
            LOG.warn("No valid entry in zip:" + file.toUri());
            return;
        }
        ByteArrayOutputStream baos;
        long size = currZipEntry.getSize();
        if (size == -1) {
            baos = new ByteArrayOutputStream();
        } else {
            baos = new ByteArrayOutputStream((int) size);
        }
        int nb;
        while ((nb = zipIn.read(buf, 0, buf.length)) != -1) {
            baos.write(buf, 0, nb);
        }
        try {
            start = 0;
            end = baos.size();
            xmlSR = f.createXMLStreamReader(new ByteArrayInputStream(baos.toByteArray()), encoding);
        } catch (XMLStreamException e) {
            LOG.error(e.getMessage(), e);
        }
    } else if (codecString.equalsIgnoreCase(CompressionCodec.GZIP.toString())) {
        zipIn = new GZIPInputStream(fileIn);
        codec = CompressionCodec.GZIP;
        try {
            start = 0;
            end = inSplit.getLength();
            xmlSR = f.createXMLStreamReader(zipIn, encoding);
        } catch (XMLStreamException e) {
            LOG.error(e.getMessage(), e);
        }
    } else {
        // 'codec' is only assigned in the two handled branches, so report the
        // configured codec string rather than dereferencing a possibly-unset field
        throw new UnsupportedOperationException("Unsupported codec: " + codecString);
    }
    if (useAutomaticId) {
        idGen = new IdGenerator(file.toUri().getPath() + "-" + ((FileSplit) inSplit).getStart());
    }
}