Example usage for org.apache.hadoop.mapreduce InputSplit getLength

List of usage examples for org.apache.hadoop.mapreduce InputSplit getLength

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce InputSplit getLength.

Prototype

public abstract long getLength() throws IOException, InterruptedException;

Document

Get the size of the split, so that the input splits can be sorted by size.
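
Before the project examples below, here is a minimal, hypothetical sketch (not taken from any of the projects on this page) showing getLength() used for the purpose the javadoc describes: ordering a job's splits by size. The class name SplitSizeExample and the way the Job and InputFormat are obtained are assumptions for illustration only.

import java.io.IOException;
import java.util.Comparator;
import java.util.List;

import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;

public class SplitSizeExample {

    // Sorts a job's input splits from largest to smallest by calling
    // InputSplit.getLength() on each split.
    public static void sortBySize(Job job, InputFormat<?, ?> inputFormat)
            throws IOException, InterruptedException {
        List<InputSplit> splits = inputFormat.getSplits(job);
        splits.sort(Comparator.comparingLong((InputSplit split) -> {
            try {
                // negate so that larger splits sort first
                return -split.getLength();
            } catch (IOException | InterruptedException e) {
                return 0L; // treat an unreadable length as unknown
            }
        }));
    }
}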

Usage

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License:Apache License

@SuppressWarnings({ "rawtypes", "unchecked" })
private void runMap(Job job, KeyValueSorter<?, ?> sorter)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = input.getSplits(job);
    int serial = 1;
    for (InputSplit split : splits) {
        TaskAttemptID id = newTaskAttemptId(newMapTaskId(job.getJobID(), serial++), 0);
        Mapper<?, ?, ?, ?> mapper = ReflectionUtils.newInstance(job.getMapperClass(), conf);
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("starting mapper: {0}@{1} ({2}bytes)", //$NON-NLS-1$
                    mapper.getClass().getName(), id, split.getLength()));
        }
        TaskAttemptContext context = newTaskAttemptContext(conf, id);
        // we always obtain a new OutputFormat object / OutputFormat.getOutputCommitter() may be cached
        OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
        OutputCommitter committer = output.getOutputCommitter(context);
        committer.setupTask(context);
        boolean succeed = false;
        try (RecordReader<?, ?> reader = input.createRecordReader(split, newTaskAttemptContext(conf, id))) {
            RecordWriter<?, ?> writer;
            if (sorter != null) {
                writer = new ShuffleWriter(sorter);
            } else {
                writer = output.getRecordWriter(newTaskAttemptContext(conf, id));
            }
            try {
                Mapper.Context c = newMapperContext(conf, id, reader, writer, committer, split);
                reader.initialize(split, c);
                mapper.run(c);
            } finally {
                writer.close(newTaskAttemptContext(conf, id));
            }
            doCommitTask(context, committer);
            succeed = true;
        } finally {
            if (!succeed) {
                doAbortTask(context, committer);
            }
        }
    }
}

From source file:com.basho.riak.hadoop.RiakInputFormatTest.java

License:Apache License

@Test
public void getSplits() throws Exception {
    final List<BucketKey> bks = new LinkedList<BucketKey>();
    for (int i = 0; i < 100001; i++) {
        bks.add(new BucketKey(BUCKET, KEY + i));
    }

    RiakLocation[] locations = new RiakLocation[] { new RiakLocation("host1", 8091),
            new RiakLocation("host2", 8091), new RiakLocation("host3", 8091), new RiakLocation("host4", 8091) };

    List<InputSplit> splits = RiakInputFormat.getSplits(bks, locations, 999);

    assertEquals("Expected 101 splits", 101, splits.size());

    int _999SplitCnt = 0;
    int _101SplitCnt = 0;
    int otherSplitCnt = 0;

    for (InputSplit is : splits) {
        long length = is.getLength();

        if (length == 999) {
            _999SplitCnt++;
        } else if (length == 101) {
            _101SplitCnt++;
        } else {
            otherSplitCnt++;
        }
    }

    assertEquals("Should be 100 splits of 999 keys", 100, _999SplitCnt);
    assertEquals("Should be 1 split of 101 keys", 1, _101SplitCnt);
    assertEquals("Should be 0 splits of with neither 999 or 101 keys", 0, otherSplitCnt);
}

From source file:com.basho.riak.hadoop.RiakRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    try {
        RiakInputSplit inputSplit = (RiakInputSplit) split;
        keys = new ConcurrentLinkedQueue<BucketKey>(inputSplit.getInputs());
        initialSize = split.getLength();
        client = ClientFactory.getClient(inputSplit.getLocation());
    } catch (RiakException e) {
        throw new IOException(e);
    }
}

From source file:com.flipkart.fdp.migration.distcp.core.MirrorFileInputFormat.java

License:Apache License

private void sortSplits(List<InputSplit> splits) {
    Collections.sort(splits, new Comparator<InputSplit>() {
        // @Override
        public int compare(InputSplit f0, InputSplit f1) {
            try {
                if (f1.getLength() > f0.getLength())
                    return 1;
                if (f1.getLength() < f0.getLength())
                    return -1;
                return 0;
            } catch (Exception e) {
                return 0;
            }
        }
    });
}

From source file:com.google.cloud.hadoop.util.HadoopToStringUtil.java

License:Open Source License

public static String toString(InputSplit input) throws IOException, InterruptedException {
    if (input == null) {
        return "null";
    }

    String result = "InputSplit::";
    result += " length:" + input.getLength();
    result += " locations: " + Arrays.toString(input.getLocations());
    result += " toString(): " + input.toString();
    return result;
}

From source file:com.kylinolap.job.hadoop.AbstractHadoopJob.java

License:Apache License

protected double getTotalMapInputMB()
        throws ClassNotFoundException, IOException, InterruptedException, JobException {
    if (job == null) {
        throw new JobException("Job is null");
    }

    long mapInputBytes = 0;
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
    for (InputSplit split : input.getSplits(job)) {
        mapInputBytes += split.getLength();
    }
    if (mapInputBytes == 0) {
        throw new IllegalArgumentException("Map input splits are 0 bytes, something is wrong!");
    }
    double totalMapInputMB = (double) mapInputBytes / 1024 / 1024;
    return totalMapInputMB;
}

From source file:com.marklogic.contentpump.AggregateXMLReader.java

License:Apache License

protected void initStreamReader(InputSplit inSplit) throws IOException, InterruptedException {
    start = 0;
    end = inSplit.getLength();
    overflow = false;
    setFile(((FileSplit) inSplit).getPath());
    configFileNameAsCollection(conf, file);

    fInputStream = fs.open(file);

    try {
        xmlSR = f.createXMLStreamReader(fInputStream, encoding);
    } catch (XMLStreamException e) {
        LOG.error(e.getMessage(), e);
    }

    if (useAutomaticId) {
        idGen = new IdGenerator(file.toUri().getPath() + "-" + ((FileSplit) inSplit).getStart());
    }
}

From source file:com.marklogic.contentpump.CombineDocumentReader.java

License:Apache License

@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    initConfig(context);

    iterator = new FileIterator(((CombineDocumentSplit) inSplit).getSplits().iterator(), context);
    bytesTotal = inSplit.getLength();
    this.context = context;
    batchSize = conf.getInt(MarkLogicConstants.BATCH_SIZE, MarkLogicConstants.DEFAULT_BATCH_SIZE);
}

From source file:com.marklogic.contentpump.CombineDocumentSplit.java

License:Apache License

public CombineDocumentSplit(List<FileSplit> splits) throws IOException, InterruptedException {
    this.splits = splits;
    locations = new HashSet<String>();
    for (InputSplit split : splits) {
        length += split.getLength();
        for (String loc : split.getLocations()) {
            if (!locations.contains(loc)) {
                locations.add(loc);
            }
        }
    }
}

From source file:com.marklogic.contentpump.CompressedAggXMLReader.java

License:Apache License

protected void initStreamReader(InputSplit inSplit) throws IOException, InterruptedException {
    setFile(((FileSplit) inSplit).getPath());
    FSDataInputStream fileIn = fs.open(file);
    String codecString = conf.get(ConfigConstants.CONF_INPUT_COMPRESSION_CODEC,
            CompressionCodec.ZIP.toString());
    if (codecString.equalsIgnoreCase(CompressionCodec.ZIP.toString())) {
        zipIn = new ZipInputStream(fileIn);
        codec = CompressionCodec.ZIP;
        while (true) {
            try {
                currZipEntry = ((ZipInputStream) zipIn).getNextEntry();
                if (currZipEntry == null) {
                    break;
                }
                if (currZipEntry.getSize() != 0) {
                    subId = currZipEntry.getName();
                    break;
                }
            } catch (IllegalArgumentException e) {
                LOG.warn("Skipped a zip entry in : " + file.toUri() + ", reason: " + e.getMessage());
            }
        }
        if (currZipEntry == null) { // no entry in zip
            LOG.warn("No valid entry in zip:" + file.toUri());
            return;
        }
        ByteArrayOutputStream baos;
        long size = currZipEntry.getSize();
        if (size == -1) {
            baos = new ByteArrayOutputStream();
        } else {
            baos = new ByteArrayOutputStream((int) size);
        }
        int nb;
        while ((nb = zipIn.read(buf, 0, buf.length)) != -1) {
            baos.write(buf, 0, nb);
        }
        try {
            start = 0;
            end = baos.size();
            xmlSR = f.createXMLStreamReader(new ByteArrayInputStream(baos.toByteArray()), encoding);
        } catch (XMLStreamException e) {
            LOG.error(e.getMessage(), e);
        }

    } else if (codecString.equalsIgnoreCase(CompressionCodec.GZIP.toString())) {
        zipIn = new GZIPInputStream(fileIn);
        codec = CompressionCodec.GZIP;
        try {
            start = 0;
            end = inSplit.getLength();
            xmlSR = f.createXMLStreamReader(zipIn, encoding);
        } catch (XMLStreamException e) {
            LOG.error(e.getMessage(), e);
        }
    } else {
        // report the configured codec string; the codec field is not set on this path
        throw new UnsupportedOperationException("Unsupported codec: " + codecString);
    }
    if (useAutomaticId) {
        idGen = new IdGenerator(file.toUri().getPath() + "-" + ((FileSplit) inSplit).getStart());
    }
}