Example usage for org.apache.hadoop.mapreduce InputSplit getLength

List of usage examples for org.apache.hadoop.mapreduce InputSplit getLength

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce InputSplit getLength.

Prototype

public abstract long getLength() throws IOException, InterruptedException;

Document

Get the size of the split, so that the input splits can be sorted by size.
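
Before the project examples below, here is a minimal, hypothetical sketch (not taken from any of the projects on this page) showing getLength() used for the purpose the javadoc describes: ordering a job's splits by size. The class name SplitSizeExample and the way the Job and InputFormat are obtained are assumptions for illustration only.

import java.io.IOException;
import java.util.Comparator;
import java.util.List;

import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;

public class SplitSizeExample {

    // Sorts a job's input splits from largest to smallest by calling
    // InputSplit.getLength() on each split.
    public static void sortBySize(Job job, InputFormat<?, ?> inputFormat)
            throws IOException, InterruptedException {
        List<InputSplit> splits = inputFormat.getSplits(job);
        splits.sort(Comparator.comparingLong((InputSplit split) -> {
            try {
                // negate so that larger splits sort first
                return -split.getLength();
            } catch (IOException | InterruptedException e) {
                return 0L; // treat an unreadable length as unknown
            }
        }));
    }
}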

Usage

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License:Apache License

@SuppressWarnings({ "rawtypes", "unchecked" })
private void runMap(Job job, KeyValueSorter<?, ?> sorter)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = input.getSplits(job);
    int serial = 1;
    for (InputSplit split : splits) {
        TaskAttemptID id = newTaskAttemptId(newMapTaskId(job.getJobID(), serial++), 0);
        Mapper<?, ?, ?, ?> mapper = ReflectionUtils.newInstance(job.getMapperClass(), conf);
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("starting mapper: {0}@{1} ({2}bytes)", //$NON-NLS-1$
                    mapper.getClass().getName(), id, split.getLength()));
        }
        TaskAttemptContext context = newTaskAttemptContext(conf, id);
        // we always obtain a new OutputFormat object / OutputFormat.getOutputCommitter() may be cached
        OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
        OutputCommitter committer = output.getOutputCommitter(context);
        committer.setupTask(context);
        boolean succeed = false;
        try (RecordReader<?, ?> reader = input.createRecordReader(split, newTaskAttemptContext(conf, id))) {
            RecordWriter<?, ?> writer;
            if (sorter != null) {
                writer = new ShuffleWriter(sorter);
            } else {
                writer = output.getRecordWriter(newTaskAttemptContext(conf, id));
            }
            try {
                Mapper.Context c = newMapperContext(conf, id, reader, writer, committer, split);
                reader.initialize(split, c);
                mapper.run(c);
            } finally {
                writer.close(newTaskAttemptContext(conf, id));
            }
            doCommitTask(context, committer);
            succeed = true;
        } finally {
            if (!succeed) {
                doAbortTask(context, committer);
            }
        }
    }
}

From source file:com.basho.riak.hadoop.RiakInputFormatTest.java

License:Apache License

@Test
public void getSplits() throws Exception {
    final List<BucketKey> bks = new LinkedList<BucketKey>();
    for (int i = 0; i < 100001; i++) {
        bks.add(new BucketKey(BUCKET, KEY + i));
    }

    RiakLocation[] locations = new RiakLocation[] { new RiakLocation("host1", 8091),
            new RiakLocation("host2", 8091), new RiakLocation("host3", 8091), new RiakLocation("host4", 8091) };

    List<InputSplit> splits = RiakInputFormat.getSplits(bks, locations, 999);

    assertEquals("Expected 101 splits", 101, splits.size());

    int _999SplitCnt = 0;
    int _101SplitCnt = 0;
    int otherSplitCnt = 0;

    for (InputSplit is : splits) {
        long length = is.getLength();

        if (length == 999) {
            _999SplitCnt++;
        } else if (length == 101) {
            _101SplitCnt++;
        } else {
            otherSplitCnt++;
        }
    }

    assertEquals("Should be 100 splits of 999 keys", 100, _999SplitCnt);
    assertEquals("Should be 1 split of 101 keys", 1, _101SplitCnt);
    assertEquals("Should be 0 splits of with neither 999 or 101 keys", 0, otherSplitCnt);
}

From source file:com.basho.riak.hadoop.RiakRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    try {
        RiakInputSplit inputSplit = (RiakInputSplit) split;
        keys = new ConcurrentLinkedQueue<BucketKey>(inputSplit.getInputs());
        initialSize = split.getLength();
        client = ClientFactory.getClient(inputSplit.getLocation());
    } catch (RiakException e) {
        throw new IOException(e);
    }
}

From source file:com.flipkart.fdp.migration.distcp.core.MirrorFileInputFormat.java

License:Apache License

private void sortSplits(List<InputSplit> splits) {
    Collections.sort(splits, new Comparator<InputSplit>() {
        // @Override
        public int compare(InputSplit f0, InputSplit f1) {
            try {
                if (f1.getLength() > f0.getLength())
                    return 1;
                if (f1.getLength() < f0.getLength())
                    return -1;
                return 0;
            } catch (Exception e) {
                return 0;
            }
        }
    });
}

From source file:com.google.cloud.hadoop.util.HadoopToStringUtil.java

License:Open Source License

public static String toString(InputSplit input) throws IOException, InterruptedException {
    if (input == null) {
        return "null";
    }

    String result = "InputSplit::";
    result += " length:" + input.getLength();
    result += " locations: " + Arrays.toString(input.getLocations());
    result += " toString(): " + input.toString();
    return result;
}

From source file:com.kylinolap.job.hadoop.AbstractHadoopJob.java

License:Apache License

protected double getTotalMapInputMB()
        throws ClassNotFoundException, IOException, InterruptedException, JobException {
    if (job == null) {
        throw new JobException("Job is null");
    }

    long mapInputBytes = 0;
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
    for (InputSplit split : input.getSplits(job)) {
        mapInputBytes += split.getLength();
    }
    if (mapInputBytes == 0) {
        throw new IllegalArgumentException("Map input splits are 0 bytes, something is wrong!");
    }
    double totalMapInputMB = (double) mapInputBytes / 1024 / 1024;
    return totalMapInputMB;
}

From source file:com.marklogic.contentpump.AggregateXMLReader.java

License:Apache License

protected void initStreamReader(InputSplit inSplit) throws IOException, InterruptedException {
    start = 0;
    end = inSplit.getLength();
    overflow = false;
    setFile(((FileSplit) inSplit).getPath());
    configFileNameAsCollection(conf, file);

    fInputStream = fs.open(file);

    try {
        xmlSR = f.createXMLStreamReader(fInputStream, encoding);
    } catch (XMLStreamException e) {
        LOG.error(e.getMessage(), e);
    }

    if (useAutomaticId) {
        idGen = new IdGenerator(file.toUri().getPath() + "-" + ((FileSplit) inSplit).getStart());
    }
}

From source file:com.marklogic.contentpump.CombineDocumentReader.java

License:Apache License

@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    initConfig(context);

    iterator = new FileIterator(((CombineDocumentSplit) inSplit).getSplits().iterator(), context);
    bytesTotal = inSplit.getLength();
    this.context = context;
    batchSize = conf.getInt(MarkLogicConstants.BATCH_SIZE, MarkLogicConstants.DEFAULT_BATCH_SIZE);
}

From source file:com.marklogic.contentpump.CombineDocumentSplit.java

License:Apache License

public CombineDocumentSplit(List<FileSplit> splits) throws IOException, InterruptedException {
    this.splits = splits;
    locations = new HashSet<String>();
    for (InputSplit split : splits) {
        length += split.getLength();
        for (String loc : split.getLocations()) {
            if (!locations.contains(loc)) {
                locations.add(loc);
            }
        }
    }
}

From source file:com.marklogic.contentpump.CompressedAggXMLReader.java

License:Apache License

protected void initStreamReader(InputSplit inSplit) throws IOException, InterruptedException {
    setFile(((FileSplit) inSplit).getPath());
    FSDataInputStream fileIn = fs.open(file);
    String codecString = conf.get(ConfigConstants.CONF_INPUT_COMPRESSION_CODEC,
            CompressionCodec.ZIP.toString());
    if (codecString.equalsIgnoreCase(CompressionCodec.ZIP.toString())) {
        zipIn = new ZipInputStream(fileIn);
        codec = CompressionCodec.ZIP;
        while (true) {
            try {
                currZipEntry = ((ZipInputStream) zipIn).getNextEntry();
                if (currZipEntry == null) {
                    break;
                }
                if (currZipEntry.getSize() != 0) {
                    subId = currZipEntry.getName();
                    break;
                }
            } catch (IllegalArgumentException e) {
                LOG.warn("Skipped a zip entry in : " + file.toUri() + ", reason: " + e.getMessage());
            }
        }
        if (currZipEntry == null) { // no entry in zip
            LOG.warn("No valid entry in zip:" + file.toUri());
            return;
        }
        ByteArrayOutputStream baos;
        long size = currZipEntry.getSize();
        if (size == -1) {
            baos = new ByteArrayOutputStream();
        } else {
            baos = new ByteArrayOutputStream((int) size);
        }
        int nb;
        while ((nb = zipIn.read(buf, 0, buf.length)) != -1) {
            baos.write(buf, 0, nb);
        }
        try {
            start = 0;
            end = baos.size();
            xmlSR = f.createXMLStreamReader(new ByteArrayInputStream(baos.toByteArray()), encoding);
        } catch (XMLStreamException e) {
            LOG.error(e.getMessage(), e);
        }

    } else if (codecString.equalsIgnoreCase(CompressionCodec.GZIP.toString())) {
        zipIn = new GZIPInputStream(fileIn);
        codec = CompressionCodec.GZIP;
        try {
            start = 0;
            end = inSplit.getLength();
            xmlSR = f.createXMLStreamReader(zipIn, encoding);
        } catch (XMLStreamException e) {
            LOG.error(e.getMessage(), e);
        }
    } else {
        // report the configured codec string; the codec field is not set on this path
        throw new UnsupportedOperationException("Unsupported codec: " + codecString);
    }
    if (useAutomaticId) {
        idGen = new IdGenerator(file.toUri().getPath() + "-" + ((FileSplit) inSplit).getStart());
    }
}