Example usage for org.apache.hadoop.mapreduce.lib.input FileSplit toString

Introduction

This page collects example usages of the toString method of org.apache.hadoop.mapreduce.lib.input.FileSplit.

Prototype

@Override
    public String toString()

Source Link

Usage

From source file:eu.scape_project.pt.mapred.input.ControlFileInputFormatTest.java

License:Apache License

/**
 * Writes a new control file for four hosts (3, 6, 8 and 10 lines respectively), then reads back
 * the byte range of every returned split and checks that it holds exactly the expected run of
 * lines for the host it starts with.
 *
 * <p>The expected ranges asserted here are: host1 lines 1-3; host2 lines 1-3 and 4-6; host3
 * lines 1-4 and 5-8; host4 lines 1-3, 4-6 and 7-10. A split that starts with any other line
 * fails the test.
 *
 * @throws IOException if the mock file system cannot be read
 */
@Test
public void testWriteNewControlFileAndCreateSplits() throws IOException {
    MockupFileSystem fs = new MockupFileSystem();
    Path newControlFile = new Path("newControlFile");
    fs.addFile("newControlFile", true, null);

    Map<String, ArrayList<String>> locationMap = new HashMap<String, ArrayList<String>>();
    locationMap.put("host1", numberedLines("line1", 3));
    locationMap.put("host2", numberedLines("line2", 6));
    locationMap.put("host3", numberedLines("line3", 8));
    locationMap.put("host4", numberedLines("line4", 10));

    // NOTE(review): the trailing 3 presumably is the desired number of lines per split - confirm
    // against ControlFileInputFormat.
    List<FileSplit> splits = ControlFileInputFormat.writeNewControlFileAndCreateSplits(newControlFile, fs,
            locationMap, 3);

    FSDataInputStream bis = fs.open(newControlFile);
    try {
        int i = 0;
        for (FileSplit split : splits) {
            LOG.debug(++i + ".split = " + split.toString());
            String cont = new String(readSplit(bis, split));
            LOG.debug("  content = " + cont);

            // The first line of a split identifies which slice of which host it must contain.
            String expected = null;
            if (cont.startsWith("line1-1")) {
                expected = joinLines(locationMap.get("host1"), 0, 3);
            } else if (cont.startsWith("line2-1")) {
                expected = joinLines(locationMap.get("host2"), 0, 3);
            } else if (cont.startsWith("line2-4")) {
                expected = joinLines(locationMap.get("host2"), 3, 6);
            } else if (cont.startsWith("line3-1")) {
                expected = joinLines(locationMap.get("host3"), 0, 4);
            } else if (cont.startsWith("line3-5")) {
                expected = joinLines(locationMap.get("host3"), 4, 8);
            } else if (cont.startsWith("line4-1")) {
                expected = joinLines(locationMap.get("host4"), 0, 3);
            } else if (cont.startsWith("line4-4")) {
                expected = joinLines(locationMap.get("host4"), 3, 6);
            } else if (cont.startsWith("line4-7")) {
                expected = joinLines(locationMap.get("host4"), 6, 10);
            } else {
                fail("wrong split");
            }
            assertEquals(expected, cont);
        }
    } finally {
        bis.close();
    }
}

/** Builds the list {@code prefix-1 .. prefix-count} used as the control lines of one host. */
private static ArrayList<String> numberedLines(String prefix, int count) {
    ArrayList<String> lines = new ArrayList<String>(count);
    for (int n = 1; n <= count; n++) {
        lines.add(prefix + "-" + n);
    }
    return lines;
}

/** Concatenates {@code lines[from, to)}, terminating every line with a newline. */
private static String joinLines(List<String> lines, int from, int to) {
    StringBuilder joined = new StringBuilder();
    for (String line : lines.subList(from, to)) {
        joined.append(line).append('\n');
    }
    return joined.toString();
}

/**
 * Reads the complete byte range of {@code split} from {@code in}.
 *
 * <p>A single positioned read may return fewer bytes than requested, so the read is repeated
 * until the buffer is full; the test fails if the stream ends first.
 */
private static byte[] readSplit(FSDataInputStream in, FileSplit split) throws IOException {
    byte[] content = new byte[(int) split.getLength()];
    int filled = 0;
    while (filled < content.length) {
        int n = in.read(split.getStart() + filled, content, filled, content.length - filled);
        if (n <= 0) {
            fail("control file ended before split " + split + " was fully read");
        }
        filled += n;
    }
    return content;
}

From source file:org.apache.ignite.internal.processors.hadoop.GridHadoopSplitWrapperSelfTest.java

License:Apache License

/**
 * Tests serialization of wrapper and the wrapped native split.
 * @throws Exception If fails./*  w w w . j  a  v a  2s. c  om*/
 */
public void testSerialization() throws Exception {
    FileSplit nativeSplit = new FileSplit(new Path("/path/to/file"), 100, 500,
            new String[] { "host1", "host2" });

    assertEquals("/path/to/file:100+500", nativeSplit.toString());

    GridHadoopSplitWrapper split = GridHadoopUtils.wrapSplit(10, nativeSplit, nativeSplit.getLocations());

    assertEquals("[host1, host2]", Arrays.toString(split.hosts()));

    ByteArrayOutputStream buf = new ByteArrayOutputStream();

    ObjectOutput out = new ObjectOutputStream(buf);

    out.writeObject(split);

    ObjectInput in = new ObjectInputStream(new ByteArrayInputStream(buf.toByteArray()));

    final GridHadoopSplitWrapper res = (GridHadoopSplitWrapper) in.readObject();

    assertEquals("/path/to/file:100+500", GridHadoopUtils.unwrapSplit(res).toString());

    GridTestUtils.assertThrows(log, new Callable<Object>() {
        @Override
        public Object call() throws Exception {
            res.hosts();

            return null;
        }
    }, AssertionError.class, null);
}

From source file:org.apache.ignite.internal.processors.hadoop.HadoopSplitWrapperSelfTest.java

License:Apache License

/**
 * Round-trips a split wrapper through Java serialization and checks the native split it carries.
 *
 * @throws Exception If fails.
 */
public void testSerialization() throws Exception {
    String[] hostNames = new String[] { "host1", "host2" };

    FileSplit fileSplit = new FileSplit(new Path("/path/to/file"), 100, 500, hostNames);

    assertEquals("/path/to/file:100+500", fileSplit.toString());

    HadoopSplitWrapper wrapper = HadoopUtils.wrapSplit(10, fileSplit, fileSplit.getLocations());

    assertEquals("[host1, host2]", Arrays.toString(wrapper.hosts()));

    // Serialize the wrapper into an in-memory buffer...
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();

    ObjectOutput objOut = new ObjectOutputStream(bytes);

    objOut.writeObject(wrapper);

    // ...and read it straight back.
    ByteArrayInputStream serialized = new ByteArrayInputStream(bytes.toByteArray());

    ObjectInput objIn = new ObjectInputStream(serialized);

    final HadoopSplitWrapper restored = (HadoopSplitWrapper) objIn.readObject();

    FileSplit unwrapped = HadoopUtils.unwrapSplit(restored);

    assertEquals("/path/to/file:100+500", unwrapped.toString());

    // Querying hosts on the restored wrapper is expected to raise an AssertionError.
    Callable<Object> hostsCall = new Callable<Object>() {
        @Override
        public Object call() throws Exception {
            restored.hosts();

            return null;
        }
    };

    GridTestUtils.assertThrows(log, hostsCall, AssertionError.class, null);
}

From source file:org.apache.trevni.avro.mapreduce.AvroTrevniRecordReaderBase.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>Publishes the split's string form as the task status, opens a column reader over the
 * split's file and records the reader's row count.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    final FileSplit split = (FileSplit) inputSplit;
    context.setStatus(split.toString());

    final HadoopInput input = new HadoopInput(split.getPath(), context.getConfiguration());
    final AvroColumnReader.Params readerParams = new AvroColumnReader.Params(input);
    readerParams.setModel(ReflectData.get());

    // Only set a schema on the reader when the job configuration supplies an input key schema.
    if (null != AvroJob.getInputKeySchema(context.getConfiguration())) {
        readerParams.setSchema(AvroJob.getInputKeySchema(context.getConfiguration()));
    }

    reader = new AvroColumnReader<T>(readerParams);
    rows = reader.getRowCount();
}

From source file:org.bgi.flexlab.gaea.data.mapreduce.input.adaptor.AdaptorRecordReader.java

License:Open Source License

/**
 * Prepares this reader to read lines from the byte range described by the given split.
 *
 * <p>Opens the split's file and wraps it in a {@code LineReader}. When a compression codec
 * matches the file, the decompressed stream is read and {@code end} is set to
 * {@code Long.MAX_VALUE}. Otherwise, for a split that does not begin at offset 0, the stream is
 * positioned one byte before the split start and the first (partial) line is skipped so that
 * reading resumes at a line boundary.
 *
 * <p>If any step after opening the file fails, the opened stream is closed before the exception
 * propagates.
 *
 * @param genericSplit the split to read; cast to {@code FileSplit}
 * @param context task context supplying the job configuration
 * @throws IOException if the file cannot be opened, positioned or read
 */
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    // NOTE(review): the two console prints look like leftover debug output; kept so that the
    // task's stdout/stderr stay unchanged.
    System.out.println(split.toString());
    Configuration job = context.getConfiguration();
    System.err.println(split.getPath().toString());
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(file);
    boolean initialized = false;
    try {
        boolean skipFirstLine = false;
        if (codec != null) {
            in = new LineReader(codec.createInputStream(fileIn), job);
            end = Long.MAX_VALUE;
        } else {
            if (start != 0) {
                // Back up one byte so the skipped "first line" ends exactly at the first line
                // boundary at or after the split start.
                skipFirstLine = true;
                --start;
                fileIn.seek(start);
            }
            in = new LineReader(fileIn, job);
        }
        if (skipFirstLine) { // skip first line and re-establish "start".
            start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
        }
        this.pos = start;
        initialized = true;
    } finally {
        if (!initialized) {
            // Do not leak the open stream when initialization fails part-way.
            try {
                fileIn.close();
            } catch (IOException ignored) {
                // The failure that aborted initialization is the one worth reporting.
            }
        }
    }
}

From source file:org.mrgeo.hdfs.vector.DelimitedVectorInputFormat.java

License:Apache License

/**
 * Computes the input splits for the job.
 *
 * <p>Unless {@code USE_NLINE_FORMAT} is set to true in the job configuration, the splits come
 * straight from {@code TextInputFormat}. Otherwise they come from {@code NLineInputFormat} and
 * are then patched: wherever the last split of a file starts after the end of the split before
 * it, that last split is replaced by one that starts at the preceding split's end and is one
 * byte longer.
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    if (!context.getConfiguration().getBoolean(USE_NLINE_FORMAT, false)) {
        return new TextInputFormat().getSplits(context);
    }

    List<InputSplit> splits = new NLineInputFormat().getSplits(context);
    // Workaround for what appears to be a bug in how NLineInputFormat computes its splits:
    // when a file has multiple splits, the start position of its last split seems to be off
    // by one. The correction has to be applied to the last split of every distinct file in
    // the list.
    final int finalIndex = splits.size() - 1;
    for (int i = 2; i <= finalIndex; i++) {
        FileSplit before = (FileSplit) splits.get(i - 1);
        FileSplit current = (FileSplit) splits.get(i);

        // Index of the split that closes a file: the very last split, or the one just before
        // the path changes. -1 when position i closes nothing.
        int closingIndex;
        if (i == finalIndex) {
            closingIndex = i;
        } else if (!current.getPath().equals(before.getPath())) {
            closingIndex = i - 1;
        } else {
            closingIndex = -1;
        }
        if (closingIndex < 2) {
            continue;
        }

        FileSplit closing = (FileSplit) splits.get(closingIndex);
        FileSplit preceding = (FileSplit) splits.get(closingIndex - 1);
        if (!closing.getPath().equals(preceding.getPath())) {
            continue;
        }

        long precedingEnd = preceding.getStart() + preceding.getLength();
        if (precedingEnd < closing.getStart()) {
            // Pull the closing split's start back to where the preceding split ends.
            FileSplit replacement = new FileSplit(closing.getPath(), precedingEnd,
                    closing.getLength() + 1, closing.getLocations());
            log.info("Replacing split: " + closing.toString());
            log.info("  With split: " + replacement.toString());
            splits.set(closingIndex, replacement);
        }
    }
    return splits;
}

From source file:org.mrgeo.hdfs.vector.HdfsVectorInputFormat.java

License:Apache License

/**
 * Computes the input splits for the job.
 *
 * <p>Unless {@code USE_NLINE_FORMAT} is set to true in the job configuration, the splits come
 * straight from {@code TextInputFormat}. Otherwise they come from {@code NLineInputFormat} and
 * are then patched: wherever the last split of a file starts after the end of the split before
 * it, that last split is replaced by one that starts at the preceding split's end and is one
 * byte longer.
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    if (!context.getConfiguration().getBoolean(USE_NLINE_FORMAT, false)) {
        return new TextInputFormat().getSplits(context);
    }

    List<InputSplit> splits = new NLineInputFormat().getSplits(context);
    // Workaround for what appears to be a bug in how NLineInputFormat computes its splits:
    // when a file has multiple splits, the start position of its last split seems to be off
    // by one. The correction has to be applied to the last split of every distinct file in
    // the list.
    final int finalIndex = splits.size() - 1;
    for (int i = 2; i <= finalIndex; i++) {
        FileSplit before = (FileSplit) splits.get(i - 1);
        FileSplit current = (FileSplit) splits.get(i);

        // Index of the split that closes a file: the very last split, or the one just before
        // the path changes. -1 when position i closes nothing.
        int closingIndex;
        if (i == finalIndex) {
            closingIndex = i;
        } else if (!current.getPath().equals(before.getPath())) {
            closingIndex = i - 1;
        } else {
            closingIndex = -1;
        }
        if (closingIndex < 2) {
            continue;
        }

        FileSplit closing = (FileSplit) splits.get(closingIndex);
        FileSplit preceding = (FileSplit) splits.get(closingIndex - 1);
        if (!closing.getPath().equals(preceding.getPath())) {
            continue;
        }

        long precedingEnd = preceding.getStart() + preceding.getLength();
        if (precedingEnd < closing.getStart()) {
            // Pull the closing split's start back to where the preceding split ends.
            FileSplit replacement = new FileSplit(closing.getPath(), precedingEnd,
                    closing.getLength() + 1, closing.getLocations());
            log.info("Replacing split: " + closing.toString());
            log.info("  With split: " + replacement.toString());
            splits.set(closingIndex, replacement);
        }
    }
    return splits;
}