Example usage for org.apache.hadoop.io IntWritable get

List of usage examples for org.apache.hadoop.io IntWritable get

Introduction

This page collects usage examples for org.apache.hadoop.io.IntWritable.get().

Prototype

public int get() 

Document

Return the value of this IntWritable.
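
For orientation, here is a minimal, self-contained sketch showing get() together with its counterpart set(int); the class name and the values used are purely illustrative:

import org.apache.hadoop.io.IntWritable;

public class IntWritableGetExample {
    public static void main(String[] args) {
        IntWritable count = new IntWritable(42); // wrap a primitive int
        int plain = count.get();                 // get() returns the stored int value
        count.set(plain + 1);                    // set(int) replaces the stored value
        System.out.println(count.get());         // prints 43
    }
}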

Usage

From source file: org.apache.hama.graph.MinAggregator.java

License: Apache License

@Override
public void aggregate(IntWritable value) {
    if (value.get() < min) {
        min = value.get();
    }
}

From source file: org.apache.hama.pipes.TestPipes.java

License: Apache License

static void verifyMatrixMultiplicationOutput(HamaConfiguration conf, Path outputPath, double[][] matrix)
        throws IOException {
    FileStatus[] listStatus = fs.listStatus(outputPath);
    for (FileStatus status : listStatus) {
        if (!status.isDir()) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, status.getPath(), conf);
            IntWritable key = new IntWritable();
            PipesVectorWritable value = new PipesVectorWritable();
            int rowIdx = 0;
            while (reader.next(key, value)) {
                assertEquals("Expected rowIdx: '" + rowIdx + "' != '" + key.get() + "'", rowIdx, key.get());

                DoubleVector rowVector = value.getVector();

                for (int colIdx = 0; colIdx < rowVector.getLength(); colIdx++) {
                    double colValue = rowVector.get(colIdx);
                    assertEquals(
                            "Expected colValue: '" + matrix[rowIdx][colIdx] + "' != '" + colValue + "' in row: "
                                    + rowIdx + " values: " + rowVector.toString(),
                            matrix[rowIdx][colIdx], colValue, Math.pow(10, (DOUBLE_PRECISION * -1)));
                }
                rowIdx++;
            }
            reader.close();
        }
    }
}

From source file: org.apache.hcatalog.mapreduce.TestMultiOutputFormat.java

License: Apache License

/**
 * A test job that reads an input file and outputs each word and the index of
 * the word encountered to a text file and a sequence file with different key
 * values.
 */
@Test
public void testMultiOutputFormatWithoutReduce() throws Throwable {
    Job job = new Job(mrConf, "MultiOutNoReduce");
    job.setMapperClass(MultiOutWordIndexMapper.class);
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setNumReduceTasks(0);

    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);
    configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class);
    configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
    Path outDir = new Path(workDir.getPath(), job.getJobName());
    FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
    FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));

    String fileContent = "Hello World";
    String inputFile = createInputFile(fileContent);
    FileInputFormat.setInputPaths(job, new Path(inputFile));

    //Test for merging of configs
    DistributedCache.addFileToClassPath(new Path(inputFile), job.getConfiguration(), fs);
    String dummyFile = createInputFile("dummy file");
    DistributedCache.addFileToClassPath(new Path(dummyFile), configurer.getJob("out1").getConfiguration(), fs);
    // duplicate of the value. Merging should remove duplicates
    DistributedCache.addFileToClassPath(new Path(inputFile), configurer.getJob("out2").getConfiguration(), fs);

    configurer.configure();

    // Verify if the configs are merged
    Path[] fileClassPaths = DistributedCache.getFileClassPaths(job.getConfiguration());
    List<Path> fileClassPathsList = Arrays.asList(fileClassPaths);
    Assert.assertTrue(fileClassPathsList.contains(new Path(inputFile)));
    Assert.assertTrue(fileClassPathsList.contains(new Path(dummyFile)));

    URI[] cacheFiles = DistributedCache.getCacheFiles(job.getConfiguration());
    List<URI> cacheFilesList = Arrays.asList(cacheFiles);
    Assert.assertTrue(cacheFilesList.contains(new Path(inputFile).makeQualified(fs).toUri()));
    Assert.assertTrue(cacheFilesList.contains(new Path(dummyFile).makeQualified(fs).toUri()));

    Assert.assertTrue(job.waitForCompletion(true));

    Path textOutPath = new Path(outDir, "out1/part-m-00000");
    String[] textOutput = readFully(textOutPath).split("\n");
    Path seqOutPath = new Path(outDir, "out2/part-m-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    String[] words = fileContent.split(" ");
    Assert.assertEquals(words.length, textOutput.length);
    LOG.info("Verifying file contents");
    for (int i = 0; i < words.length; i++) {
        Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]);
        reader.next(key, value);
        Assert.assertEquals(words[i], key.toString());
        Assert.assertEquals((i + 1), value.get());
    }
    Assert.assertFalse(reader.next(key, value));
}

From source file: org.apache.hcatalog.mapreduce.TestMultiOutputFormat.java

License: Apache License

/**
 * A word count test job that reads an input file and outputs the count of
 * words to a text file and a sequence file with different key values.
 */
@Test
public void testMultiOutputFormatWithReduce() throws Throwable {
    Job job = new Job(mrConf, "MultiOutWithReduce");

    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(MultiOutWordCountReducer.class);
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);

    configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class);
    configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
    configurer.addOutputFormat("out3", NullOutputFormat.class, Text.class, IntWritable.class);
    Path outDir = new Path(workDir.getPath(), job.getJobName());
    FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
    FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));

    configurer.configure();

    String fileContent = "Hello World Hello World World";
    String inputFile = createInputFile(fileContent);
    FileInputFormat.setInputPaths(job, new Path(inputFile));

    Assert.assertTrue(job.waitForCompletion(true));

    Path textOutPath = new Path(outDir, "out1/part-r-00000");
    String[] textOutput = readFully(textOutPath).split("\n");
    Path seqOutPath = new Path(outDir, "out2/part-r-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    String[] words = "Hello World".split(" ");
    Assert.assertEquals(words.length, textOutput.length);
    for (int i = 0; i < words.length; i++) {
        Assert.assertEquals((i + 2) + "\t" + words[i], textOutput[i]);
        reader.next(key, value);
        Assert.assertEquals(words[i], key.toString());
        Assert.assertEquals((i + 2), value.get());
    }
    Assert.assertFalse(reader.next(key, value));

}

From source file: org.apache.hive.hcatalog.mapreduce.TestMultiOutputFormat.java

License: Apache License

/**
 * A test job that reads an input file and outputs each word and the index of
 * the word encountered to a text file and a sequence file with different key
 * values.
 */
@Test
public void testMultiOutputFormatWithoutReduce() throws Throwable {
    Job job = new Job(mrConf, "MultiOutNoReduce");
    job.setMapperClass(MultiOutWordIndexMapper.class);
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setNumReduceTasks(0);

    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);
    configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class);
    configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
    Path outDir = new Path(workDir.getPath(), job.getJobName());
    FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
    FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));

    String fileContent = "Hello World";
    String inputFile = createInputFile(fileContent);
    FileInputFormat.setInputPaths(job, new Path(inputFile));

    //Test for merging of configs
    DistributedCache.addFileToClassPath(new Path(inputFile), job.getConfiguration(), fs);
    String dummyFile = createInputFile("dummy file");
    DistributedCache.addFileToClassPath(new Path(dummyFile), configurer.getJob("out1").getConfiguration(), fs);
    // duplicate of the value. Merging should remove duplicates
    DistributedCache.addFileToClassPath(new Path(inputFile), configurer.getJob("out2").getConfiguration(), fs);

    configurer.configure();

    // Verify if the configs are merged
    Path[] fileClassPaths = DistributedCache.getFileClassPaths(job.getConfiguration());
    List<Path> fileClassPathsList = Arrays.asList(fileClassPaths);
    Assert.assertTrue("Cannot find " + (new Path(inputFile)) + " in " + fileClassPathsList,
            fileClassPathsList.contains(new Path(inputFile)));
    Assert.assertTrue("Cannot find " + (new Path(dummyFile)) + " in " + fileClassPathsList,
            fileClassPathsList.contains(new Path(dummyFile)));

    URI[] cacheFiles = DistributedCache.getCacheFiles(job.getConfiguration());
    List<URI> cacheFilesList = Arrays.asList(cacheFiles);
    URI inputFileURI = new Path(inputFile).makeQualified(fs).toUri();
    Assert.assertTrue("Cannot find " + inputFileURI + " in " + cacheFilesList,
            cacheFilesList.contains(inputFileURI));
    URI dummyFileURI = new Path(dummyFile).makeQualified(fs).toUri();
    Assert.assertTrue("Cannot find " + dummyFileURI + " in " + cacheFilesList,
            cacheFilesList.contains(dummyFileURI));

    Assert.assertTrue(job.waitForCompletion(true));

    Path textOutPath = new Path(outDir, "out1/part-m-00000");
    String[] textOutput = readFully(textOutPath).split("\n");
    Path seqOutPath = new Path(outDir, "out2/part-m-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    String[] words = fileContent.split(" ");
    Assert.assertEquals(words.length, textOutput.length);
    LOG.info("Verifying file contents");
    for (int i = 0; i < words.length; i++) {
        Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]);
        reader.next(key, value);
        Assert.assertEquals(words[i], key.toString());
        Assert.assertEquals((i + 1), value.get());
    }
    Assert.assertFalse(reader.next(key, value));
}

From source file: org.apache.ignite.internal.processors.hadoop.examples.GridHadoopWordCount2Reducer.java

License: Apache License

/** {@inheritDoc} */
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context ctx)
        throws IOException, InterruptedException {
    assert wasConfigured : "Reducer should be configured";
    assert wasSetUp : "Reducer should be set up";

    int wordCnt = 0;

    for (IntWritable value : values)
        wordCnt += value.get();

    totalWordCnt.set(wordCnt);

    ctx.write(key, totalWordCnt);
}

From source file: org.apache.ignite.internal.processors.hadoop.impl.examples.HadoopWordCount2Reducer.java

License: Apache License

/** {@inheritDoc} */
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context ctx)
        throws IOException, InterruptedException {
    assert wasConfigured : "Reducer should be configured";
    assert wasSetUp : "Reducer should be set up";

    int wordCnt = 0;

    for (IntWritable value : values)
        wordCnt += value.get();

    totalWordCnt.set(wordCnt);

    ctx.write(key, totalWordCnt);

    reduceError();
}

From source file: org.apache.ignite.internal.processors.hadoop.impl.shuffle.collections.HadoopConcurrentHashMultimapSelftest.java

License: Apache License

private void check(HadoopConcurrentHashMultimap m, Multimap<Integer, Integer> mm,
        final Multimap<Integer, Integer> vis, HadoopTaskContext taskCtx) throws Exception {
    final HadoopTaskInput in = m.input(taskCtx);

    Map<Integer, Collection<Integer>> mmm = mm.asMap();

    int keys = 0;

    while (in.next()) {
        keys++;

        IntWritable k = (IntWritable) in.key();

        assertNotNull(k);

        Deque<Integer> vs = new LinkedList<>();

        Iterator<?> it = in.values();

        while (it.hasNext())
            vs.addFirst(((IntWritable) it.next()).get());

        Collection<Integer> exp = mmm.get(k.get());

        assertEquals(exp, vs);
    }

    assertEquals(mmm.size(), keys);

    assertEquals(m.keys(), keys);

    X.println("keys: " + keys + " cap: " + m.capacity());

    // Check visitor.

    final byte[] buf = new byte[4];

    final GridDataInput dataInput = new GridUnsafeDataInput();

    m.visit(false, new HadoopConcurrentHashMultimap.Visitor() {
        /** */
        IntWritable key = new IntWritable();

        /** */
        IntWritable val = new IntWritable();

        @Override
        public void onKey(long keyPtr, int keySize) {
            read(keyPtr, keySize, key);
        }

        @Override
        public void onValue(long valPtr, int valSize) {
            read(valPtr, valSize, val);

            vis.put(key.get(), val.get());
        }

        private void read(long ptr, int size, Writable w) {
            assert size == 4 : size;

            GridUnsafe.copyOffheapHeap(ptr, buf, GridUnsafe.BYTE_ARR_OFF, size);

            dataInput.bytes(buf, size);

            try {
                w.readFields(dataInput);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    });

    //        X.println("vis: " + vis);

    assertEquals(mm, vis);

    in.close();
}

From source file: org.apache.ignite.internal.processors.hadoop.impl.shuffle.collections.HadoopConcurrentHashMultimapSelftest.java

License: Apache License

/**
 * @throws Exception if failed.
 */
public void testMultiThreaded() throws Exception {
    GridUnsafeMemory mem = new GridUnsafeMemory(0);

    X.println("___ Started");

    Random rnd = new GridRandom();

    for (int i = 0; i < 20; i++) {
        HadoopJobInfo job = new JobInfo();

        final HadoopTaskContext taskCtx = new TaskContext();

        final HadoopConcurrentHashMultimap m = new HadoopConcurrentHashMultimap(job, mem, 16);

        final ConcurrentMap<Integer, Collection<Integer>> mm = new ConcurrentHashMap<>();

        X.println("___ MT");

        multithreaded(new Callable<Object>() {
            @Override
            public Object call() throws Exception {
                X.println("___ TH in");

                Random rnd = new GridRandom();

                IntWritable key = new IntWritable();
                IntWritable val = new IntWritable();

                HadoopMultimap.Adder a = m.startAdding(taskCtx);

                for (int i = 0; i < 50000; i++) {
                    int k = rnd.nextInt(32000);
                    int v = rnd.nextInt();

                    key.set(k);
                    val.set(v);

                    a.write(key, val);

                    Collection<Integer> list = mm.get(k);

                    if (list == null) {
                        list = new ConcurrentLinkedQueue<>();

                        Collection<Integer> old = mm.putIfAbsent(k, list);

                        if (old != null)
                            list = old;
                    }

                    list.add(v);
                }

                a.close();

                X.println("___ TH out");

                return null;
            }
        }, 3 + rnd.nextInt(27));

        X.println("___ Check: " + m.capacity());

        assertEquals(mm.size(), m.keys());

        assertTrue(m.capacity() > 32000);

        HadoopTaskInput in = m.input(taskCtx);

        while (in.next()) {
            IntWritable key = (IntWritable) in.key();

            Iterator<?> valsIter = in.values();

            Collection<Integer> vals = mm.remove(key.get());

            assertNotNull(vals);

            while (valsIter.hasNext()) {
                IntWritable val = (IntWritable) valsIter.next();

                assertTrue(vals.remove(val.get()));
            }

            assertTrue(vals.isEmpty());
        }

        in.close();
        m.close();

        assertEquals(0, mem.allocatedSize());
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.impl.shuffle.collections.HadoopHashMapSelfTest.java

License: Apache License

private void check(HadoopHashMultimap m, Multimap<Integer, Integer> mm, HadoopTaskContext taskCtx)
        throws Exception {
    final HadoopTaskInput in = m.input(taskCtx);

    Map<Integer, Collection<Integer>> mmm = mm.asMap();

    int keys = 0;

    while (in.next()) {
        keys++;

        IntWritable k = (IntWritable) in.key();

        assertNotNull(k);

        ArrayList<Integer> vs = new ArrayList<>();

        Iterator<?> it = in.values();

        while (it.hasNext())
            vs.add(((IntWritable) it.next()).get());

        Collection<Integer> exp = mmm.get(k.get());

        assertEquals(sorted(exp), sorted(vs));
    }

    X.println("keys: " + keys + " cap: " + m.capacity());

    assertEquals(mmm.size(), keys);

    assertEquals(m.keys(), keys);

    in.close();
}