List of usage examples for org.apache.hadoop.io.IntWritable.get()
public int get()
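Before the project excerpts, a minimal standalone sketch of what get() does (the class name IntWritableGetDemo and the round trip are illustrative, not taken from any of the projects below; it assumes hadoop-common is on the classpath). get() returns the int the writable currently wraps, including after deserialization via readFields():

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;

public class IntWritableGetDemo {
    public static void main(String[] args) throws IOException {
        IntWritable w = new IntWritable(42);
        System.out.println(w.get()); // prints 42

        // get() reflects a later set().
        w.set(7);
        System.out.println(w.get()); // prints 7

        // Round trip through Writable serialization: get() on the copy
        // returns the value that was written.
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        w.write(new DataOutputStream(bos));

        IntWritable copy = new IntWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
        System.out.println(copy.get()); // prints 7
    }
}

The examples below all use the same accessor, typically to read a key or value back out of a SequenceFile, a reducer's value iterable, or an off-heap multimap.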
From source file: org.apache.hama.graph.MinAggregator.java
License: Apache License

@Override
public void aggregate(IntWritable value) {
    if (value.get() < min) {
        min = value.get();
    }
}
From source file: org.apache.hama.pipes.TestPipes.java
License: Apache License

static void verifyMatrixMultiplicationOutput(HamaConfiguration conf, Path outputPath, double[][] matrix)
        throws IOException {
    FileStatus[] listStatus = fs.listStatus(outputPath);
    for (FileStatus status : listStatus) {
        if (!status.isDir()) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, status.getPath(), conf);
            IntWritable key = new IntWritable();
            PipesVectorWritable value = new PipesVectorWritable();

            int rowIdx = 0;
            while (reader.next(key, value)) {
                assertEquals("Expected rowIdx: '" + rowIdx + "' != '" + key.get() + "'", rowIdx, key.get());

                DoubleVector rowVector = value.getVector();
                for (int colIdx = 0; colIdx < rowVector.getLength(); colIdx++) {
                    double colValue = rowVector.get(colIdx);
                    assertEquals("Expected colValue: '" + matrix[rowIdx][colIdx] + "' != '" + colValue
                            + "' in row: " + rowIdx + " values: " + rowVector.toString(),
                            matrix[rowIdx][colIdx], colValue, Math.pow(10, (DOUBLE_PRECISION * -1)));
                }
                rowIdx++;
            }
            reader.close();
        }
    }
}
From source file: org.apache.hcatalog.mapreduce.TestMultiOutputFormat.java
License: Apache License

/**
 * A test job that reads an input file and outputs each word and the index of
 * the word encountered to a text file and sequence file with different key
 * values.
 */
@Test
public void testMultiOutputFormatWithoutReduce() throws Throwable {
    Job job = new Job(mrConf, "MultiOutNoReduce");
    job.setMapperClass(MultiOutWordIndexMapper.class);
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setNumReduceTasks(0);
    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);
    configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class);
    configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
    Path outDir = new Path(workDir.getPath(), job.getJobName());
    FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
    FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));

    String fileContent = "Hello World";
    String inputFile = createInputFile(fileContent);
    FileInputFormat.setInputPaths(job, new Path(inputFile));

    // Test for merging of configs
    DistributedCache.addFileToClassPath(new Path(inputFile), job.getConfiguration(), fs);
    String dummyFile = createInputFile("dummy file");
    DistributedCache.addFileToClassPath(new Path(dummyFile), configurer.getJob("out1").getConfiguration(), fs);
    // Duplicate of the value. Merging should remove duplicates.
    DistributedCache.addFileToClassPath(new Path(inputFile), configurer.getJob("out2").getConfiguration(), fs);

    configurer.configure();

    // Verify that the configs are merged
    Path[] fileClassPaths = DistributedCache.getFileClassPaths(job.getConfiguration());
    List<Path> fileClassPathsList = Arrays.asList(fileClassPaths);
    Assert.assertTrue(fileClassPathsList.contains(new Path(inputFile)));
    Assert.assertTrue(fileClassPathsList.contains(new Path(dummyFile)));

    URI[] cacheFiles = DistributedCache.getCacheFiles(job.getConfiguration());
    List<URI> cacheFilesList = Arrays.asList(cacheFiles);
    Assert.assertTrue(cacheFilesList.contains(new Path(inputFile).makeQualified(fs).toUri()));
    Assert.assertTrue(cacheFilesList.contains(new Path(dummyFile).makeQualified(fs).toUri()));

    Assert.assertTrue(job.waitForCompletion(true));

    Path textOutPath = new Path(outDir, "out1/part-m-00000");
    String[] textOutput = readFully(textOutPath).split("\n");
    Path seqOutPath = new Path(outDir, "out2/part-m-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    String[] words = fileContent.split(" ");
    Assert.assertEquals(words.length, textOutput.length);
    LOG.info("Verifying file contents");
    for (int i = 0; i < words.length; i++) {
        Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]);
        reader.next(key, value);
        Assert.assertEquals(words[i], key.toString());
        Assert.assertEquals((i + 1), value.get());
    }
    Assert.assertFalse(reader.next(key, value));
}
From source file: org.apache.hcatalog.mapreduce.TestMultiOutputFormat.java
License: Apache License

/**
 * A word count test job that reads an input file and outputs the count of
 * words to a text file and sequence file with different key values.
 */
@Test
public void testMultiOutputFormatWithReduce() throws Throwable {
    Job job = new Job(mrConf, "MultiOutWithReduce");
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(MultiOutWordCountReducer.class);
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);
    configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class);
    configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
    configurer.addOutputFormat("out3", NullOutputFormat.class, Text.class, IntWritable.class);
    Path outDir = new Path(workDir.getPath(), job.getJobName());
    FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
    FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));
    configurer.configure();

    String fileContent = "Hello World Hello World World";
    String inputFile = createInputFile(fileContent);
    FileInputFormat.setInputPaths(job, new Path(inputFile));

    Assert.assertTrue(job.waitForCompletion(true));

    Path textOutPath = new Path(outDir, "out1/part-r-00000");
    String[] textOutput = readFully(textOutPath).split("\n");
    Path seqOutPath = new Path(outDir, "out2/part-r-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    String[] words = "Hello World".split(" ");
    Assert.assertEquals(words.length, textOutput.length);
    for (int i = 0; i < words.length; i++) {
        Assert.assertEquals((i + 2) + "\t" + words[i], textOutput[i]);
        reader.next(key, value);
        Assert.assertEquals(words[i], key.toString());
        Assert.assertEquals((i + 2), value.get());
    }
    Assert.assertFalse(reader.next(key, value));
}
From source file: org.apache.hive.hcatalog.mapreduce.TestMultiOutputFormat.java
License: Apache License

/**
 * A test job that reads an input file and outputs each word and the index of
 * the word encountered to a text file and sequence file with different key
 * values.
 */
@Test
public void testMultiOutputFormatWithoutReduce() throws Throwable {
    Job job = new Job(mrConf, "MultiOutNoReduce");
    job.setMapperClass(MultiOutWordIndexMapper.class);
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setNumReduceTasks(0);
    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);
    configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class);
    configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
    Path outDir = new Path(workDir.getPath(), job.getJobName());
    FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
    FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));

    String fileContent = "Hello World";
    String inputFile = createInputFile(fileContent);
    FileInputFormat.setInputPaths(job, new Path(inputFile));

    // Test for merging of configs
    DistributedCache.addFileToClassPath(new Path(inputFile), job.getConfiguration(), fs);
    String dummyFile = createInputFile("dummy file");
    DistributedCache.addFileToClassPath(new Path(dummyFile), configurer.getJob("out1").getConfiguration(), fs);
    // Duplicate of the value. Merging should remove duplicates.
    DistributedCache.addFileToClassPath(new Path(inputFile), configurer.getJob("out2").getConfiguration(), fs);

    configurer.configure();

    // Verify that the configs are merged
    Path[] fileClassPaths = DistributedCache.getFileClassPaths(job.getConfiguration());
    List<Path> fileClassPathsList = Arrays.asList(fileClassPaths);
    Assert.assertTrue("Cannot find " + (new Path(inputFile)) + " in " + fileClassPathsList,
            fileClassPathsList.contains(new Path(inputFile)));
    Assert.assertTrue("Cannot find " + (new Path(dummyFile)) + " in " + fileClassPathsList,
            fileClassPathsList.contains(new Path(dummyFile)));

    URI[] cacheFiles = DistributedCache.getCacheFiles(job.getConfiguration());
    List<URI> cacheFilesList = Arrays.asList(cacheFiles);
    URI inputFileURI = new Path(inputFile).makeQualified(fs).toUri();
    Assert.assertTrue("Cannot find " + inputFileURI + " in " + cacheFilesList,
            cacheFilesList.contains(inputFileURI));
    URI dummyFileURI = new Path(dummyFile).makeQualified(fs).toUri();
    Assert.assertTrue("Cannot find " + dummyFileURI + " in " + cacheFilesList,
            cacheFilesList.contains(dummyFileURI));

    Assert.assertTrue(job.waitForCompletion(true));

    Path textOutPath = new Path(outDir, "out1/part-m-00000");
    String[] textOutput = readFully(textOutPath).split("\n");
    Path seqOutPath = new Path(outDir, "out2/part-m-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    String[] words = fileContent.split(" ");
    Assert.assertEquals(words.length, textOutput.length);
    LOG.info("Verifying file contents");
    for (int i = 0; i < words.length; i++) {
        Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]);
        reader.next(key, value);
        Assert.assertEquals(words[i], key.toString());
        Assert.assertEquals((i + 1), value.get());
    }
    Assert.assertFalse(reader.next(key, value));
}
From source file: org.apache.ignite.internal.processors.hadoop.examples.GridHadoopWordCount2Reducer.java
License: Apache License

/** {@inheritDoc} */
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context ctx)
        throws IOException, InterruptedException {
    assert wasConfigured : "Reducer should be configured";
    assert wasSetUp : "Reducer should be set up";

    int wordCnt = 0;

    for (IntWritable value : values)
        wordCnt += value.get();

    totalWordCnt.set(wordCnt);

    ctx.write(key, totalWordCnt);
}
From source file: org.apache.ignite.internal.processors.hadoop.impl.examples.HadoopWordCount2Reducer.java
License: Apache License

/** {@inheritDoc} */
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context ctx)
        throws IOException, InterruptedException {
    assert wasConfigured : "Reducer should be configured";
    assert wasSetUp : "Reducer should be set up";

    int wordCnt = 0;

    for (IntWritable value : values)
        wordCnt += value.get();

    totalWordCnt.set(wordCnt);

    ctx.write(key, totalWordCnt);

    reduceError();
}
From source file: org.apache.ignite.internal.processors.hadoop.impl.shuffle.collections.HadoopConcurrentHashMultimapSelftest.java
License: Apache License

private void check(HadoopConcurrentHashMultimap m, Multimap<Integer, Integer> mm,
        final Multimap<Integer, Integer> vis, HadoopTaskContext taskCtx) throws Exception {
    final HadoopTaskInput in = m.input(taskCtx);

    Map<Integer, Collection<Integer>> mmm = mm.asMap();

    int keys = 0;

    while (in.next()) {
        keys++;

        IntWritable k = (IntWritable) in.key();

        assertNotNull(k);

        Deque<Integer> vs = new LinkedList<>();

        Iterator<?> it = in.values();

        while (it.hasNext())
            vs.addFirst(((IntWritable) it.next()).get());

        Collection<Integer> exp = mmm.get(k.get());

        assertEquals(exp, vs);
    }

    assertEquals(mmm.size(), keys);
    assertEquals(m.keys(), keys);

    X.println("keys: " + keys + " cap: " + m.capacity());

    // Check visitor.
    final byte[] buf = new byte[4];

    final GridDataInput dataInput = new GridUnsafeDataInput();

    m.visit(false, new HadoopConcurrentHashMultimap.Visitor() {
        /** */
        IntWritable key = new IntWritable();

        /** */
        IntWritable val = new IntWritable();

        @Override public void onKey(long keyPtr, int keySize) {
            read(keyPtr, keySize, key);
        }

        @Override public void onValue(long valPtr, int valSize) {
            read(valPtr, valSize, val);

            vis.put(key.get(), val.get());
        }

        private void read(long ptr, int size, Writable w) {
            assert size == 4 : size;

            GridUnsafe.copyOffheapHeap(ptr, buf, GridUnsafe.BYTE_ARR_OFF, size);

            dataInput.bytes(buf, size);

            try {
                w.readFields(dataInput);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    });

    // X.println("vis: " + vis);

    assertEquals(mm, vis);

    in.close();
}
From source file: org.apache.ignite.internal.processors.hadoop.impl.shuffle.collections.HadoopConcurrentHashMultimapSelftest.java
License: Apache License

/**
 * @throws Exception if failed.
 */
public void testMultiThreaded() throws Exception {
    GridUnsafeMemory mem = new GridUnsafeMemory(0);

    X.println("___ Started");

    Random rnd = new GridRandom();

    for (int i = 0; i < 20; i++) {
        HadoopJobInfo job = new JobInfo();

        final HadoopTaskContext taskCtx = new TaskContext();

        final HadoopConcurrentHashMultimap m = new HadoopConcurrentHashMultimap(job, mem, 16);

        final ConcurrentMap<Integer, Collection<Integer>> mm = new ConcurrentHashMap<>();

        X.println("___ MT");

        multithreaded(new Callable<Object>() {
            @Override public Object call() throws Exception {
                X.println("___ TH in");

                Random rnd = new GridRandom();

                IntWritable key = new IntWritable();
                IntWritable val = new IntWritable();

                HadoopMultimap.Adder a = m.startAdding(taskCtx);

                for (int i = 0; i < 50000; i++) {
                    int k = rnd.nextInt(32000);
                    int v = rnd.nextInt();

                    key.set(k);
                    val.set(v);

                    a.write(key, val);

                    Collection<Integer> list = mm.get(k);

                    if (list == null) {
                        list = new ConcurrentLinkedQueue<>();

                        Collection<Integer> old = mm.putIfAbsent(k, list);

                        if (old != null)
                            list = old;
                    }

                    list.add(v);
                }

                a.close();

                X.println("___ TH out");

                return null;
            }
        }, 3 + rnd.nextInt(27));

        X.println("___ Check: " + m.capacity());

        assertEquals(mm.size(), m.keys());

        assertTrue(m.capacity() > 32000);

        HadoopTaskInput in = m.input(taskCtx);

        while (in.next()) {
            IntWritable key = (IntWritable) in.key();

            Iterator<?> valsIter = in.values();

            Collection<Integer> vals = mm.remove(key.get());

            assertNotNull(vals);

            while (valsIter.hasNext()) {
                IntWritable val = (IntWritable) valsIter.next();

                assertTrue(vals.remove(val.get()));
            }

            assertTrue(vals.isEmpty());
        }

        in.close();
        m.close();

        assertEquals(0, mem.allocatedSize());
    }
}
From source file: org.apache.ignite.internal.processors.hadoop.impl.shuffle.collections.HadoopHashMapSelfTest.java
License: Apache License

private void check(HadoopHashMultimap m, Multimap<Integer, Integer> mm, HadoopTaskContext taskCtx)
        throws Exception {
    final HadoopTaskInput in = m.input(taskCtx);

    Map<Integer, Collection<Integer>> mmm = mm.asMap();

    int keys = 0;

    while (in.next()) {
        keys++;

        IntWritable k = (IntWritable) in.key();

        assertNotNull(k);

        ArrayList<Integer> vs = new ArrayList<>();

        Iterator<?> it = in.values();

        while (it.hasNext())
            vs.add(((IntWritable) it.next()).get());

        Collection<Integer> exp = mmm.get(k.get());

        assertEquals(sorted(exp), sorted(vs));
    }

    X.println("keys: " + keys + " cap: " + m.capacity());

    assertEquals(mmm.size(), keys);
    assertEquals(m.keys(), keys);

    in.close();
}