List of usage examples for org.apache.hadoop.io.IntWritable.set

public void set(int value)
Sets the int value held by this IntWritable.
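Before the project-specific examples below, here is a minimal, self-contained sketch of the pattern they all share: allocate one IntWritable up front and call set(int) once per record instead of constructing a new object each time. The class name IntWritableSetDemo is invented for illustration.

import org.apache.hadoop.io.IntWritable;

public class IntWritableSetDemo {
    public static void main(String[] args) {
        IntWritable value = new IntWritable(); // wraps 0 by default
        for (int i = 0; i < 3; i++) {
            value.set(i);                      // overwrite the wrapped int in place
            System.out.println(value.get());   // prints 0, 1, 2
        }
    }
}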
From source file: org.apache.mahout.utils.vectors.lucene.Driver.java
License: Apache License
public void dumpVectors() throws IOException {
    File file = new File(luceneDir);
    Preconditions.checkArgument(file.isDirectory(),
        "Lucene directory: " + file.getAbsolutePath() + " does not exist or is not a directory");
    Preconditions.checkArgument(maxDocs >= 0, "maxDocs must be >= 0");
    Preconditions.checkArgument(minDf >= 1, "minDf must be >= 1");
    Preconditions.checkArgument(maxDFPercent <= 99, "maxDFPercent must be <= 99");

    Directory dir = FSDirectory.open(file);
    IndexReader reader = DirectoryReader.open(dir);

    Weight weight;
    if ("tf".equalsIgnoreCase(weightType)) {
        weight = new TF();
    } else if ("tfidf".equalsIgnoreCase(weightType)) {
        weight = new TFIDF();
    } else {
        throw new IllegalArgumentException("Weight type " + weightType + " is not supported");
    }

    TermInfo termInfo = new CachedTermInfo(reader, field, minDf, maxDFPercent);

    LuceneIterable iterable;
    if (norm == LuceneIterable.NO_NORMALIZING) {
        iterable = new LuceneIterable(reader, idField, field, termInfo, weight,
            LuceneIterable.NO_NORMALIZING, maxPercentErrorDocs);
    } else {
        iterable = new LuceneIterable(reader, idField, field, termInfo, weight, norm, maxPercentErrorDocs);
    }

    log.info("Output File: {}", outFile);
    VectorWriter vectorWriter = getSeqFileWriter(outFile);
    try {
        long numDocs = vectorWriter.write(iterable, maxDocs);
        log.info("Wrote: {} vectors", numDocs);
    } finally {
        Closeables.close(vectorWriter, false);
    }

    File dictOutFile = new File(dictOut);
    log.info("Dictionary Output file: {}", dictOutFile);
    Writer writer = Files.newWriter(dictOutFile, Charsets.UTF_8);
    DelimitedTermInfoWriter tiWriter = new DelimitedTermInfoWriter(writer, delimiter, field);
    try {
        tiWriter.write(termInfo);
    } finally {
        Closeables.close(tiWriter, false);
    }

    if (!"".equals(seqDictOut)) {
        log.info("SequenceFile Dictionary Output file: {}", seqDictOut);
        Path path = new Path(seqDictOut);
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        SequenceFile.Writer seqWriter = null;
        try {
            seqWriter = SequenceFile.createWriter(fs, conf, path, Text.class, IntWritable.class);
            Text term = new Text();
            IntWritable termIndex = new IntWritable();
            Iterator<TermEntry> termEntries = termInfo.getAllEntries();
            // Reuse a single Text/IntWritable pair: set() overwrites the values
            // in place for each (term, index) entry appended to the dictionary.
            while (termEntries.hasNext()) {
                TermEntry termEntry = termEntries.next();
                term.set(termEntry.getTerm());
                termIndex.set(termEntry.getTermIdx());
                seqWriter.append(term, termIndex);
            }
        } finally {
            Closeables.close(seqWriter, false);
        }
    }
}
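The dictionary written above can be read back with the matching SequenceFile.Reader API. A minimal sketch, assuming fs, conf, and path refer to the same file system, configuration, and dictionary path used in dumpVectors():

// Hedged sketch: read back the Text -> IntWritable dictionary written above.
SequenceFile.Reader dictReader = new SequenceFile.Reader(fs, path, conf);
try {
    Text term = new Text();
    IntWritable termIndex = new IntWritable();
    // next() fills the reused key/value instances until the file is exhausted.
    while (dictReader.next(term, termIndex)) {
        System.out.println(term + "\t" + termIndex.get());
    }
} finally {
    dictReader.close();
}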
From source file: org.apache.mahout.utils.vectors.RowIdJob.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    Path outputPath = getOutputPath();
    Path indexPath = new Path(outputPath, "docIndex");
    Path matrixPath = new Path(outputPath, "matrix");

    SequenceFile.Writer indexWriter = SequenceFile.createWriter(fs, conf, indexPath,
        IntWritable.class, Text.class);
    SequenceFile.Writer matrixWriter = SequenceFile.createWriter(fs, conf, matrixPath,
        IntWritable.class, VectorWritable.class);
    try {
        IntWritable docId = new IntWritable();
        int i = 0;
        int numCols = 0;
        for (Pair<Text, VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>(
                getInputPath(), PathType.LIST, PathFilters.logsCRCFilter(), null, true, conf)) {
            VectorWritable value = record.getSecond();
            docId.set(i);
            indexWriter.append(docId, record.getFirst());
            matrixWriter.append(docId, value);
            i++;
            numCols = value.get().size();
        }

        log.info("Wrote out matrix with {} rows and {} columns to {}", i, numCols, matrixPath);
        return 0;
    } finally {
        Closeables.close(indexWriter, false);
        Closeables.close(matrixWriter, false);
    }
}
From source file: org.apache.nutch.crawl.TestMapWritable.java
License: Apache License
public void testPerformance() throws Exception {
    FileSystem fs = FileSystem.get(configuration);
    Path file = new Path(System.getProperty("java.io.tmpdir"), "mapTestFile");
    fs.delete(file);

    org.apache.hadoop.io.SequenceFile.Writer writer = SequenceFile.createWriter(fs, configuration, file,
        IntWritable.class, MapWritable.class);

    // write map
    System.out.println("start writing map's");
    long start = System.currentTimeMillis();
    IntWritable key = new IntWritable();
    MapWritable map = new MapWritable();
    LongWritable mapValue = new LongWritable();
    for (int i = 0; i < 1000000; i++) {
        key.set(i);
        mapValue.set(i);
        map.put(key, mapValue);
        writer.append(key, map);
    }
    long needed = System.currentTimeMillis() - start;
    writer.close();
    System.out.println("needed time for writing map's: " + needed);

    // read map
    org.apache.hadoop.io.SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, configuration);
    System.out.println("start reading map's");
    start = System.currentTimeMillis();
    while (reader.next(key, map)) {
    }
    reader.close();
    needed = System.currentTimeMillis() - start;
    System.out.println("needed time for reading map's: " + needed);
    fs.delete(file);

    // Text
    System.out.println("start writing Text's");
    writer = SequenceFile.createWriter(fs, configuration, file, IntWritable.class, Text.class);

    // write map
    start = System.currentTimeMillis();
    key = new IntWritable();
    Text value = new Text();
    String s = "15726:15726";
    for (int i = 0; i < 1000000; i++) {
        key.set(i);
        value.set(s);
        writer.append(key, value);
    }
    needed = System.currentTimeMillis() - start;
    writer.close();
    System.out.println("needed time for writing Text's: " + needed);

    // read map
    System.out.println("start reading Text's");
    reader = new SequenceFile.Reader(fs, file, configuration);
    start = System.currentTimeMillis();
    while (reader.next(key, value)) {
    }
    needed = System.currentTimeMillis() - start;
    System.out.println("needed time for reading Text: " + needed);
    fs.delete(file);
}
From source file: org.apache.orc.impl.TestStringRedBlackTree.java
License: Apache License
/**
 * Checks the red-black tree rules to make sure that we have correctly built
 * a valid tree.
 *
 * Properties:
 *   1. Red nodes must have black children
 *   2. Each node must have the same black height on both sides.
 *
 * @param node The id of the root of the subtree to check for the red-black
 *             tree properties.
 * @return The black-height of the subtree.
 */
private int checkSubtree(RedBlackTree tree, int node, IntWritable count) throws IOException {
    if (node == RedBlackTree.NULL) {
        return 1;
    }
    count.set(count.get() + 1);
    boolean is_red = tree.isRed(node);
    int left = tree.getLeft(node);
    int right = tree.getRight(node);
    if (is_red) {
        if (tree.isRed(left)) {
            printTree(tree, "", tree.root);
            throw new IllegalStateException("Left node of " + node + " is " + left + " and both are red.");
        }
        if (tree.isRed(right)) {
            printTree(tree, "", tree.root);
            throw new IllegalStateException("Right node of " + node + " is " + right + " and both are red.");
        }
    }
    int left_depth = checkSubtree(tree, left, count);
    int right_depth = checkSubtree(tree, right, count);
    if (left_depth != right_depth) {
        printTree(tree, "", tree.root);
        throw new IllegalStateException(
            "Lopsided tree at node " + node + " with depths " + left_depth + " and " + right_depth);
    }
    if (is_red) {
        return left_depth;
    } else {
        return left_depth + 1;
    }
}
From source file: org.apache.orc.mapred.OrcMapredRecordReader.java
License: Apache License
static IntWritable nextInt(ColumnVector vector, int row, Object previous) {
    if (vector.isRepeating) {
        row = 0;
    }
    if (vector.noNulls || !vector.isNull[row]) {
        IntWritable result;
        if (previous == null || previous.getClass() != IntWritable.class) {
            result = new IntWritable();
        } else {
            result = (IntWritable) previous;
        }
        result.set((int) ((LongColumnVector) vector).vector[row]);
        return result;
    } else {
        return null;
    }
}
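The previous parameter exists so a caller can hand back the last value it received and let nextInt() reuse it. A hedged sketch of a hypothetical driving loop (batch and vector are assumptions standing in for the ORC row batch and column vector the real record reader holds; nextInt is package-private, so a real caller lives in the same package):

// Hypothetical caller sketch: feed the previous result back in so nextInt()
// can call set() on the same IntWritable instead of allocating per row.
Object previous = null;
for (int row = 0; row < batch.size; ++row) {
    previous = nextInt(vector, row, previous); // returns null for null rows
}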
From source file: org.apache.orc.mapred.TestMrUnit.java
License: Apache License
@Test
public void testMapred() throws IOException {
    conf.set("io.serializations",
        OrcStructSerialization.class.getName() + "," + WritableSerialization.class.getName());
    OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.setString(conf, "struct<x:int,y:int>");
    OrcConf.MAPRED_SHUFFLE_VALUE_SCHEMA.setString(conf, "struct<z:string>");
    MyMapper mapper = new MyMapper();
    mapper.configure(conf);
    MyReducer reducer = new MyReducer();
    reducer.configure(conf);
    MapReduceDriver<NullWritable, OrcStruct, OrcKey, OrcValue, NullWritable, OrcStruct> driver =
        new MapReduceDriver<>(mapper, reducer);
    driver.setConfiguration(conf);
    NullWritable nada = NullWritable.get();
    OrcStruct input = (OrcStruct) OrcStruct
        .createValue(TypeDescription.fromString("struct<one:struct<x:int,y:int>,two:struct<z:string>>"));
    IntWritable x = (IntWritable) ((OrcStruct) input.getFieldValue(0)).getFieldValue(0);
    IntWritable y = (IntWritable) ((OrcStruct) input.getFieldValue(0)).getFieldValue(1);
    Text z = (Text) ((OrcStruct) input.getFieldValue(1)).getFieldValue(0);

    // generate the input stream; MRUnit copies each record through the
    // configured serializations, so the same OrcStruct instance can be
    // mutated via set() and reused for every call
    for (int r = 0; r < 20; ++r) {
        x.set(100 - (r / 4));
        y.set(r * 2);
        z.set(Integer.toHexString(r));
        driver.withInput(nada, input);
    }

    // generate the expected outputs
    for (int g = 4; g >= 0; --g) {
        x.set(100 - g);
        for (int i = 0; i < 4; ++i) {
            int r = g * 4 + i;
            y.set(r * 2);
            z.set(Integer.toHexString(r));
            driver.withOutput(nada, input);
        }
    }
    driver.runTest();
}
From source file: org.apache.orc.mapreduce.TestMrUnit.java
License: Apache License
@Test
public void testMapred() throws IOException {
    conf.set("io.serializations",
        OrcStructSerialization.class.getName() + "," + WritableSerialization.class.getName());
    OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.setString(conf, "struct<x:int,y:int>");
    OrcConf.MAPRED_SHUFFLE_VALUE_SCHEMA.setString(conf, "struct<z:string>");
    MyMapper mapper = new MyMapper();
    MyReducer reducer = new MyReducer();
    MapReduceDriver<NullWritable, OrcStruct, OrcKey, OrcValue, NullWritable, OrcStruct> driver =
        new MapReduceDriver<>(mapper, reducer);
    driver.setConfiguration(conf);
    NullWritable nada = NullWritable.get();
    OrcStruct input = (OrcStruct) OrcStruct
        .createValue(TypeDescription.fromString("struct<one:struct<x:int,y:int>,two:struct<z:string>>"));
    IntWritable x = (IntWritable) ((OrcStruct) input.getFieldValue(0)).getFieldValue(0);
    IntWritable y = (IntWritable) ((OrcStruct) input.getFieldValue(0)).getFieldValue(1);
    Text z = (Text) ((OrcStruct) input.getFieldValue(1)).getFieldValue(0);

    // generate the input stream
    for (int r = 0; r < 20; ++r) {
        x.set(100 - (r / 4));
        y.set(r * 2);
        z.set(Integer.toHexString(r));
        driver.withInput(nada, input);
    }

    // generate the expected outputs
    for (int g = 4; g >= 0; --g) {
        x.set(100 - g);
        for (int i = 0; i < 4; ++i) {
            int r = g * 4 + i;
            y.set(r * 2);
            z.set(Integer.toHexString(r));
            driver.withOutput(nada, input);
        }
    }
    driver.runTest();
}
From source file: org.apache.pig.piggybank.squeal.backend.storm.oper.TriBasicPersist.java
License: Apache License
@Override
public MapIdxWritable combine(MapIdxWritable val1, MapIdxWritable val2) {
    MapIdxWritable ret = zero();

    // Deep-copy val1's counts into the result so the inputs are left untouched.
    if (val1 != null) {
        for (Entry<Writable, Writable> ent : val1.entrySet()) {
            ret.put(ent.getKey(), new IntWritable(((IntWritable) ent.getValue()).get()));
        }
    }

    // Merge val2 into the result, summing counts for keys that already exist.
    if (val2 != null) {
        for (Entry<Writable, Writable> ent : val2.entrySet()) {
            int c = ((IntWritable) ent.getValue()).get();
            IntWritable iw = (IntWritable) ret.get(ent.getKey());
            if (iw == null) {
                iw = new IntWritable(c);
                ret.put(ent.getKey(), iw);
            } else {
                iw.set(iw.get() + c);
            }
        }
    }
    return ret;
}
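The same merge-counts idiom, distilled into a self-contained sketch that swaps the Squeal-specific MapIdxWritable for a plain HashMap (CountMergeDemo and its sample keys are invented for illustration): when a key is already present, set() bumps its IntWritable in place; otherwise a fresh IntWritable is inserted.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import java.util.HashMap;
import java.util.Map;

public class CountMergeDemo {
    // Sum per-key counts from source into target, mutating existing
    // IntWritable values via set() instead of replacing them.
    static void mergeInto(Map<Text, IntWritable> target, Map<Text, IntWritable> source) {
        for (Map.Entry<Text, IntWritable> e : source.entrySet()) {
            IntWritable existing = target.get(e.getKey());
            if (existing == null) {
                target.put(e.getKey(), new IntWritable(e.getValue().get()));
            } else {
                existing.set(existing.get() + e.getValue().get());
            }
        }
    }

    public static void main(String[] args) {
        Map<Text, IntWritable> a = new HashMap<>();
        a.put(new Text("x"), new IntWritable(2));
        Map<Text, IntWritable> b = new HashMap<>();
        b.put(new Text("x"), new IntWritable(3));
        b.put(new Text("y"), new IntWritable(1));
        mergeInto(a, b);
        System.out.println(a); // prints {x=5, y=1} (iteration order may vary)
    }
}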
From source file: org.apache.pig.piggybank.squeal.backend.storm.oper.TriWindowCombinePersist.java
License: Apache License
void addTuple(MapIdxWritable s, NullableTuple t, int c) {
    int idx = t.getIndex();
    Long ws = windowSettings.get(idx);
    if (ws != null) {
        IntWritable key_tmp = new IntWritable(idx);

        // Pull the window.
        WindowBundle<NullableTuple> w = (WindowBundle<NullableTuple>) s.get(key_tmp);

        /*
         * FIXME: If we get the negative before the positive, this won't work.
         * The proper way to do this would be to count the removes in window
         * state so we can ignore adds when the matching positive values come
         * in.
         */
        if (c < 0) {
            // Remove the item for negative items.
            w.remove(t);
        } else {
            // Add it otherwise.
            w.push(t);
        }
    } else {
        // This is not a windowed element, just add like BASEPERSIST.
        IntWritable iw = (IntWritable) s.get(t);
        if (iw == null) {
            iw = new IntWritable(c);
            s.put(t, iw);
        } else {
            iw.set(iw.get() + c);
        }
    }
}
From source file: org.apache.pig.piggybank.squeal.backend.storm.state.WindowBundle.java
License: Apache License
void update(Writable o, int c) {
    if (openWin == null) {
        openNewWindow();
    }

    IntWritable iw = (IntWritable) openWin.contents.get(o);
    if (iw == null) {
        iw = new IntWritable(c);
        openWin.contents.put(o, iw);
    } else {
        iw.set(iw.get() + c);
        if (iw.get() == 0) {
            openWin.contents.remove(o);
        }
    }

    // FIXME: This is incorrect for c != +/- 1.
    if (c > 0 && iw.get() > 0) {
        openWin.itemCount += 1;
    } else if (c < 0 && openWin.itemCount > 0) {
        openWin.itemCount -= 1;
    }

    if (openWin.itemCount == maxSize) {
        closeWindow();
    }
}