List of usage examples for org.apache.hadoop.io.IntWritable
public IntWritable()
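Before the per-project examples, a minimal round-trip sketch of the no-argument constructor: construct an empty IntWritable, set a value, serialize it, and read it back. The class and stream names here are illustrative and not drawn from any source file in this list.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;

public class IntWritableRoundTrip {
    public static void main(String[] args) throws IOException {
        IntWritable out = new IntWritable(); // no-arg constructor; value defaults to 0
        out.set(42);

        // Write the value to an in-memory stream.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        out.write(new DataOutputStream(bytes));

        // Read it back into a fresh, empty IntWritable.
        IntWritable in = new IntWritable();
        in.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(in.get()); // prints 42
    }
}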
From source file: com.marcolotz.MRComponents.SerializerConverter.java
License: Creative Commons License
/**
 * Reads an int from the input.
 * @param datainput the stream to read from
 * @return the int that was read
 * @throws IOException
 */
public static int readInt(DataInput datainput) throws IOException {
    IntWritable readenInt = new IntWritable();
    readenInt.readFields(datainput);
    return readenInt.get();
}
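The source file above pairs this reader with a writer that is not shown here. A hypothetical complement, assuming it follows the same IntWritable wrapping pattern (the name writeInt is illustrative, not necessarily what SerializerConverter.java uses):

/**
 * Hypothetical counterpart to readInt above (not from the source file):
 * wraps the int in an IntWritable and serializes it to the output.
 */
public static void writeInt(DataOutput dataoutput, int value) throws IOException {
    IntWritable writtenInt = new IntWritable(value);
    writtenInt.write(dataoutput);
}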
From source file: com.marklogic.mapreduce.examples.LinkCountHDFS.java
License: Apache License
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (items.size() > count) {
        if (key == null) {
            key = new IntWritable();
        }
        key.set(count);
        if (value == null) {
            value = new Text();
        }
        value.set(items.get(count++).getStringValue());
        return true;
    }
    return false;
}
From source file: com.microsoft.canberra.tf.util.DoubleMatrixRecordReader.java
License: Open Source License
@Override
public IntWritable createKey() {
    return new IntWritable();
}
From source file: com.mongodb.hadoop.examples.lolmatches.LOLMatchesdMapper.java
License: Apache License
public LOLMatchesdMapper() {
    super();
    keyInt = new IntWritable();
    valueDouble = new DoubleWritable();
}
From source file: com.moz.fiji.mapreduce.lib.reduce.IntSumReducer.java
License: Apache License
/** {@inheritDoc} */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    mValue = new IntWritable();
}
From source file: com.mozilla.grouperfish.mahout.clustering.display.kmeans.OriginalText.java
License: Apache License
public Map<Integer, Set<String>> getDocIds(double sampleRate) {
    Random rand = new Random();
    Map<Integer, Set<String>> docIdMap = new HashMap<Integer, Set<String>>();
    SequenceFileDirectoryReader pointsReader = null;
    try {
        IntWritable k = new IntWritable();
        WeightedVectorWritable wvw = new WeightedVectorWritable();
        pointsReader = new SequenceFileDirectoryReader(clusteredPointsPath);
        while (pointsReader.next(k, wvw)) {
            int clusterId = k.get();
            Vector v = wvw.getVector();
            if (v instanceof NamedVector && rand.nextDouble() < sampleRate) {
                NamedVector nv = (NamedVector) v;
                Set<String> curDocIds = docIdMap.get(clusterId);
                if (curDocIds == null) {
                    curDocIds = new HashSet<String>();
                }
                curDocIds.add(nv.getName());
                docIdMap.put(clusterId, curDocIds);
            }
        }
    } catch (IOException e) {
        LOG.error("IOException caught while reading clustered points", e);
    } finally {
        if (pointsReader != null) {
            pointsReader.close();
        }
    }
    return docIdMap;
}
From source file: com.mozilla.grouperfish.mahout.clustering.display.kmeans.WordCloud.java
License: Apache License
public Map<Integer, Cloud> getClouds(Cloud template) {
    Map<Integer, Cloud> cloudMap = new HashMap<Integer, Cloud>();
    SequenceFileDirectoryReader pointsReader = null;
    try {
        IntWritable k = new IntWritable();
        WeightedVectorWritable wvw = new WeightedVectorWritable();
        pointsReader = new SequenceFileDirectoryReader(clusteredPointsPath);
        while (pointsReader.next(k, wvw)) {
            int clusterId = k.get();
            Cloud c = cloudMap.get(clusterId);
            if (c == null) {
                c = new Cloud(template);
            }
            Iterator<Element> viter = wvw.getVector().iterateNonZero();
            while (viter.hasNext()) {
                Element e = viter.next();
                String feature = invertedFeatureIndex.get(e.index());
                c.addTag(new Tag(feature, e.get()));
            }
            cloudMap.put(clusterId, c);
        }
    } catch (IOException e) {
        LOG.error("IOException caught while reading clustered points", e);
    } finally {
        if (pointsReader != null) {
            pointsReader.close();
        }
    }
    return cloudMap;
}
From source file: com.mozilla.grouperfish.transforms.coclustering.display.WriteCoClusteringOutput.java
License: Apache License
private void loadPoints() throws IOException {
    SequenceFile.Reader currReader = null;
    IntWritable k = new IntWritable();
    CoCluster currCluster;
    int currVID;
    WeightedVectorWritable wvw = new WeightedVectorWritable();
    try {
        fs = FileSystem.get(clusteredPointsPath.toUri(), conf);
        for (FileStatus status : fs.listStatus(clusteredPointsPath)) {
            Path p = status.getPath();
            if (!status.isDir() && !p.getName().startsWith("_")) {
                try {
                    currReader = new SequenceFile.Reader(fs, p, conf);
                    while (currReader.next(k, wvw)) {
                        currCluster = coclusters.get(k.get());
                        NamedVector v = (NamedVector) wvw.getVector();
                        currVID = Integer.parseInt(v.getName());
                        if (docIDMap.containsKey(currVID)) {
                            currCluster.put(v, docIDMap.get(currVID), true);
                        } else if (featureIDMap.containsKey(currVID)) {
                            currCluster.put(v, featureIDMap.get(currVID), false);
                        } else {
                            LOG.error("Key not feature or document!");
                        }
                    }
                } finally {
                    if (currReader != null) {
                        IOUtils.closeStream(currReader);
                    }
                }
            }
        }
    } catch (IOException ie) {
        LOG.info("Error while reading points", ie);
    } catch (ClassCastException ce) {
        LOG.info("NamedVectors possibly not used", ce);
    } finally {
        if (currReader != null) {
            IOUtils.closeStream(currReader);
        }
        if (fs != null) {
            fs.close();
        }
    }
}
From source file: com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java
License: Apache License
@Override
public void putNext(Tuple t) throws IOException {
    IntWritable outputKey = new IntWritable();
    VectorWritable outputValue = new VectorWritable();
    outputKey.set((Integer) t.get(0));
    Tuple currRow = (Tuple) t.get(1);
    Vector currRowVector;
    if (dimensions == 0) {
        throw new IllegalArgumentException("Trying to create 0 dimension vector");
    }
    if (STORE_AS_DENSE) {
        currRowVector = new NamedVector(new DenseVector(dimensions), outputKey.toString());
    } else if (STORE_AS_SEQUENTIAL) {
        currRowVector = new NamedVector(
                new SequentialAccessSparseVector(dimensions, currRow.size()), outputKey.toString());
    } else {
        currRowVector = new NamedVector(
                new RandomAccessSparseVector(dimensions, currRow.size()), outputKey.toString());
    }
    for (int ii = 0; ii < currRow.size(); ii++) {
        Object o = currRow.get(ii);
        switch (currRow.getType(ii)) {
        case DataType.INTEGER:
        case DataType.LONG:
        case DataType.FLOAT:
        case DataType.DOUBLE:
            currRowVector.set(ii, (Double) o);
            break;
        case DataType.TUPLE:
            // If this is a tuple then we want to set both column and element.
            Tuple subt = (Tuple) o;
            currRowVector.set((Integer) subt.get(0), (Double) subt.get(1));
            break;
        default:
            throw new RuntimeException("Unexpected tuple form");
        }
    }
    outputValue.set(currRowVector);
    try {
        writer.write(outputKey, outputValue);
    } catch (InterruptedException e) {
        LOG.error("Interrupted while writing", e);
    }
}
From source file: com.mvdb.scratch.HadoopClient.java
License: Apache License
public static void readSequenceFile(String sequenceFileName, String hadoopFS) throws IOException {
    Path path = new Path(sequenceFileName);
    conf.set("fs.defaultFS", hadoopFS);
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    IntWritable key = new IntWritable(); // this could be the wrong type
    BytesWritable value = new BytesWritable(); // also could be wrong
    while (reader.next(key, value)) {
        System.out.println(key + ":" + new String(value.getBytes()));
    }
    IOUtils.closeStream(reader);
}
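The comments in this last example admit the hard-coded key and value types may be wrong. A sketch of the usual way to avoid guessing, letting the reader report its own key/value classes (reader and conf as in the method above; ReflectionUtils is org.apache.hadoop.util.ReflectionUtils):

// Instantiate the key/value types recorded in the file's header
// instead of assuming IntWritable/BytesWritable.
Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
while (reader.next(key, value)) {
    System.out.println(key + ":" + value);
}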