List of usage examples for the org.apache.hadoop.io.IntWritable constructor
public IntWritable()
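Before the scraped examples below, a minimal sketch of what the no-argument constructor is for: it creates an IntWritable holding the default value 0, to be populated later via set() or readFields(). This sketch (including the class name IntWritableRoundTrip) is illustrative and not taken from any of the source files in this list.

import java.io.IOException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;

public class IntWritableRoundTrip {
    public static void main(String[] args) throws IOException {
        IntWritable original = new IntWritable(); // holds 0 until set
        original.set(42);

        // Round-trip through Hadoop's in-memory buffers to show
        // the Writable serialization contract.
        DataOutputBuffer out = new DataOutputBuffer();
        original.write(out);

        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());

        IntWritable copy = new IntWritable();
        copy.readFields(in); // populates the empty instance

        System.out.println(copy.get()); // prints 42
    }
}

Note how both instances are created empty and then filled in; this reuse-friendly pattern is what most of the examples below rely on.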
From source file:DAAL.SVDStep3Mapper.java
License:Open Source License
@Override
public void map(IntWritable step2key, WriteableData step2value, Context context)
        throws IOException, InterruptedException {
    DaalContext daalContext = new DaalContext();

    // Read the step-1 partial result that matches this record's id.
    SequenceFile.Reader reader = new SequenceFile.Reader(new Configuration(),
            SequenceFile.Reader.file(new Path("/Hadoop/SVD/step1/step1x" + step2value.getId())));
    IntWritable step1key = new IntWritable();
    WriteableData step1value = new WriteableData();
    reader.next(step1key, step1value);
    reader.close();

    DataCollection s1 = (DataCollection) step1value.getObject(daalContext);
    DataCollection s2 = (DataCollection) step2value.getObject(daalContext);

    /* Create an algorithm to compute step 3 of SVD on the local node */
    DistributedStep3Local svdStep3Local = new DistributedStep3Local(daalContext, Double.class,
            Method.defaultDense);
    svdStep3Local.input.set(DistributedStep3LocalInputId.inputOfStep3FromStep1, s1);
    svdStep3Local.input.set(DistributedStep3LocalInputId.inputOfStep3FromStep2, s2);

    /* Compute SVD in step 3 */
    svdStep3Local.compute();
    Result result = svdStep3Local.finalizeCompute();

    HomogenNumericTable Ui = (HomogenNumericTable) result.get(ResultId.leftSingularMatrix);

    SequenceFile.Writer writer = SequenceFile.createWriter(new Configuration(),
            SequenceFile.Writer.file(new Path("/Hadoop/SVD/Output/Ux" + step2value.getId())),
            SequenceFile.Writer.keyClass(IntWritable.class),
            SequenceFile.Writer.valueClass(WriteableData.class));
    writer.append(new IntWritable(0), new WriteableData(step2value.getId(), Ui));
    writer.close();

    daalContext.dispose();
}
From source file:de.kp.core.arules.hadoop.VerticalWritable.java
License:Open Source License
public VerticalWritable() {
    max = new IntWritable();
    tableItemTids = new BitSetArrayWritable();
    tableItemCount = new IntArrayWritable();
    transactions = new TransactionListWritable();
}
From source file:de.kp.core.spade.hadoop.ExtEquivalenceClassListWritable.java
License:Open Source License
public ExtEquivalenceClassListWritable() {
    this.sequences = new IntWritable();
    this.size = new IntWritable();
    this.minsupp = new DoubleWritable();
    this.eqClasList = new EquivalenceClassListWritable();
}
From source file:de.kp.core.spade.hadoop.ItemWritable.java
License:Open Source License
public ItemWritable() {
    itemWritable = new IntWritable();
}
From source file:de.kp.core.spade.hadoop.SequenceWritable.java
License:Open Source License
public SequenceWritable() {
    idWritable = new IntWritable();
    List<ItemsetWritable> empty = Collections.<ItemsetWritable>emptyList();
    itemsetListWritable = new ArrayWritable(ItemsetWritable.class,
            empty.toArray(new Writable[empty.size()]));
}
From source file:edu.bigdata.training.fileformats.compress.SequenceFileWriter.java
public static void main(String[] args) throws IOException {
    String uri = "output";
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(uri);
    IntWritable key = new IntWritable();
    Text value = new Text();
    File infile = new File("src/main/resources/input.txt");
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(conf, Writer.file(path),
                Writer.keyClass(key.getClass()),
                Writer.valueClass(value.getClass()),
                Writer.bufferSize(fs.getConf().getInt("io.file.buffer.size", 4096)),
                Writer.replication(fs.getDefaultReplication()),
                Writer.blockSize(1073741824),
                Writer.compression(SequenceFile.CompressionType.BLOCK, new DefaultCodec()),
                Writer.progressable(null),
                Writer.metadata(new Metadata()));
        int ctr = 100;
        List<String> lines = FileUtils.readLines(infile);
        for (String line : lines) {
            key.set(ctr++);
            value.set(line);
            if (ctr < 150) {
                System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            }
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}
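The writer above has no companion reader in this listing. As a rough sketch, assuming the same "output" path and the Hadoop 2+ options API, the file could be read back like this (the class name SequenceFileReadBack is ours, not from the source file):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileReadBack {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(new Path("output")));
            // Reuse one key/value pair across all records, as Writables allow.
            IntWritable key = new IntWritable();
            Text value = new Text();
            while (reader.next(key, value)) {
                System.out.printf("%s\t%s%n", key, value);
            }
        } finally {
            IOUtils.closeStream(reader);
        }
    }
}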
From source file:edu.cuhk.hccl.data.NounPhrase.java
License:Apache License
public NounPhrase() {
    setAdjective(new Text());
    setNoun(new Text());
    setAspect(new Text());
    setRating(new IntWritable());
    setSimilarity(new DoubleWritable());
}
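The constructors above (VerticalWritable, ExtEquivalenceClassListWritable, ItemWritable, SequenceWritable, NounPhrase) all share one pattern: every Writable field is pre-allocated in the no-argument constructor. Hadoop instantiates Writables reflectively and then calls readFields(), so the fields must already exist when deserialization runs. A minimal sketch of the pattern, using a hypothetical PairWritable class:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

public class PairWritable implements Writable {
    // Created once here and reused; deserialization fills them in place.
    private final IntWritable first = new IntWritable();
    private final IntWritable second = new IntWritable();

    public PairWritable() {
    }

    @Override
    public void write(DataOutput out) throws IOException {
        first.write(out);
        second.write(out);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        first.readFields(in);
        second.readFields(in);
    }
}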
From source file:edu.dfci.cccb.mev.kmeans.domain.hadoop.HadoopKMeansBuilder.java
License:Open Source License
@Override
public KMeans build() throws DatasetException {
    try (TemporaryFolder hadoop = new TemporaryFolder()) {
        File points = new File(hadoop, "points");
        points.mkdir();

        Configuration configuration = new Configuration();
        FileSystem system = get(configuration);

        final Dimension other = dataset().dimension(dimension().type() == ROW ? COLUMN : ROW);

        List<NamedVector> vectors = new AbstractList<NamedVector>() {
            @Override
            public NamedVector get(int index) {
                final String vector = dimension().keys().get(index);
                return new NamedVector(new AbstractVector(other.keys().size()) {
                    @Override
                    public void setQuick(int index, double value) {
                        throw new UnsupportedOperationException();
                    }

                    @Override
                    public Vector like() {
                        return new RandomAccessSparseVector(size());
                    }

                    @Override
                    public Iterator<Element> iterator() {
                        return new Iterator<Element>() {
                            private int current = 0;

                            @Override
                            public boolean hasNext() {
                                return current < other.keys().size();
                            }

                            @Override
                            public Element next() {
                                return new Element() {
                                    private final int index = current++;

                                    @Override
                                    public void set(double value) {
                                        throw new UnsupportedOperationException();
                                    }

                                    @Override
                                    public int index() {
                                        return index;
                                    }

                                    @Override
                                    @SneakyThrows(InvalidCoordinateException.class)
                                    public double get() {
                                        return dimension().type() == ROW
                                                ? dataset().values().get(vector, other.keys().get(index))
                                                : dataset().values().get(other.keys().get(index), vector);
                                    }
                                };
                            }

                            @Override
                            public void remove() {
                                throw new UnsupportedOperationException();
                            }
                        };
                    }

                    @Override
                    public Iterator<Element> iterateNonZero() {
                        return iterator();
                    }

                    @Override
                    public boolean isSequentialAccess() {
                        return true;
                    }

                    @Override
                    public boolean isDense() {
                        return true;
                    }

                    @Override
                    @SneakyThrows(InvalidCoordinateException.class)
                    public double getQuick(int index) {
                        return dimension().type() == ROW
                                ? dataset().values().get(vector, other.keys().get(index))
                                : dataset().values().get(other.keys().get(index), vector);
                    }

                    @Override
                    public int getNumNondefaultElements() {
                        return other.keys().size();
                    }

                    @Override
                    protected Matrix matrixLike(int rows, int columns) {
                        throw new UnsupportedOperationException();
                    }
                }, vector);
            }

            @Override
            public int size() {
                return dimension().keys().size();
            }
        };

        // write input
        try (Writer writer = new Writer(system, configuration,
                new Path(new File(points, "file1").getAbsolutePath()),
                LongWritable.class, VectorWritable.class)) {
            VectorWritable writable = new VectorWritable();
            long record = 0;
            for (Vector vector : vectors) {
                writable.set(vector);
                writer.append(new LongWritable(record++), writable);
            }
        }

        // prepare clusters
        File clusters = new File(hadoop, "clusters");
        clusters.mkdir();
        try (Writer writer = new Writer(system, configuration,
                new Path(new File(clusters, "part-00000").getAbsolutePath()),
                Text.class, Cluster.class)) {
            for (int i = 0; i < k(); i++) {
                Vector vec = vectors.get(i);
                Cluster cluster = new Cluster(vec, i, new EuclideanDistanceMeasure());
                writer.append(new Text(cluster.getIdentifier()), cluster);
            }
        }

        File output = new File(hadoop, "output");
        output.mkdir();
        try {
            run(configuration, new Path(points.getAbsolutePath()), new Path(clusters.getAbsolutePath()),
                    new Path(output.getAbsolutePath()), metric.measurer(), convergence(), iterations(),
                    true, false);

            try (Reader reader = new Reader(system,
                    new Path(new File(new File(output, CLUSTERED_POINTS_DIR), "/part-m-00000")
                            .getAbsolutePath()),
                    configuration)) {
                IntWritable key = new IntWritable();
                WeightedVectorWritable value = new WeightedVectorWritable();
                Map<String, Set<String>> result = new HashMap<>();
                while (reader.next(key, value)) {
                    Set<String> cluster = result.get(key.toString());
                    if (cluster == null)
                        result.put(key.toString(), cluster = new HashSet<>());
                    cluster.add(((NamedVector) value.getVector()).getName());
                }
                return new AbstractKMeans() {
                }.dataset(dataset()).dimension(dimension()).name(name()).type(type())
                        .clusters(new HashSet<>(result.values()));
            }
        } catch (ClassNotFoundException | InterruptedException e) {
            throw new DatasetException(e);
        }
    } catch (IOException e) {
        throw new DatasetException(e);
    }
}
From source file:edu.indiana.d2i.htrc.io.index.Dictionary.java
License:Apache License
public static void Dictionary2SeqFile(Configuration conf, String input, String output) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    DataInputStream fsinput = new DataInputStream(fs.open(new Path(input)));
    BufferedReader reader = new BufferedReader(new InputStreamReader(fsinput));

    SequenceFile.Writer writer = new SequenceFile.Writer(FileSystem.get(conf), conf, new Path(output),
            Text.class, IntWritable.class);

    String line = null;
    Text key = new Text();
    IntWritable value = new IntWritable();
    int count = 0;
    while ((line = reader.readLine()) != null) {
        key.set(line);
        value.set(count++);
        writer.append(key, value);
    }

    writer.close();
    reader.close();
}
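The example above calls the SequenceFile.Writer constructor directly, which is deprecated in Hadoop 2+. A sketch of the same writer setup using SequenceFile.createWriter(...) instead (our adaptation, not from the source file; the class name DictionaryWriterSketch is hypothetical, and the line-reading loop from the example above would slot in around the append() call):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class DictionaryWriterSketch {
    // Writes one (term, id) entry using the non-deprecated options API.
    public static void writeEntry(Configuration conf, String output, String term, int id)
            throws IOException {
        SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(new Path(output)),
                SequenceFile.Writer.keyClass(Text.class),
                SequenceFile.Writer.valueClass(IntWritable.class));
        try {
            writer.append(new Text(term), new IntWritable(id));
        } finally {
            writer.close();
        }
    }
}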
From source file:edu.indiana.d2i.htrc.io.index.Dictionary.java
License:Apache License
public Dictionary(Configuration conf) throws IOException {
    // String dictionaryPath = conf.get(HTRCConstants.DICTIONARY_PATH);
    String dictionaryPath = conf.get("htrc.solr.dictionary");
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), new Path(dictionaryPath),
            conf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    while (reader.next(key, value)) {
        dictionary.put(key.toString(), value.get());
    }
    reader.close();
}