List of usage examples for the org.apache.hadoop.io.LongWritable constructor
public LongWritable(long value)
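A minimal standalone sketch of the constructor itself (illustrative only; the variable names below are invented and do not come from the source files that follow):

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

// Wrap a primitive long so it can be used as a Hadoop key or value.
LongWritable key = new LongWritable(42L);
Text value = new Text("example record");

long raw = key.get();   // read the wrapped long back
key.set(raw + 1);       // Writables are mutable, so the same object can be reused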
From source file:de.unileipzig.dbs.giraph.algorithms.labelpropagation.LPVertexValue.java
License:Open Source License
/**
 * Gets the current community of this vertex.
 *
 * @return the current community
 */
public LongWritable getCurrentCommunity() {
    return new LongWritable(this.currentCommunity);
}
From source file:de.unileipzig.dbs.giraph.algorithms.labelpropagation.LPVertexValue.java
License:Open Source License
/**
 * Gets the last community of this vertex.
 *
 * @return the last community
 */
public LongWritable getLastCommunity() {
    return new LongWritable(this.lastCommunity);
}
From source file:edu.brown.cs.mapreduce.generator.DataLoader.java
License:Open Source License
/**
 * @param args
 */
public static void main(String[] args) {
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; i++) {
        if ("-compress".equals(args[i])) {
            DataLoader.compress = true;
            DataLoader.sequence = true;
        } else if ("-sequence".equals(args[i])) {
            DataLoader.sequence = true;
        } else if ("-tuple".equals(args[i])) {
            DataLoader.tuple = true;
        } else if ("-local".equals(args[i])) {
            DataLoader.local = true;
        } else if ("-limit".equals(args[i])) {
            DataLoader.limit = Integer.parseInt(args[++i]);
        } else if ("-xargs".equals(args[i])) {
            DataLoader.xargs = true;
        } else if ("-debug".equals(args[i])) {
            DataLoader.debug = true;
        } else {
            otherArgs.add(args[i]);
        }
    } // FOR
    if (otherArgs.size() < 3 && !DataLoader.xargs) {
        System.err.println("USAGE: DataLoader <input type> <input file> <output file>");
        System.exit(1);
    }
    String input_type = otherArgs.get(0).toLowerCase();
    String input_file = otherArgs.get(1);
    String output_file = null;
    if (DataLoader.xargs) {
        output_file = input_file + ".dl";
    } else {
        output_file = otherArgs.get(2);
    }
    boolean valid = false;
    for (String type : DataLoader.VALID_TYPES) {
        if (type.equals(input_type)) {
            valid = true;
            break;
        }
    }
    if (!valid) {
        System.err.println("ERROR: Invalid input data type '" + input_type + "'");
        System.exit(1);
    }
    if (debug) {
        System.out.println("Input Type: " + input_type);
        System.out.println("Input File: " + input_file);
        System.out.println("Output File: " + output_file);
        System.out.println("Limit: " + DataLoader.limit);
        System.out.println("Local: " + DataLoader.local);
        System.out.println("XArgs: " + DataLoader.xargs);
    }
    //
    // Get HDFS filesystem object that we can use for writing
    //
    FileSystem fs = null;
    Configuration conf = null;
    if (!DataLoader.local) {
        conf = AbstractHadoopClient.getConfiguration();
        try {
            fs = FileSystem.get(conf);
        } catch (Exception ex) {
            ex.printStackTrace();
            System.exit(-1);
        }
        if (debug)
            System.out.println("fs.default.name: " + conf.get("fs.default.name"));
    }
    //
    // Now open the file that we want to read and start writing the contents to our file system.
    // For some things, like 'urls', we will want to reverse the order so that the data makes sense
    // in our key->value paradigm.
    //
    BufferedReader in = null;
    DataOutputStream out = null;
    SequenceFile.Writer writer = null;
    int lines = 0;
    try {
        if (input_file.equals("-")) {
            in = new BufferedReader(new InputStreamReader(System.in));
        } else {
            in = new BufferedReader(new FileReader(input_file));
        }
    } catch (FileNotFoundException ex) {
        System.err.println("ERROR: The input file '" + input_file + "' was not found : " + ex.getMessage());
        System.exit(1);
    }
    try {
        if (!DataLoader.local) {
            //
            // FileSystem Writer
            //
            if (!DataLoader.sequence) {
                out = fs.create(new Path(output_file));
            //
            // SequenceFile Writer
            //
            } else {
                if (input_type.equals("sortgrep"))
                    DataLoader.tuple = false;
                if (DataLoader.debug)
                    System.out.print("Creating " + (DataLoader.compress ? "compressed " : "")
                            + "SequenceFile.Writer for '" + output_file + "': ");
                Class<? extends Writable> key_class = Text.class;
                Class<? extends Writable> value_class = null;
                if (DataLoader.tuple) {
                    if (input_type.equals("uservisits"))
                        value_class = UserVisitsTuple.class;
                    if (input_type.equals("rankings"))
                        value_class = RankingsTuple.class;
                } else {
                    value_class = Text.class;
                }
                writer = SequenceFile.createWriter(fs, conf, new Path(output_file), key_class, value_class,
                        (DataLoader.compress ? SequenceFile.CompressionType.BLOCK : SequenceFile.CompressionType.NONE));
                if (DataLoader.debug)
                    System.out.println("DONE!");
            }
        //
        // Local Filesystem
        //
        } else {
            out = new DataOutputStream(new FileOutputStream(output_file, true));
        }
    } catch (IOException ex) {
        System.err.println("ERROR: Failed to open output file '" + output_file + "' : " + ex.getMessage());
        System.exit(1);
    }
    try {
        //
        // Now read in each line of the input file and append it to our output
        //
        while (in.ready()) {
            //
            // Ignore any misformatted lines
            //
            String line = null;
            String key = "";
            String value = "";
            try {
                line = in.readLine();
                String data[] = line.split("\\" + BenchmarkBase.VALUE_DELIMITER);
                //
                // Switch the two values in a rankings record
                //
                if (input_type.equals("rankings")) {
                    key = data[1];
                    value = data[0];
                    for (int i = 2; i < data.length; i++) {
                        value += BenchmarkBase.VALUE_DELIMITER + data[i];
                    } // FOR
                //
                // Change the comma to a tab
                //
                } else if (input_type.equals("convert") || input_type.equals("uservisits")) {
                    key = data[0];
                    for (int i = 1; i < data.length; i++) {
                        if (i != 1)
                            value += BenchmarkBase.VALUE_DELIMITER;
                        value += data[i];
                    } // FOR
                //
                // Don't do anything with the SortGrep data!
                //
                } else if (input_type.equals("sortgrep")) {
                    key = line.substring(0, 10);
                    value = line.substring(10);
                //
                // All others need to switch the first VALUE_DELIMITER to a KEYVALUE_DELIMITER
                //
                } else {
                    line = line.replaceFirst(BenchmarkBase.VALUE_DELIMITER, BenchmarkBase.KEYVALUE_DELIMITER);
                }
                if (DataLoader.local || !DataLoader.sequence) {
                    line = key + BenchmarkBase.KEYVALUE_DELIMITER + value + "\n";
                    out.write(line.getBytes());
                } else {
                    //if (DataLoader.debug) System.out.println("[" + lines + "] " + key + " => " + value);
                    if (DataLoader.tuple) {
                        try {
                            data = value.split("\\" + BenchmarkBase.VALUE_DELIMITER);
                            Writable tuple_values[] = new Writable[data.length];
                            Class<?> types[] = (input_type.equals("uservisits") ? BenchmarkBase.USERVISITS_TYPES
                                    : BenchmarkBase.RANKINGS_TYPES);
                            for (int ctr = 0; ctr < data.length; ctr++) {
                                //
                                // Important! You have to subtract one from the types list
                                // because the first one is really the key, but we're creating a tuple
                                // on just the values!!
                                //
                                if (types[ctr + 1] == Text.class) {
                                    tuple_values[ctr] = new Text(data[ctr]);
                                } else if (types[ctr + 1] == IntWritable.class) {
                                    tuple_values[ctr] = new IntWritable(Integer.valueOf(data[ctr]));
                                } else if (types[ctr + 1] == DoubleWritable.class) {
                                    tuple_values[ctr] = new DoubleWritable(Double.valueOf(data[ctr]));
                                } else if (types[ctr + 1] == LongWritable.class) {
                                    tuple_values[ctr] = new LongWritable(Long.valueOf(data[ctr]));
                                } else if (types[ctr + 1] == FloatWritable.class) {
                                    tuple_values[ctr] = new FloatWritable(Float.valueOf(data[ctr]));
                                } else {
                                    System.err.println("Unsupported Class: " + types[ctr + 1]);
                                    System.exit(1);
                                }
                                if (DataLoader.debug)
                                    System.out.println("tuple_values[" + ctr + "] = " + tuple_values[ctr]);
                            }
                            AbstractTuple tuple = (input_type.equals("uservisits") ? new UserVisitsTuple(tuple_values)
                                    : new RankingsTuple(tuple_values));
                            if (DataLoader.debug)
                                System.out.println("STORING TUPLE: " + tuple + " (DATA " + data + " | VALUE " + value + ")");
                            writer.append(new Text(key), tuple);
                        } catch (Exception ex) {
                            ex.printStackTrace();
                            System.err.println("Error[" + output_file + "]");
                            System.err.println("## Line: " + lines);
                            System.err.println("## Content: " + line);
                        }
                    } else {
                        writer.append(new Text(key), new Text(value));
                    }
                }
                lines++;
                if (DataLoader.limit != null && lines >= DataLoader.limit)
                    break;
                if (DataLoader.debug && lines % 1000000 == 0)
                    System.out.println("\tWrote " + lines + " '" + input_type + "' records to '" + output_file + "'");
            } catch (Exception ex) {
                System.err.println("Error[" + output_file + "]");
                System.err.println("## Line: " + lines);
                System.err.println("## Content: " + line);
                ex.printStackTrace();
                System.exit(1);
            }
        } // WHILE
    } catch (Exception ex) {
        ex.printStackTrace();
        System.exit(1);
    } finally {
        try {
            if (in != null)
                in.close();
            if (out != null)
                out.close();
            if (writer != null)
                writer.close();
        } catch (Exception ex) {
            ex.printStackTrace();
            System.exit(1);
        }
    }
    System.out.println("Wrote " + lines + " '" + input_type + "' records to '" + output_file + "'");
}
From source file:edu.dfci.cccb.mev.kmeans.domain.hadoop.HadoopKMeansBuilder.java
License:Open Source License
@Override
public KMeans build() throws DatasetException {
    try (TemporaryFolder hadoop = new TemporaryFolder()) {
        File points = new File(hadoop, "points");
        points.mkdir();
        Configuration configuration = new Configuration();
        FileSystem system = get(configuration);
        final Dimension other = dataset().dimension(dimension().type() == ROW ? COLUMN : ROW);
        List<NamedVector> vectors = new AbstractList<NamedVector>() {
            @Override
            public NamedVector get(int index) {
                final String vector = dimension().keys().get(index);
                return new NamedVector(new AbstractVector(other.keys().size()) {
                    @Override
                    public void setQuick(int index, double value) { throw new UnsupportedOperationException(); }

                    @Override
                    public Vector like() { return new RandomAccessSparseVector(size()); }

                    @Override
                    public Iterator<Element> iterator() {
                        return new Iterator<Element>() {
                            private int current = 0;

                            @Override
                            public boolean hasNext() { return current < other.keys().size(); }

                            @Override
                            public Element next() {
                                return new Element() {
                                    private final int index = current++;

                                    @Override
                                    public void set(double value) { throw new UnsupportedOperationException(); }

                                    @Override
                                    public int index() { return index; }

                                    @Override
                                    @SneakyThrows(InvalidCoordinateException.class)
                                    public double get() {
                                        return dimension().type() == ROW
                                                ? dataset().values().get(vector, other.keys().get(index))
                                                : dataset().values().get(other.keys().get(index), vector);
                                    }
                                };
                            }

                            @Override
                            public void remove() { throw new UnsupportedOperationException(); }
                        };
                    }

                    @Override
                    public Iterator<Element> iterateNonZero() { return iterator(); }

                    @Override
                    public boolean isSequentialAccess() { return true; }

                    @Override
                    public boolean isDense() { return true; }

                    @Override
                    @SneakyThrows(InvalidCoordinateException.class)
                    public double getQuick(int index) {
                        return dimension().type() == ROW
                                ? dataset().values().get(vector, other.keys().get(index))
                                : dataset().values().get(other.keys().get(index), vector);
                    }

                    @Override
                    public int getNumNondefaultElements() { return other.keys().size(); }

                    @Override
                    protected Matrix matrixLike(int rows, int columns) { throw new UnsupportedOperationException(); }
                }, vector);
            }

            @Override
            public int size() { return dimension().keys().size(); }
        };

        // write input
        try (Writer writer = new Writer(system, configuration,
                new Path(new File(points, "file1").getAbsolutePath()), LongWritable.class, VectorWritable.class)) {
            VectorWritable writable = new VectorWritable();
            long record = 0;
            for (Vector vector : vectors) {
                writable.set(vector);
                writer.append(new LongWritable(record++), writable);
            }
        }

        // prepare clusters
        File clusters = new File(hadoop, "clusters");
        clusters.mkdir();
        try (Writer writer = new Writer(system, configuration,
                new Path(new File(clusters, "part-00000").getAbsolutePath()), Text.class, Cluster.class)) {
            for (int i = 0; i < k(); i++) {
                Vector vec = vectors.get(i);
                Cluster cluster = new Cluster(vec, i, new EuclideanDistanceMeasure());
                writer.append(new Text(cluster.getIdentifier()), cluster);
            }
        }

        File output = new File(hadoop, "output");
        output.mkdir();
        try {
            run(configuration, new Path(points.getAbsolutePath()), new Path(clusters.getAbsolutePath()),
                    new Path(output.getAbsolutePath()), metric.measurer(), convergence(), iterations(), true, false);
            try (Reader reader = new Reader(system,
                    new Path(new File(new File(output, CLUSTERED_POINTS_DIR), "/part-m-00000").getAbsolutePath()),
                    configuration)) {
                IntWritable key = new IntWritable();
                WeightedVectorWritable value = new WeightedVectorWritable();
                Map<String, Set<String>> result = new HashMap<>();
                while (reader.next(key, value)) {
                    Set<String> cluster = result.get(key.toString());
                    if (cluster == null)
                        result.put(key.toString(), cluster = new HashSet<>());
                    cluster.add(((NamedVector) value.getVector()).getName());
                }
                return new AbstractKMeans() {
                }.dataset(dataset()).dimension(dimension()).name(name()).type(type())
                        .clusters(new HashSet<>(result.values()));
            }
        } catch (ClassNotFoundException | InterruptedException e) {
            throw new DatasetException(e);
        }
    } catch (IOException e) {
        throw new DatasetException(e);
    }
}
From source file:edu.indiana.soic.ts.mapreduce.pwd.PairWiseDistance.java
License:Open Source License
private void distributeData(int blockSize, Configuration conf, FileSystem fs, Path hdInputDir, int noOfDivisions)
        throws IOException {
    // Write block metadata for each block in a separate file so that
    // Hadoop will create separate Map tasks for each block.
    // Key  : block number
    // Value: row#column#isDiagonal#base_file_name
    // TODO : find a better way to do this.
    for (int row = 0; row < noOfDivisions; row++) {
        for (int column = 0; column < noOfDivisions; column++) {
            // using the load balancing algorithm to select the blocks
            // include the diagonal blocks as they are blocks, not individual pairs
            if (((row >= column) & ((row + column) % 2 == 0)) | ((row <= column) & ((row + column) % 2 == 1))) {
                Path vFile = new Path(hdInputDir, "data_file_" + row + "_" + column);
                SequenceFile.Writer vWriter = SequenceFile.createWriter(fs, conf, vFile, LongWritable.class,
                        Text.class, CompressionType.NONE);
                boolean isDiagonal = false;
                if (row == column) {
                    isDiagonal = true;
                }
                String value = row + Constants.BREAK + column + Constants.BREAK + isDiagonal + Constants.BREAK
                        + Constants.HDFS_SEQ_FILENAME;
                vWriter.append(new LongWritable(row * blockSize + column), new Text(value));
                vWriter.close();
            }
        }
    }
}
From source file:edu.indiana.soic.ts.mapreduce.pwd.SWGMap.java
License:Open Source License
public void map(LongWritable blockIndex, Text value, Context context) throws IOException, InterruptedException {
    long startTime = System.nanoTime();
    Configuration conf = context.getConfiguration();
    Counter alignmentCounter = context.getCounter(Constants.RecordCounters.ALIGNMENTS);
    String valString = value.toString();
    String valArgs[] = valString.split(Constants.BREAK);
    long rowBlock = Long.parseLong(valArgs[0]);
    long columnBlock = Long.parseLong(valArgs[1]);
    boolean isDiagonal = Boolean.parseBoolean(valArgs[2]);
    LOG.info("row column" + rowBlock + " " + columnBlock + " " + isDiagonal + " " + valArgs[2]);
    long row = rowBlock * blockSize;
    long column = columnBlock * blockSize;
    long parseStartTime = System.nanoTime();
    FileSystem fs = FileSystem.getLocal(conf);
    // parse the inputFilePart for row
    Path rowPath = new Path(Constants.HDFS_SEQ_FILENAME + "_" + rowBlock);
    FSDataInputStream rowInStream = fs.open(rowPath);
    List<VectorPoint> rowSequences = SequenceParser.ParseFile(rowInStream);
    // parse the inputFilePart for column if this is not a diagonal block
    List<VectorPoint> colSequences;
    if (isDiagonal) {
        colSequences = rowSequences;
    } else {
        // parse the inputFilePart for column
        Path colPath = new Path(Constants.HDFS_SEQ_FILENAME + "_" + columnBlock);
        FSDataInputStream colInStream = fs.open(colPath);
        colSequences = SequenceParser.ParseFile(colInStream);
    }
    LOG.info("Parsing time : " + ((System.nanoTime() - parseStartTime) / 1000000) + "ms");
    short[][] alignments = new short[(int) blockSize][(int) blockSize];
    double[][] doubleDistances = new double[(int) blockSize][(int) blockSize];
    double max = Double.MIN_VALUE;
    for (int rowIndex = 0; ((rowIndex < blockSize) & ((row + rowIndex) < noOfSequences)); rowIndex++) {
        int columnIndex = 0;
        for (; ((columnIndex < blockSize) & ((column + columnIndex) < noOfSequences)); columnIndex++) {
            double alignment;
            alignment = distFunc.calc(rowSequences.get(rowIndex), colSequences.get(columnIndex));
            if (alignment > max) {
                max = alignment;
            }
            // Get the identity and make it percent identity
            doubleDistances[rowIndex][columnIndex] = alignment;
        }
        alignmentCounter.increment(columnIndex);
    }
    // divide by max to get the range to 0 to 1 and then convert to short and output
    for (int rowIndex = 0; ((rowIndex < blockSize) & ((row + rowIndex) < noOfSequences)); rowIndex++) {
        int columnIndex = 0;
        for (; ((columnIndex < blockSize) & ((column + columnIndex) < noOfSequences)); columnIndex++) {
            double alignment = doubleDistances[rowIndex][columnIndex] / max;
            short scaledScore = (short) (alignment * Short.MAX_VALUE);
            alignments[rowIndex][columnIndex] = scaledScore;
        }
    }
    SWGWritable dataWritable = new SWGWritable(rowBlock, columnBlock, blockSize, false);
    dataWritable.setMax(max);
    dataWritable.setAlignments(alignments);
    context.write(new LongWritable(rowBlock), dataWritable);
    if (!isDiagonal) {
        // Create the transpose matrix of the (rowBlock, colBlock) block to fill the
        // (colBlock, rowBlock) block.
        SWGWritable inverseDataWritable = new SWGWritable(columnBlock, rowBlock, blockSize, true);
        inverseDataWritable.setAlignments(alignments);
        context.write(new LongWritable(columnBlock), inverseDataWritable);
    }
    LOG.info("Map time : " + ((System.nanoTime() - startTime) / 1000000) + "ms");
}
From source file:edu.uci.ics.hivesterix.test.serde.SerDeTest.java
License:Apache License
/**
 * Test the LazySimpleSerDe class with the LastColumnTakesRest option.
 */
public void testLazySimpleSerDeLastColumnTakesRest() throws Throwable {
    try {
        // Create the SerDe
        LazySimpleSerDe serDe = new LazySimpleSerDe();
        Configuration conf = new Configuration();
        Properties tbl = createProperties();
        tbl.setProperty(Constants.SERIALIZATION_LAST_COLUMN_TAKES_REST, "true");
        serDe.initialize(conf, tbl);
        // Data
        Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t");
        String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\ta\tb\t";
        Object[] expectedFieldsData = { new ByteWritable((byte) 123), new ShortWritable((short) 456),
                new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3),
                new Text("hive and hadoop"), null, new Text("a\tb\t") };
        // Test
        deserializeAndSerialize(serDe, t, s, expectedFieldsData);
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
From source file:edu.uci.ics.hivesterix.test.serde.SerDeTest.java
License:Apache License
/**
 * Test the LazySimpleSerDe class with extra columns.
 */
public void testLazySimpleSerDeExtraColumns() throws Throwable {
    try {
        // Create the SerDe
        LazySimpleSerDe serDe = new LazySimpleSerDe();
        Configuration conf = new Configuration();
        Properties tbl = createProperties();
        serDe.initialize(conf, tbl);
        // Data
        Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t");
        String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\ta";
        Object[] expectedFieldsData = { new ByteWritable((byte) 123), new ShortWritable((short) 456),
                new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3),
                new Text("hive and hadoop"), null, new Text("a") };
        // Test
        deserializeAndSerialize(serDe, t, s, expectedFieldsData);
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
From source file:edu.uci.ics.hivesterix.test.serde.SerDeTest.java
License:Apache License
/**
 * Test the LazySimpleSerDe class with missing columns.
 */
public void testLazySimpleSerDeMissingColumns() throws Throwable {
    try {
        // Create the SerDe
        LazySimpleSerDe serDe = new LazySimpleSerDe();
        Configuration conf = new Configuration();
        Properties tbl = createProperties();
        serDe.initialize(conf, tbl);
        // Data
        Text t = new Text("123\t456\t789\t1000\t5.3\t");
        String s = "123\t456\t789\t1000\t5.3\t\tNULL\tNULL";
        Object[] expectedFieldsData = { new ByteWritable((byte) 123), new ShortWritable((short) 456),
                new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3),
                new Text(""), null, null };
        // Test
        deserializeAndSerialize(serDe, t, s, expectedFieldsData);
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
From source file:edu.uci.ics.hyracks.dataflow.hadoop.util.PreappendLongWritableMapper.java
License:Apache License
@Override
public void map(Object[] data, IDataWriter<Object[]> writer) throws HyracksDataException {
    writer.writeData(new Object[] { new LongWritable(0), new Text(String.valueOf(data[0])) });
}