List of usage examples for the org.apache.hadoop.io.LongWritable constructor
public LongWritable(long value)
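A minimal standalone sketch of the constructor itself (illustrative only; the variable names below are invented and do not come from the source files that follow):

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

// Wrap a primitive long so it can be used as a Hadoop key or value.
LongWritable key = new LongWritable(42L);
Text value = new Text("example record");

long raw = key.get();   // read the wrapped long back
key.set(raw + 1);       // Writables are mutable, so the same object can be reused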
From source file:de.unileipzig.dbs.giraph.algorithms.labelpropagation.LPVertexValue.java
License:Open Source License
/**
 * Gets the current community of this vertex.
 *
 * @return the current community
 */
public LongWritable getCurrentCommunity() {
    return new LongWritable(this.currentCommunity);
}
From source file:de.unileipzig.dbs.giraph.algorithms.labelpropagation.LPVertexValue.java
License:Open Source License
/**
 * Gets the last community of this vertex.
 *
 * @return the last community
 */
public LongWritable getLastCommunity() {
    return new LongWritable(this.lastCommunity);
}
From source file:edu.brown.cs.mapreduce.generator.DataLoader.java
License:Open Source License
/**
 * @param args
 */
public static void main(String[] args) {
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; i++) {
        if ("-compress".equals(args[i])) {
            DataLoader.compress = true;
            DataLoader.sequence = true;
        } else if ("-sequence".equals(args[i])) {
            DataLoader.sequence = true;
        } else if ("-tuple".equals(args[i])) {
            DataLoader.tuple = true;
        } else if ("-local".equals(args[i])) {
            DataLoader.local = true;
        } else if ("-limit".equals(args[i])) {
            DataLoader.limit = Integer.parseInt(args[++i]);
        } else if ("-xargs".equals(args[i])) {
            DataLoader.xargs = true;
        } else if ("-debug".equals(args[i])) {
            DataLoader.debug = true;
        } else {
            otherArgs.add(args[i]);
        }
    } // FOR
    if (otherArgs.size() < 3 && !DataLoader.xargs) {
        System.err.println("USAGE: DataLoader <input type> <input file> <output file>");
        System.exit(1);
    }
    String input_type = otherArgs.get(0).toLowerCase();
    String input_file = otherArgs.get(1);
    String output_file = null;
    if (DataLoader.xargs) {
        output_file = input_file + ".dl";
    } else {
        output_file = otherArgs.get(2);
    }
    boolean valid = false;
    for (String type : DataLoader.VALID_TYPES) {
        if (type.equals(input_type)) {
            valid = true;
            break;
        }
    }
    if (!valid) {
        System.err.println("ERROR: Invalid input data type '" + input_type + "'");
        System.exit(1);
    }
    if (debug) {
        System.out.println("Input Type: " + input_type);
        System.out.println("Input File: " + input_file);
        System.out.println("Output File: " + output_file);
        System.out.println("Limit: " + DataLoader.limit);
        System.out.println("Local: " + DataLoader.local);
        System.out.println("XArgs: " + DataLoader.xargs);
    }
    //
    // Get HDFS filesystem object that we can use for writing
    //
    FileSystem fs = null;
    Configuration conf = null;
    if (!DataLoader.local) {
        conf = AbstractHadoopClient.getConfiguration();
        try {
            fs = FileSystem.get(conf);
        } catch (Exception ex) {
            ex.printStackTrace();
            System.exit(-1);
        }
        if (debug)
            System.out.println("fs.default.name: " + conf.get("fs.default.name"));
    }
    //
    // Now open the file that we want to read and start writing the contents to our file system.
    // For some things, like 'urls', we will want to reverse the order so that the data makes sense
    // in our key->value paradigm.
    //
    BufferedReader in = null;
    DataOutputStream out = null;
    SequenceFile.Writer writer = null;
    int lines = 0;
    try {
        if (input_file.equals("-")) {
            in = new BufferedReader(new InputStreamReader(System.in));
        } else {
            in = new BufferedReader(new FileReader(input_file));
        }
    } catch (FileNotFoundException ex) {
        System.err.println("ERROR: The input file '" + input_file + "' was not found : " + ex.getMessage());
        System.exit(1);
    }
    try {
        if (!DataLoader.local) {
            //
            // FileSystem Writer
            //
            if (!DataLoader.sequence) {
                out = fs.create(new Path(output_file));
            //
            // SequenceFile Writer
            //
            } else {
                if (input_type.equals("sortgrep"))
                    DataLoader.tuple = false;
                if (DataLoader.debug)
                    System.out.print("Creating " + (DataLoader.compress ? "compressed " : "")
                            + "SequenceFile.Writer for '" + output_file + "': ");
                Class<? extends Writable> key_class = Text.class;
                Class<? extends Writable> value_class = null;
                if (DataLoader.tuple) {
                    if (input_type.equals("uservisits"))
                        value_class = UserVisitsTuple.class;
                    if (input_type.equals("rankings"))
                        value_class = RankingsTuple.class;
                } else {
                    value_class = Text.class;
                }
                writer = SequenceFile.createWriter(fs, conf, new Path(output_file), key_class, value_class,
                        (DataLoader.compress ? SequenceFile.CompressionType.BLOCK : SequenceFile.CompressionType.NONE));
                if (DataLoader.debug)
                    System.out.println("DONE!");
            }
        //
        // Local Filesystem
        //
        } else {
            out = new DataOutputStream(new FileOutputStream(output_file, true));
        }
    } catch (IOException ex) {
        System.err.println("ERROR: Failed to open output file '" + output_file + "' : " + ex.getMessage());
        System.exit(1);
    }
    try {
        //
        // Now read in each line of the input file and append it to our output
        //
        while (in.ready()) {
            //
            // Ignore any misformatted lines
            //
            String line = null;
            String key = "";
            String value = "";
            try {
                line = in.readLine();
                String data[] = line.split("\\" + BenchmarkBase.VALUE_DELIMITER);
                //
                // Switch the two values in a rankings record
                //
                if (input_type.equals("rankings")) {
                    key = data[1];
                    value = data[0];
                    for (int i = 2; i < data.length; i++) {
                        value += BenchmarkBase.VALUE_DELIMITER + data[i];
                    } // FOR
                //
                // Change the comma to a tab
                //
                } else if (input_type.equals("convert") || input_type.equals("uservisits")) {
                    key = data[0];
                    for (int i = 1; i < data.length; i++) {
                        if (i != 1)
                            value += BenchmarkBase.VALUE_DELIMITER;
                        value += data[i];
                    } // FOR
                //
                // Don't do anything with the SortGrep data!
                //
                } else if (input_type.equals("sortgrep")) {
                    key = line.substring(0, 10);
                    value = line.substring(10);
                //
                // All others need to switch the first VALUE_DELIMITER to a KEYVALUE_DELIMITER
                //
                } else {
                    line = line.replaceFirst(BenchmarkBase.VALUE_DELIMITER, BenchmarkBase.KEYVALUE_DELIMITER);
                }
                if (DataLoader.local || !DataLoader.sequence) {
                    line = key + BenchmarkBase.KEYVALUE_DELIMITER + value + "\n";
                    out.write(line.getBytes());
                } else {
                    //if (DataLoader.debug) System.out.println("[" + lines + "] " + key + " => " + value);
                    if (DataLoader.tuple) {
                        try {
                            data = value.split("\\" + BenchmarkBase.VALUE_DELIMITER);
                            Writable tuple_values[] = new Writable[data.length];
                            Class<?> types[] = (input_type.equals("uservisits") ? BenchmarkBase.USERVISITS_TYPES
                                    : BenchmarkBase.RANKINGS_TYPES);
                            for (int ctr = 0; ctr < data.length; ctr++) {
                                //
                                // Important! You have to subtract one from the types list
                                // because the first one is really the key, but we're creating a tuple
                                // on just the values!!
                                //
                                if (types[ctr + 1] == Text.class) {
                                    tuple_values[ctr] = new Text(data[ctr]);
                                } else if (types[ctr + 1] == IntWritable.class) {
                                    tuple_values[ctr] = new IntWritable(Integer.valueOf(data[ctr]));
                                } else if (types[ctr + 1] == DoubleWritable.class) {
                                    tuple_values[ctr] = new DoubleWritable(Double.valueOf(data[ctr]));
                                } else if (types[ctr + 1] == LongWritable.class) {
                                    tuple_values[ctr] = new LongWritable(Long.valueOf(data[ctr]));
                                } else if (types[ctr + 1] == FloatWritable.class) {
                                    tuple_values[ctr] = new FloatWritable(Float.valueOf(data[ctr]));
                                } else {
                                    System.err.println("Unsupported Class: " + types[ctr + 1]);
                                    System.exit(1);
                                }
                                if (DataLoader.debug)
                                    System.out.println("tuple_values[" + ctr + "] = " + tuple_values[ctr]);
                            }
                            AbstractTuple tuple = (input_type.equals("uservisits") ? new UserVisitsTuple(tuple_values)
                                    : new RankingsTuple(tuple_values));
                            if (DataLoader.debug)
                                System.out.println("STORING TUPLE: " + tuple + " (DATA " + data + " | VALUE " + value + ")");
                            writer.append(new Text(key), tuple);
                        } catch (Exception ex) {
                            ex.printStackTrace();
                            System.err.println("Error[" + output_file + "]");
                            System.err.println("## Line: " + lines);
                            System.err.println("## Content: " + line);
                        }
                    } else {
                        writer.append(new Text(key), new Text(value));
                    }
                }
                lines++;
                if (DataLoader.limit != null && lines >= DataLoader.limit)
                    break;
                if (DataLoader.debug && lines % 1000000 == 0)
                    System.out.println("\tWrote " + lines + " '" + input_type + "' records to '" + output_file + "'");
            } catch (Exception ex) {
                System.err.println("Error[" + output_file + "]");
                System.err.println("## Line: " + lines);
                System.err.println("## Content: " + line);
                ex.printStackTrace();
                System.exit(1);
            }
        } // WHILE
    } catch (Exception ex) {
        ex.printStackTrace();
        System.exit(1);
    } finally {
        try {
            if (in != null)
                in.close();
            if (out != null)
                out.close();
            if (writer != null)
                writer.close();
        } catch (Exception ex) {
            ex.printStackTrace();
            System.exit(1);
        }
    }
    System.out.println("Wrote " + lines + " '" + input_type + "' records to '" + output_file + "'");
}
From source file:edu.dfci.cccb.mev.kmeans.domain.hadoop.HadoopKMeansBuilder.java
License:Open Source License
@Override
public KMeans build() throws DatasetException {
    try (TemporaryFolder hadoop = new TemporaryFolder()) {
        File points = new File(hadoop, "points");
        points.mkdir();
        Configuration configuration = new Configuration();
        FileSystem system = get(configuration);
        final Dimension other = dataset().dimension(dimension().type() == ROW ? COLUMN : ROW);
        List<NamedVector> vectors = new AbstractList<NamedVector>() {
            @Override
            public NamedVector get(int index) {
                final String vector = dimension().keys().get(index);
                return new NamedVector(new AbstractVector(other.keys().size()) {
                    @Override
                    public void setQuick(int index, double value) { throw new UnsupportedOperationException(); }

                    @Override
                    public Vector like() { return new RandomAccessSparseVector(size()); }

                    @Override
                    public Iterator<Element> iterator() {
                        return new Iterator<Element>() {
                            private int current = 0;

                            @Override
                            public boolean hasNext() { return current < other.keys().size(); }

                            @Override
                            public Element next() {
                                return new Element() {
                                    private final int index = current++;

                                    @Override
                                    public void set(double value) { throw new UnsupportedOperationException(); }

                                    @Override
                                    public int index() { return index; }

                                    @Override
                                    @SneakyThrows(InvalidCoordinateException.class)
                                    public double get() {
                                        return dimension().type() == ROW
                                                ? dataset().values().get(vector, other.keys().get(index))
                                                : dataset().values().get(other.keys().get(index), vector);
                                    }
                                };
                            }

                            @Override
                            public void remove() { throw new UnsupportedOperationException(); }
                        };
                    }

                    @Override
                    public Iterator<Element> iterateNonZero() { return iterator(); }

                    @Override
                    public boolean isSequentialAccess() { return true; }

                    @Override
                    public boolean isDense() { return true; }

                    @Override
                    @SneakyThrows(InvalidCoordinateException.class)
                    public double getQuick(int index) {
                        return dimension().type() == ROW
                                ? dataset().values().get(vector, other.keys().get(index))
                                : dataset().values().get(other.keys().get(index), vector);
                    }

                    @Override
                    public int getNumNondefaultElements() { return other.keys().size(); }

                    @Override
                    protected Matrix matrixLike(int rows, int columns) { throw new UnsupportedOperationException(); }
                }, vector);
            }

            @Override
            public int size() { return dimension().keys().size(); }
        };

        // write input
        try (Writer writer = new Writer(system, configuration,
                new Path(new File(points, "file1").getAbsolutePath()), LongWritable.class, VectorWritable.class)) {
            VectorWritable writable = new VectorWritable();
            long record = 0;
            for (Vector vector : vectors) {
                writable.set(vector);
                writer.append(new LongWritable(record++), writable);
            }
        }

        // prepare clusters
        File clusters = new File(hadoop, "clusters");
        clusters.mkdir();
        try (Writer writer = new Writer(system, configuration,
                new Path(new File(clusters, "part-00000").getAbsolutePath()), Text.class, Cluster.class)) {
            for (int i = 0; i < k(); i++) {
                Vector vec = vectors.get(i);
                Cluster cluster = new Cluster(vec, i, new EuclideanDistanceMeasure());
                writer.append(new Text(cluster.getIdentifier()), cluster);
            }
        }

        File output = new File(hadoop, "output");
        output.mkdir();
        try {
            run(configuration, new Path(points.getAbsolutePath()), new Path(clusters.getAbsolutePath()),
                    new Path(output.getAbsolutePath()), metric.measurer(), convergence(), iterations(), true, false);
            try (Reader reader = new Reader(system,
                    new Path(new File(new File(output, CLUSTERED_POINTS_DIR), "/part-m-00000").getAbsolutePath()),
                    configuration)) {
                IntWritable key = new IntWritable();
                WeightedVectorWritable value = new WeightedVectorWritable();
                Map<String, Set<String>> result = new HashMap<>();
                while (reader.next(key, value)) {
                    Set<String> cluster = result.get(key.toString());
                    if (cluster == null)
                        result.put(key.toString(), cluster = new HashSet<>());
                    cluster.add(((NamedVector) value.getVector()).getName());
                }
                return new AbstractKMeans() {
                }.dataset(dataset()).dimension(dimension()).name(name()).type(type())
                        .clusters(new HashSet<>(result.values()));
            }
        } catch (ClassNotFoundException | InterruptedException e) {
            throw new DatasetException(e);
        }
    } catch (IOException e) {
        throw new DatasetException(e);
    }
}
From source file:edu.indiana.soic.ts.mapreduce.pwd.PairWiseDistance.java
License:Open Source License
private void distributeData(int blockSize, Configuration conf, FileSystem fs, Path hdInputDir, int noOfDivisions)
        throws IOException {
    // Write block metadata for each block in a separate file so that
    // Hadoop will create separate Map tasks for each block.
    // Key  : block number
    // Value: row#column#isDiagonal#base_file_name
    // TODO : find a better way to do this.
    for (int row = 0; row < noOfDivisions; row++) {
        for (int column = 0; column < noOfDivisions; column++) {
            // using the load balancing algorithm to select the blocks
            // include the diagonal blocks as they are blocks, not individual pairs
            if (((row >= column) & ((row + column) % 2 == 0)) | ((row <= column) & ((row + column) % 2 == 1))) {
                Path vFile = new Path(hdInputDir, "data_file_" + row + "_" + column);
                SequenceFile.Writer vWriter = SequenceFile.createWriter(fs, conf, vFile, LongWritable.class,
                        Text.class, CompressionType.NONE);
                boolean isDiagonal = false;
                if (row == column) {
                    isDiagonal = true;
                }
                String value = row + Constants.BREAK + column + Constants.BREAK + isDiagonal + Constants.BREAK
                        + Constants.HDFS_SEQ_FILENAME;
                vWriter.append(new LongWritable(row * blockSize + column), new Text(value));
                vWriter.close();
            }
        }
    }
}
From source file:edu.indiana.soic.ts.mapreduce.pwd.SWGMap.java
License:Open Source License
public void map(LongWritable blockIndex, Text value, Context context) throws IOException, InterruptedException {
    long startTime = System.nanoTime();
    Configuration conf = context.getConfiguration();
    Counter alignmentCounter = context.getCounter(Constants.RecordCounters.ALIGNMENTS);
    String valString = value.toString();
    String valArgs[] = valString.split(Constants.BREAK);
    long rowBlock = Long.parseLong(valArgs[0]);
    long columnBlock = Long.parseLong(valArgs[1]);
    boolean isDiagonal = Boolean.parseBoolean(valArgs[2]);
    LOG.info("row column" + rowBlock + " " + columnBlock + " " + isDiagonal + " " + valArgs[2]);
    long row = rowBlock * blockSize;
    long column = columnBlock * blockSize;
    long parseStartTime = System.nanoTime();
    FileSystem fs = FileSystem.getLocal(conf);
    // parse the inputFilePart for row
    Path rowPath = new Path(Constants.HDFS_SEQ_FILENAME + "_" + rowBlock);
    FSDataInputStream rowInStream = fs.open(rowPath);
    List<VectorPoint> rowSequences = SequenceParser.ParseFile(rowInStream);
    // parse the inputFilePart for column if this is not a diagonal block
    List<VectorPoint> colSequences;
    if (isDiagonal) {
        colSequences = rowSequences;
    } else {
        // parse the inputFilePart for column
        Path colPath = new Path(Constants.HDFS_SEQ_FILENAME + "_" + columnBlock);
        FSDataInputStream colInStream = fs.open(colPath);
        colSequences = SequenceParser.ParseFile(colInStream);
    }
    LOG.info("Parsing time : " + ((System.nanoTime() - parseStartTime) / 1000000) + "ms");
    short[][] alignments = new short[(int) blockSize][(int) blockSize];
    double[][] doubleDistances = new double[(int) blockSize][(int) blockSize];
    double max = Double.MIN_VALUE;
    for (int rowIndex = 0; ((rowIndex < blockSize) & ((row + rowIndex) < noOfSequences)); rowIndex++) {
        int columnIndex = 0;
        for (; ((columnIndex < blockSize) & ((column + columnIndex) < noOfSequences)); columnIndex++) {
            double alignment;
            alignment = distFunc.calc(rowSequences.get(rowIndex), colSequences.get(columnIndex));
            if (alignment > max) {
                max = alignment;
            }
            // Get the identity and make it percent identity
            doubleDistances[rowIndex][columnIndex] = alignment;
        }
        alignmentCounter.increment(columnIndex);
    }
    // divide by max to get the range to 0 to 1 and then convert to short and output
    for (int rowIndex = 0; ((rowIndex < blockSize) & ((row + rowIndex) < noOfSequences)); rowIndex++) {
        int columnIndex = 0;
        for (; ((columnIndex < blockSize) & ((column + columnIndex) < noOfSequences)); columnIndex++) {
            double alignment = doubleDistances[rowIndex][columnIndex] / max;
            short scaledScore = (short) (alignment * Short.MAX_VALUE);
            alignments[rowIndex][columnIndex] = scaledScore;
        }
    }
    SWGWritable dataWritable = new SWGWritable(rowBlock, columnBlock, blockSize, false);
    dataWritable.setMax(max);
    dataWritable.setAlignments(alignments);
    context.write(new LongWritable(rowBlock), dataWritable);
    if (!isDiagonal) {
        // Create the transpose matrix of the (rowBlock, colBlock) block to fill the
        // (colBlock, rowBlock) block.
        SWGWritable inverseDataWritable = new SWGWritable(columnBlock, rowBlock, blockSize, true);
        inverseDataWritable.setAlignments(alignments);
        context.write(new LongWritable(columnBlock), inverseDataWritable);
    }
    LOG.info("Map time : " + ((System.nanoTime() - startTime) / 1000000) + "ms");
}
From source file:edu.uci.ics.hivesterix.test.serde.SerDeTest.java
License:Apache License
/**
 * Test the LazySimpleSerDe class with the LastColumnTakesRest option.
 */
public void testLazySimpleSerDeLastColumnTakesRest() throws Throwable {
    try {
        // Create the SerDe
        LazySimpleSerDe serDe = new LazySimpleSerDe();
        Configuration conf = new Configuration();
        Properties tbl = createProperties();
        tbl.setProperty(Constants.SERIALIZATION_LAST_COLUMN_TAKES_REST, "true");
        serDe.initialize(conf, tbl);
        // Data
        Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t");
        String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\ta\tb\t";
        Object[] expectedFieldsData = { new ByteWritable((byte) 123), new ShortWritable((short) 456),
                new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3),
                new Text("hive and hadoop"), null, new Text("a\tb\t") };
        // Test
        deserializeAndSerialize(serDe, t, s, expectedFieldsData);
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
From source file:edu.uci.ics.hivesterix.test.serde.SerDeTest.java
License:Apache License
/**
 * Test the LazySimpleSerDe class with extra columns.
 */
public void testLazySimpleSerDeExtraColumns() throws Throwable {
    try {
        // Create the SerDe
        LazySimpleSerDe serDe = new LazySimpleSerDe();
        Configuration conf = new Configuration();
        Properties tbl = createProperties();
        serDe.initialize(conf, tbl);
        // Data
        Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t");
        String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\ta";
        Object[] expectedFieldsData = { new ByteWritable((byte) 123), new ShortWritable((short) 456),
                new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3),
                new Text("hive and hadoop"), null, new Text("a") };
        // Test
        deserializeAndSerialize(serDe, t, s, expectedFieldsData);
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
From source file:edu.uci.ics.hivesterix.test.serde.SerDeTest.java
License:Apache License
/**
 * Test the LazySimpleSerDe class with missing columns.
 */
public void testLazySimpleSerDeMissingColumns() throws Throwable {
    try {
        // Create the SerDe
        LazySimpleSerDe serDe = new LazySimpleSerDe();
        Configuration conf = new Configuration();
        Properties tbl = createProperties();
        serDe.initialize(conf, tbl);
        // Data
        Text t = new Text("123\t456\t789\t1000\t5.3\t");
        String s = "123\t456\t789\t1000\t5.3\t\tNULL\tNULL";
        Object[] expectedFieldsData = { new ByteWritable((byte) 123), new ShortWritable((short) 456),
                new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3),
                new Text(""), null, null };
        // Test
        deserializeAndSerialize(serDe, t, s, expectedFieldsData);
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
From source file:edu.uci.ics.hyracks.dataflow.hadoop.util.PreappendLongWritableMapper.java
License:Apache License
@Override
public void map(Object[] data, IDataWriter<Object[]> writer) throws HyracksDataException {
    writer.writeData(new Object[] { new LongWritable(0), new Text(String.valueOf(data[0])) });
}