Example usage for org.apache.hadoop.io.LongWritable LongWritable(long)

List of usage examples for the org.apache.hadoop.io.LongWritable constructor LongWritable(long value)

Introduction

On this page you can find usage examples for the org.apache.hadoop.io.LongWritable constructor LongWritable(long value).

Prototype

public LongWritable(long value) 
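
A minimal, self-contained sketch (not taken from any of the listings below) showing how this constructor is typically used together with get() and set():

import org.apache.hadoop.io.LongWritable;

public class LongWritableExample {
    public static void main(String[] args) {
        // The value is fixed at construction time
        LongWritable count = new LongWritable(42L);
        System.out.println(count.get()); // 42

        // The no-argument form is usually reused and mutated in place
        LongWritable reusable = new LongWritable();
        reusable.set(count.get() + 1);
        System.out.println(reusable.compareTo(count)); // positive, since 43 > 42
    }
}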

Usage

From source file:de.unileipzig.dbs.giraph.algorithms.labelpropagation.LPVertexValue.java

License:Open Source License

/**
 * Gets the desired partition (the current community).
 *
 * @return the desired partition
 */
public LongWritable getCurrentCommunity() {
    return new LongWritable(this.currentCommunity);
}

From source file:de.unileipzig.dbs.giraph.algorithms.labelpropagation.LPVertexValue.java

License:Open Source License

/**
 * Gets the last community (partition).
 *
 * @return the last community
 */
public LongWritable getLastCommunity() {
    return new LongWritable(this.lastCommunity);
}

From source file:edu.brown.cs.mapreduce.generator.DataLoader.java

License:Open Source License

/**
 * @param args command-line arguments
 */
public static void main(String[] args) {
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; i++) {
        if ("-compress".equals(args[i])) {
            DataLoader.compress = true;
            DataLoader.sequence = true;
        } else if ("-sequence".equals(args[i])) {
            DataLoader.sequence = true;
        } else if ("-tuple".equals(args[i])) {
            DataLoader.tuple = true;
        } else if ("-local".equals(args[i])) {
            DataLoader.local = true;
        } else if ("-limit".equals(args[i])) {
            DataLoader.limit = Integer.parseInt(args[++i]);
        } else if ("-xargs".equals(args[i])) {
            DataLoader.xargs = true;
        } else if ("-debug".equals(args[i])) {
            DataLoader.debug = true;
        } else {
            otherArgs.add(args[i]);
        }
    } // FOR

    if (otherArgs.size() < 3 && !DataLoader.xargs) {
        System.err.println("USAGE: DataLoader <input type> <input file> <output file>");
        System.exit(1);
    }

    String input_type = otherArgs.get(0).toLowerCase();
    String input_file = otherArgs.get(1);
    String output_file = null;
    if (DataLoader.xargs) {
        output_file = input_file + ".dl";
    } else {
        output_file = otherArgs.get(2);
    }

    boolean valid = false;
    for (String type : DataLoader.VALID_TYPES) {
        if (type.equals(input_type)) {
            valid = true;
            break;
        }
    }
    if (!valid) {
        System.err.println("ERROR: Invalid input data type '" + input_type + "'");
        System.exit(1);
    }

    if (debug) {
        System.out.println("Input Type:  " + input_type);
        System.out.println("Input File:  " + input_file);
        System.out.println("Output File: " + output_file);
        System.out.println("Limit:       " + DataLoader.limit);
        System.out.println("Local:       " + DataLoader.local);
        System.out.println("XArgs:       " + DataLoader.xargs);
    }

    //
    // Get HDFS filesystem object that we can use for writing
    //
    FileSystem fs = null;
    Configuration conf = null;
    if (!DataLoader.local) {
        conf = AbstractHadoopClient.getConfiguration();
        try {
            fs = FileSystem.get(conf);
        } catch (Exception ex) {
            ex.printStackTrace();
            System.exit(-1);
        }
        if (debug)
            System.out.println("fs.default.name: " + conf.get("fs.default.name"));
    }

    //
    // Now open the file that we want to read and start writing the contents to our file system
    // For some things, like 'urls', we will want to reverse the order so that the data makes sense
    // in our key->value paradigm
    //
    BufferedReader in = null;
    DataOutputStream out = null;
    SequenceFile.Writer writer = null;
    int lines = 0;
    try {
        if (input_file.equals("-")) {
            in = new BufferedReader(new InputStreamReader(System.in));
        } else {
            in = new BufferedReader(new FileReader(input_file));
        }
    } catch (FileNotFoundException ex) {
        System.err.println("ERROR: The input file '" + input_file + "' was not found : " + ex.getMessage());
        System.exit(1);
    }
    try {
        if (!DataLoader.local) {
            //
            // FileSystem Writer
            //
            if (!DataLoader.sequence) {
                out = fs.create(new Path(output_file));
                //
                // SequenceFile Writer
                //
            } else {
                if (input_type.equals("sortgrep"))
                    DataLoader.tuple = false;
                if (DataLoader.debug)
                    System.out.print("Creating " + (DataLoader.compress ? "compressed " : "")
                            + "SequenceFile.Writer for '" + output_file + "': ");
                Class<? extends Writable> key_class = Text.class;
                Class<? extends Writable> value_class = null;
                if (DataLoader.tuple) {
                    if (input_type.equals("uservisits"))
                        value_class = UserVisitsTuple.class;
                    if (input_type.equals("rankings"))
                        value_class = RankingsTuple.class;
                } else {
                    value_class = Text.class;
                }
                writer = SequenceFile.createWriter(fs, conf, new Path(output_file), key_class, value_class,
                        (DataLoader.compress ? SequenceFile.CompressionType.BLOCK
                                : SequenceFile.CompressionType.NONE));
                if (DataLoader.debug)
                    System.out.println("DONE!");
            }
            //
            // Local Filesystem
            //
        } else {
            out = new DataOutputStream(new FileOutputStream(output_file, true));
        }
    } catch (IOException ex) {
        System.err.println("ERROR: Failed to open output file '" + output_file + "' : " + ex.getMessage());
        System.exit(1);
    }
    try {
        //
        // Now read in each line of the input file and append it to our output
        //
        while (in.ready()) {
            //
            // Ignore any malformed lines
            //
            String line = null;
            String key = "";
            String value = "";
            try {
                line = in.readLine();
                String data[] = line.split("\\" + BenchmarkBase.VALUE_DELIMITER);
                //
                // Switch the two values in a rankings record
                //
                if (input_type.equals("rankings")) {
                    key = data[1];
                    value = data[0];
                    for (int i = 2; i < data.length; i++) {
                        value += BenchmarkBase.VALUE_DELIMITER + data[i];
                    } // FOR
                    //
                    // Change the comma to a tab
                    //
                } else if (input_type.equals("convert") || input_type.equals("uservisits")) {
                    key = data[0];
                    for (int i = 1; i < data.length; i++) {
                        if (i != 1)
                            value += BenchmarkBase.VALUE_DELIMITER;
                        value += data[i];
                    } // FOR
                    //
                    // Don't do anything with the SortGrep data!
                    //
                } else if (input_type.equals("sortgrep")) {
                    key = line.substring(0, 10);
                    value = line.substring(10);
                    //
                    // All others need to switch the first VALUE_DELIMITER to a KEYVALUE_DELIMITER
                    //   
                } else {
                    line = line.replaceFirst(BenchmarkBase.VALUE_DELIMITER, BenchmarkBase.KEYVALUE_DELIMITER);
                }
                if (DataLoader.local || !DataLoader.sequence) {
                    line = key + BenchmarkBase.KEYVALUE_DELIMITER + value + "\n";
                    out.write(line.getBytes());
                } else {
                    //if (DataLoader.debug) System.out.println("[" + lines + "] " + key + " => " + value);
                    if (DataLoader.tuple) {
                        try {
                            data = value.split("\\" + BenchmarkBase.VALUE_DELIMITER);
                            Writable tuple_values[] = new Writable[data.length];
                            Class<?> types[] = (input_type.equals("uservisits") ? BenchmarkBase.USERVISITS_TYPES
                                    : BenchmarkBase.RANKINGS_TYPES);
                            for (int ctr = 0; ctr < data.length; ctr++) {
                                //
                                // Important! Offset into the types list by one, because its
                                // first entry describes the key and we are building a tuple
                                // of just the values.
                                //
                                if (types[ctr + 1] == Text.class) {
                                    tuple_values[ctr] = new Text(data[ctr]);
                                } else if (types[ctr + 1] == IntWritable.class) {
                                    tuple_values[ctr] = new IntWritable(Integer.valueOf(data[ctr]));
                                } else if (types[ctr + 1] == DoubleWritable.class) {
                                    tuple_values[ctr] = new DoubleWritable(Double.valueOf(data[ctr]));
                                } else if (types[ctr + 1] == LongWritable.class) {
                                    tuple_values[ctr] = new LongWritable(Long.valueOf(data[ctr]));
                                } else if (types[ctr + 1] == FloatWritable.class) {
                                    tuple_values[ctr] = new FloatWritable(Float.valueOf(data[ctr]));
                                } else {
                                    System.err.println("Unsupported Class: " + types[ctr + 1]);
                                    System.exit(1);
                                }
                                if (DataLoader.debug)
                                    System.out.println("tuple_values[" + ctr + "] = " + tuple_values[ctr]);
                            }
                            AbstractTuple tuple = (input_type.equals("uservisits")
                                    ? new UserVisitsTuple(tuple_values)
                                    : new RankingsTuple(tuple_values));
                            if (DataLoader.debug)
                                System.out.println("STORING TUPLE: " + tuple + " (DATA " + data + " | VALUE "
                                        + value + ")");
                            writer.append(new Text(key), tuple);
                        } catch (Exception ex) {
                            ex.printStackTrace();
                            System.err.println("Error[" + output_file + "]");
                            System.err.println("## Line:    " + lines);
                            System.err.println("## Content: " + line);
                        }
                    } else {
                        writer.append(new Text(key), new Text(value));
                    }
                }
                lines++;
                if (DataLoader.limit != null && lines >= DataLoader.limit)
                    break;
                if (DataLoader.debug && lines % 1000000 == 0)
                    System.out.println(
                            "\tWrote " + lines + " '" + input_type + "' records to '" + output_file + "'");
            } catch (Exception ex) {
                System.err.println("Error[" + output_file + "]");
                System.err.println("## Line:    " + lines);
                System.err.println("## Content: " + line);
                ex.printStackTrace();
                System.exit(1);
            }
        } // WHILE
    } catch (Exception ex) {
        ex.printStackTrace();
        System.exit(1);
    } finally {
        try {
            if (in != null)
                in.close();
            if (out != null)
                out.close();
            if (writer != null)
                writer.close();
        } catch (Exception ex) {
            ex.printStackTrace();
            System.exit(1);
        }
    }
    System.out.println("Wrote " + lines + " '" + input_type + "' records to '" + output_file + "'");
}

From source file:edu.dfci.cccb.mev.kmeans.domain.hadoop.HadoopKMeansBuilder.java

License:Open Source License

@Override
public KMeans build() throws DatasetException {
    try (TemporaryFolder hadoop = new TemporaryFolder()) {
        File points = new File(hadoop, "points");
        points.mkdir();

        Configuration configuration = new Configuration();
        FileSystem system = get(configuration);
        final Dimension other = dataset().dimension(dimension().type() == ROW ? COLUMN : ROW);

        List<NamedVector> vectors = new AbstractList<NamedVector>() {

            @Override
            public NamedVector get(int index) {
                final String vector = dimension().keys().get(index);
                return new NamedVector(new AbstractVector(other.keys().size()) {

                    @Override
                    public void setQuick(int index, double value) {
                        throw new UnsupportedOperationException();
                    }

                    @Override
                    public Vector like() {
                        return new RandomAccessSparseVector(size());
                    }

                    @Override
                    public Iterator<Element> iterator() {
                        return new Iterator<Element>() {
                            private int current = 0;

                            @Override
                            public boolean hasNext() {
                                return current < other.keys().size();
                            }

                            @Override
                            public Element next() {
                                return new Element() {
                                    private final int index = current++;

                                    @Override
                                    public void set(double value) {
                                        throw new UnsupportedOperationException();
                                    }

                                    @Override
                                    public int index() {
                                        return index;
                                    }

                                    @Override
                                    @SneakyThrows(InvalidCoordinateException.class)
                                    public double get() {
                                        return dimension().type() == ROW
                                                ? dataset().values().get(vector, other.keys().get(index))
                                                : dataset().values().get(other.keys().get(index), vector);
                                    }
                                };
                            }

                            @Override
                            public void remove() {
                                throw new UnsupportedOperationException();
                            }
                        };
                    }

                    @Override
                    public Iterator<Element> iterateNonZero() {
                        return iterator();
                    }

                    @Override
                    public boolean isSequentialAccess() {
                        return true;
                    }

                    @Override
                    public boolean isDense() {
                        return true;
                    }

                    @Override
                    @SneakyThrows(InvalidCoordinateException.class)
                    public double getQuick(int index) {
                        return dimension().type() == ROW
                                ? dataset().values().get(vector, other.keys().get(index))
                                : dataset().values().get(other.keys().get(index), vector);
                    }

                    @Override
                    public int getNumNondefaultElements() {
                        return other.keys().size();
                    }

                    @Override
                    protected Matrix matrixLike(int rows, int columns) {
                        throw new UnsupportedOperationException();
                    }
                }, vector);
            }

            @Override
            public int size() {
                return dimension().keys().size();
            }
        };

        // write input
        try (Writer writer = new Writer(system, configuration,
                new Path(new File(points, "file1").getAbsolutePath()), LongWritable.class,
                VectorWritable.class)) {
            VectorWritable writable = new VectorWritable();
            long record = 0;
            for (Vector vector : vectors) {
                writable.set(vector);
                writer.append(new LongWritable(record++), writable);
            }
        }

        // prepare clusters
        File clusters = new File(hadoop, "clusters");
        clusters.mkdir();
        try (Writer writer = new Writer(system, configuration,
                new Path(new File(clusters, "part-00000").getAbsolutePath()), Text.class, Cluster.class)) {
            for (int i = 0; i < k(); i++) {
                Vector vec = vectors.get(i);
                Cluster cluster = new Cluster(vec, i, new EuclideanDistanceMeasure());
                writer.append(new Text(cluster.getIdentifier()), cluster);
            }
        }

        File output = new File(hadoop, "output");
        output.mkdir();

        try {
            run(configuration, new Path(points.getAbsolutePath()), new Path(clusters.getAbsolutePath()),
                    new Path(output.getAbsolutePath()), metric.measurer(), convergence(), iterations(), true,
                    false);

            try (Reader reader = new Reader(system, new Path(
                    new File(new File(output, CLUSTERED_POINTS_DIR), "/part-m-00000").getAbsolutePath()),
                    configuration)) {
                IntWritable key = new IntWritable();
                WeightedVectorWritable value = new WeightedVectorWritable();
                Map<String, Set<String>> result = new HashMap<>();

                while (reader.next(key, value)) {
                    Set<String> cluster = result.get(key.toString());
                    if (cluster == null)
                        result.put(key.toString(), cluster = new HashSet<>());
                    cluster.add(((NamedVector) value.getVector()).getName());
                }

                return new AbstractKMeans() {
                }.dataset(dataset()).dimension(dimension()).name(name()).type(type())
                        .clusters(new HashSet<>(result.values()));
            }
        } catch (ClassNotFoundException | InterruptedException e) {
            throw new DatasetException(e);
        }
    } catch (IOException e) {
        throw new DatasetException(e);
    }
}

From source file:edu.indiana.soic.ts.mapreduce.pwd.PairWiseDistance.java

License:Open Source License

private void distributeData(int blockSize, Configuration conf, FileSystem fs, Path hdInputDir,
        int noOfDivisions) throws IOException {
    // Write the metadata for each block to a separate file so that
    // Hadoop will create a separate map task for each block.
    // Key : block number
    // Value: row#column#isDiagonal#base_file_name
    // TODO : find a better way to do this.
    for (int row = 0; row < noOfDivisions; row++) {
        for (int column = 0; column < noOfDivisions; column++) {
            // using the load balancing algorithm to select the blocks
            // include the diagonal blocks as they are blocks, not
            // individual pairs
            if (((row >= column) & ((row + column) % 2 == 0)) | ((row <= column) & ((row + column) % 2 == 1))) {
                Path vFile = new Path(hdInputDir, "data_file_" + row + "_" + column);
                SequenceFile.Writer vWriter = SequenceFile.createWriter(fs, conf, vFile, LongWritable.class,
                        Text.class, CompressionType.NONE);

                boolean isDiagonal = false;
                if (row == column) {
                    isDiagonal = true;
                }
                String value = row + Constants.BREAK + column + Constants.BREAK + isDiagonal + Constants.BREAK
                        + Constants.HDFS_SEQ_FILENAME;
                vWriter.append(new LongWritable(row * blockSize + column), new Text(value));
                vWriter.close();
            }
        }
    }
}
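
The method above only writes the per-block metadata; as a hedged counterpart sketch (not part of PairWiseDistance, and assuming fs, conf, and vFile refer to the same filesystem, configuration, and block file used above), such a LongWritable-keyed SequenceFile could be read back like this:

// Assumption: fs, conf, and vFile are the same objects used in distributeData() above.
SequenceFile.Reader vReader = new SequenceFile.Reader(fs, vFile, conf);
try {
    LongWritable key = new LongWritable(); // reused across records
    Text value = new Text();
    while (vReader.next(key, value)) {
        System.out.println(key.get() + " -> " + value);
    }
} finally {
    vReader.close();
}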

From source file:edu.indiana.soic.ts.mapreduce.pwd.SWGMap.java

License:Open Source License

public void map(LongWritable blockIndex, Text value, Context context) throws IOException, InterruptedException {
    long startTime = System.nanoTime();
    Configuration conf = context.getConfiguration();
    Counter alignmentCounter = context.getCounter(Constants.RecordCounters.ALIGNMENTS);
    String valString = value.toString();
    String valArgs[] = valString.split(Constants.BREAK);

    long rowBlock = Long.parseLong(valArgs[0]);
    long columnBlock = Long.parseLong(valArgs[1]);
    boolean isDiagonal = Boolean.parseBoolean(valArgs[2]);
    LOG.info("row column" + rowBlock + "  " + columnBlock + "  " + isDiagonal + "  " + valArgs[2]);

    long row = rowBlock * blockSize;
    long column = columnBlock * blockSize;

    long parseStartTime = System.nanoTime();
    FileSystem fs = FileSystem.getLocal(conf);
    // parse the inputFilePart for row
    Path rowPath = new Path(Constants.HDFS_SEQ_FILENAME + "_" + rowBlock);
    FSDataInputStream rowInStream = fs.open(rowPath);
    List<VectorPoint> rowSequences = SequenceParser.ParseFile(rowInStream);
    // parse the inputFilePart for column if this is not a diagonal block
    List<VectorPoint> colSequences;
    if (isDiagonal) {
        colSequences = rowSequences;
    } else {
        // parse the inputFilePart for column
        Path colPath = new Path(Constants.HDFS_SEQ_FILENAME + "_" + columnBlock);
        FSDataInputStream colInStream = fs.open(colPath);
        colSequences = SequenceParser.ParseFile(colInStream);
    }
    LOG.info("Parsing time : " + ((System.nanoTime() - parseStartTime) / 1000000) + "ms");

    short[][] alignments = new short[(int) blockSize][(int) blockSize];
    double[][] doubleDistances = new double[(int) blockSize][(int) blockSize];
    double max = Double.MIN_VALUE;
    for (int rowIndex = 0; ((rowIndex < blockSize) & ((row + rowIndex) < noOfSequences)); rowIndex++) {
        int columnIndex = 0;
        for (; ((columnIndex < blockSize) & ((column + columnIndex) < noOfSequences)); columnIndex++) {
            double alignment;
            alignment = distFunc.calc(rowSequences.get(rowIndex), colSequences.get(columnIndex));
            if (alignment > max) {
                max = alignment;
            }
            // Get the identity and make it percent identity
            doubleDistances[rowIndex][columnIndex] = alignment;
        }
        alignmentCounter.increment(columnIndex);
    }

    // divide by max to get the range to 0 to 1 and then convert to short and output
    for (int rowIndex = 0; ((rowIndex < blockSize) & ((row + rowIndex) < noOfSequences)); rowIndex++) {
        int columnIndex = 0;
        for (; ((columnIndex < blockSize) & ((column + columnIndex) < noOfSequences)); columnIndex++) {
            double alignment = doubleDistances[rowIndex][columnIndex] / max;
            short scaledScore = (short) (alignment * Short.MAX_VALUE);
            alignments[rowIndex][columnIndex] = scaledScore;
        }
    }

    SWGWritable dataWritable = new SWGWritable(rowBlock, columnBlock, blockSize, false);
    dataWritable.setMax(max);
    dataWritable.setAlignments(alignments);
    context.write(new LongWritable(rowBlock), dataWritable);

    if (!isDiagonal) {
        // Create the transpose matrix of (rowBlock, colBlock) block to fill the
        // (colBlock, rowBlock) block.
        SWGWritable inverseDataWritable = new SWGWritable(columnBlock, rowBlock, blockSize, true);
        inverseDataWritable.setAlignments(alignments);
        context.write(new LongWritable(columnBlock), inverseDataWritable);
    }
    LOG.info("Map time : " + ((System.nanoTime() - startTime) / 1000000) + "ms");
}

From source file:edu.uci.ics.hivesterix.test.serde.SerDeTest.java

License:Apache License

/**
 * Test the LazySimpleSerDe class with LastColumnTakesRest option.
 */
public void testLazySimpleSerDeLastColumnTakesRest() throws Throwable {
    try {
        // Create the SerDe
        LazySimpleSerDe serDe = new LazySimpleSerDe();
        Configuration conf = new Configuration();
        Properties tbl = createProperties();
        tbl.setProperty(Constants.SERIALIZATION_LAST_COLUMN_TAKES_REST, "true");
        serDe.initialize(conf, tbl);

        // Data
        Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t");
        String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\ta\tb\t";
        Object[] expectedFieldsData = { new ByteWritable((byte) 123), new ShortWritable((short) 456),
                new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3),
                new Text("hive and hadoop"), null, new Text("a\tb\t") };

        // Test
        deserializeAndSerialize(serDe, t, s, expectedFieldsData);

    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}

From source file:edu.uci.ics.hivesterix.test.serde.SerDeTest.java

License:Apache License

/**
 * Test the LazySimpleSerDe class with extra columns.
 */
public void testLazySimpleSerDeExtraColumns() throws Throwable {
    try {
        // Create the SerDe
        LazySimpleSerDe serDe = new LazySimpleSerDe();
        Configuration conf = new Configuration();
        Properties tbl = createProperties();
        serDe.initialize(conf, tbl);

        // Data
        Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t");
        String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\ta";
        Object[] expectedFieldsData = { new ByteWritable((byte) 123), new ShortWritable((short) 456),
                new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3),
                new Text("hive and hadoop"), null, new Text("a") };

        // Test
        deserializeAndSerialize(serDe, t, s, expectedFieldsData);

    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}

From source file:edu.uci.ics.hivesterix.test.serde.SerDeTest.java

License:Apache License

/**
 * Test the LazySimpleSerDe class with missing columns.
 */
public void testLazySimpleSerDeMissingColumns() throws Throwable {
    try {
        // Create the SerDe
        LazySimpleSerDe serDe = new LazySimpleSerDe();
        Configuration conf = new Configuration();
        Properties tbl = createProperties();
        serDe.initialize(conf, tbl);

        // Data
        Text t = new Text("123\t456\t789\t1000\t5.3\t");
        String s = "123\t456\t789\t1000\t5.3\t\tNULL\tNULL";
        Object[] expectedFieldsData = { new ByteWritable((byte) 123), new ShortWritable((short) 456),
                new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), new Text(""), null,
                null };

        // Test
        deserializeAndSerialize(serDe, t, s, expectedFieldsData);

    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}

From source file:edu.uci.ics.hyracks.dataflow.hadoop.util.PreappendLongWritableMapper.java

License:Apache License

@Override
public void map(Object[] data, IDataWriter<Object[]> writer) throws HyracksDataException {
    writer.writeData(new Object[] { new LongWritable(0), new Text(String.valueOf(data[0])) });
}