Example usage for org.apache.hadoop.io LongWritable get

Introduction

On this page you can find example usages of org.apache.hadoop.io.LongWritable.get().

Prototype

public long get()

Source Link

Document

Return the value of this LongWritable.

Usage

From source file:edu.umd.cloud9.io.HashMapWritableTest.java

License:Apache License

/**
 * Round-trips a {@code HashMapWritable<Text, LongWritable>} through
 * {@code write}/{@code readFields} using in-memory streams and checks that
 * both entries come back with their original values.
 *
 * @throws IOException if serialization or deserialization fails
 */
@Test
public void testSerialize2() throws IOException {
    HashMapWritable<Text, LongWritable> origMap = new HashMapWritable<Text, LongWritable>();
    origMap.put(new Text("hi"), new LongWritable(52));
    origMap.put(new Text("there"), new LongWritable(77));

    // Serialize the populated map into an in-memory buffer.
    ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
    DataOutputStream dataOut = new DataOutputStream(bytesOut);
    origMap.write(dataOut);

    // Rebuild a fresh map from the serialized bytes.
    HashMapWritable<Text, LongWritable> map = new HashMapWritable<Text, LongWritable>();
    map.readFields(new DataInputStream(new ByteArrayInputStream(bytesOut.toByteArray())));

    // Expected value goes first so that failure messages read correctly.
    assertEquals(2, map.size());

    Text key = new Text("hi");
    LongWritable value = map.get(key);
    assertTrue(value != null);
    assertEquals(52, value.get());

    // Removing the first entry must leave exactly the second one behind.
    map.remove(key);
    assertEquals(1, map.size());

    key = new Text("there");
    value = map.get(key);
    assertTrue(value != null);
    assertEquals(77, value.get());
}

From source file:edu.umn.cs.spatialHadoop.operations.Sampler.java

License:Open Source License

/**
 * Samples records from the given files until the accumulated length of the
 * sampled records reaches the value of the "size" parameter.
 *
 * Sampling is done in rounds through {@code sampleLocalByCount}. Each round
 * requests a number of records estimated from the bytes still missing and the
 * average record size observed so far (at least 10 records per round).
 *
 * @param files  the input files to sample from
 * @param output receives the sampled records after conversion
 * @param params operation parameters; reads "shape", "outshape", "size" and "seed"
 * @return the sum of the counts returned by all sampling rounds
 * @throws IOException if reading the input fails
 */
private static <T extends TextSerializable> int sampleLocalWithSize(Path[] files,
        final ResultCollector<T> output, OperationsParams params) throws IOException {

    int averageRecordSize = 1024; // A wild guess for record size
    final LongWritable sampledBytes = new LongWritable();
    int sampleCount = 0;

    // Final references so the anonymous collector below can capture them.
    final TextSerializable inObj = OperationsParams.getTextSerializable(params, "shape", new Text2());
    final T outObj = (T) OperationsParams.getTextSerializable(params, "outshape", new Text2());
    final ResultCollector<TextSerializable> converter = createConverter(output, inObj, outObj);

    // Tracks the number of bytes sampled so far and forwards each record,
    // parsed into inObj, to the converter.
    final ResultCollector<Text2> counter = new ResultCollector<Text2>() {
        @Override
        public void collect(Text2 r) {
            sampledBytes.set(sampledBytes.get() + r.getLength());
            inObj.fromText(r);
            converter.collect(inObj);
        }
    };

    final long totalSize = params.getLong("size", 0);
    long seed = params.getLong("seed", System.currentTimeMillis());

    while (sampledBytes.get() < totalSize) {
        int count = (int) ((totalSize - sampledBytes.get()) / averageRecordSize);
        if (count < 10) {
            count = 10;
        }

        OperationsParams params2 = new OperationsParams(params);
        params2.setClass("shape", Text2.class, TextSerializable.class);
        params2.setClass("outshape", Text2.class, TextSerializable.class);
        params2.setInt("count", count);
        params2.setLong("seed", seed);

        final long bytesBeforeRound = sampledBytes.get();
        sampleCount += sampleLocalByCount(files, counter, params2);

        // A round that yields nothing (e.g. empty input or only zero-length
        // records) can never satisfy the loop condition. It would also make
        // the average below divide by zero, so stop instead of spinning or
        // crashing.
        if (sampleCount <= 0 || sampledBytes.get() == bytesBeforeRound) {
            break;
        }

        // Change the seed to get different sample next time.
        // Still we need to ensure that repeating the program will generate
        // the same value
        seed += sampleCount;
        // Update the average record size; keep it at least 1 so that the
        // count estimate at the top of the loop never divides by zero.
        averageRecordSize = (int) Math.max(1L, sampledBytes.get() / sampleCount);
    }
    return sampleCount;
}

From source file:edu.utsa.sifter.som.MainSOM.java

License:Apache License

/**
 * Trains a self-organizing map over the documents in {@code seqRdr}, assigns
 * every document to a cell, indexes it through {@code addDoc}, and finally
 * writes the SOM statistics via {@code somStats}.
 *
 * The reader is rewound to its starting position after every training
 * iteration, so the assignment pass reads the same documents again.
 * {@code somJS}, the builder and {@code seqRdr} are always released at the
 * end, even if one of the clean-up calls itself throws.
 *
 * @param conf   configuration supplying the SOM dimensions, iteration count,
 *               alpha/radius bounds and random seed
 * @param seqRdr sequence file of (document id, term vector) pairs; closed on exit
 * @param writer index writer handed to {@code addDoc}
 * @param somJS  destination handed to {@code somStats}; closed on exit
 * @throws IOException          if reading, indexing or writing fails
 * @throws InterruptedException propagated from the SOM builder
 * @throws ExecutionException   propagated from the SOM builder
 */
void makeSOM(final SifterConfig conf, final SequenceFile.Reader seqRdr, final IndexWriter writer,
        final Writer somJS) throws IOException, InterruptedException, ExecutionException {
    final IntArrayWritable docVec = new IntArrayWritable(TermIndices.size());
    final LongWritable id = new LongWritable();
    final SelfOrganizingMap som = new SelfOrganizingMap(conf.SOM_HEIGHT, conf.SOM_WIDTH, TermIndices.size());
    final SOMBuilder builder = new SOMBuilder(som, conf);
    try {
        som.init(new Uniform(0.0, 1.0, conf.RANDOM_SEED));

        // Per-iteration step for alpha and radius; zero when there is only a
        // single iteration so that no division by zero occurs.
        final double alphaStep = conf.NUM_SOM_ITERATIONS > 1
                ? (conf.MAX_ALPHA - conf.MIN_ALPHA) / (conf.NUM_SOM_ITERATIONS - 1)
                : 0;
        final double radiusStep = conf.NUM_SOM_ITERATIONS > 1
                ? ((double) conf.MAX_NEIGHBOR_RADIUS - conf.MIN_NEIGHBOR_RADIUS) / (conf.NUM_SOM_ITERATIONS - 1)
                : 0;
        final long seqRdrStart = seqRdr.getPosition();

        builder.setSteps(alphaStep, radiusStep);
        for (int i = 0; i < conf.NUM_SOM_ITERATIONS; ++i) {
            builder.iterate(seqRdr);
            System.out.println("Finished iteration " + i);
            // Rewind so the next pass starts from the first document again.
            seqRdr.seek(seqRdrStart);
        }

        System.out.println("Assigning documents to clusters");
        final ArrayList<ArrayList<Long>> clusters = new ArrayList<ArrayList<Long>>(som.numCells());
        for (int i = 0; i < som.numCells(); ++i) {
            clusters.add(new ArrayList<Long>());
        }
        while (seqRdr.next(id, docVec)) {
            // Documents with an empty vector keep winner == null and cellID == -1;
            // they are still passed to addDoc but join no cluster.
            CellDistance winner = null;
            int cellID = -1;
            if (docVec.getLength() > 0) {
                winner = builder.findMin(id.get(), docVec);
                cellID = winner.ID;
            }
            if (cellID > -1) {
                clusters.get(cellID).add(id.get());
                som.assignCell(cellID, winner.Distance);
            }
            addDoc(writer, som, id.get(), winner, cellID);
        }
        System.out.println("Rescaling SOM vectors");
        som.rescale(); // set weights[i] = f[i] * weights[i], f[i] = 1.0; for distance calcs between cells
        System.out.println("Assigning top terms to each cell");
        som.assignTopTerms(conf.NUM_TOP_CELL_TERMS, Terms);
        System.out.println("Calculating greatest neighbor term difference");
        som.assignTermDiffs();
        System.out.println("Assigning cells to regions");
        builder.assignRegions();
        System.out.println("Writing final output");
        somStats(conf, som, clusters, somJS);
    } finally {
        // Nested so that a failure in one clean-up step cannot skip the others.
        try {
            somJS.close();
        } finally {
            try {
                builder.shutdown();
            } finally {
                seqRdr.close();
            }
        }
    }
}

From source file:edu.utsa.sifter.som.SOMBuilder.java

License:Apache License

/**
 * Runs one training pass over every document in {@code docs}.
 *
 * Each document with a non-empty vector is handed to {@code processDoc}.
 * Progress is printed every 1000 documents, and once the pass is finished
 * the current radius and alpha are decreased by their configured steps.
 *
 * @param docs reader positioned at the first (id, vector) pair to process
 * @throws IOException          if reading from {@code docs} fails
 * @throws InterruptedException propagated from {@code processDoc}
 * @throws ExecutionException   propagated from {@code processDoc}
 */
void iterate(final SequenceFile.Reader docs) throws IOException, InterruptedException, ExecutionException {
    System.out.println("CurAlpha = " + CurAlpha + ", CurRadius = " + CurRadius);

    final LongWritable docId = new LongWritable();
    final IntArrayWritable termVector = new IntArrayWritable(256);
    int docsSeen = 0;
    int docsMatched = 0;

    while (docs.next(docId, termVector)) {
        // Empty vectors are counted but never sent to processDoc.
        final boolean hasTerms = termVector.getLength() > 0;
        if (hasTerms && processDoc(docId.get(), termVector)) {
            docsMatched++;
        }
        docsSeen++;
        if (docsSeen % 1000 == 0) {
            System.out.println(docsSeen + " cycles processed");
        }
    }

    System.out.println(
            "processed " + docsSeen + " docs in this iteration, " + docsMatched + " had closest cells");

    // Move the learning parameters one step for the next pass.
    CurRadius -= RadiusStep;
    CurAlpha -= AlphaStep;
}

From source file:example.TestLineRecordReader.java

License:Apache License

/**
 * Checks the key (byte position) and value reported by {@code LineRecordReader}
 * for uncompressed input with a custom multi-character record delimiter.
 *
 * Three scenarios are covered: a plain "++" delimiter split across two
 * file splits, input that contains a partial delimiter ("+") inside a
 * record, and a "|+|" delimiter exercised over a range of buffer and split
 * sizes.
 *
 * @throws Exception if the input file cannot be created or read
 */
@Test
public void testUncompressedInputCustomDelimiterPosValue() throws Exception {
    Configuration conf = new Configuration();
    conf.setInt("io.file.buffer.size", 10);
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    String inputData = "abcdefghij++kl++mno";
    Path inputFile = createInputFile(conf, inputData);
    String delimiter = "++";
    byte[] recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    int splitLength = 15;
    FileSplit split = new FileSplit(inputFile, 0, splitLength, (String[]) null);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    LineRecordReader reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    // Get first record: "abcdefghij"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    LongWritable key = reader.getCurrentKey();
    Text value = reader.getCurrentValue();
    assertEquals("Wrong length for record value", 10, value.getLength());
    assertEquals("Wrong position after record read", 0, key.get());
    // Get second record: "kl"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    assertEquals("Wrong length for record value", 2, value.getLength());
    // Key should be 12 right after "abcdefghij++"
    assertEquals("Wrong position after record read", 12, key.get());
    // Get third record: "mno"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    assertEquals("Wrong length for record value", 3, value.getLength());
    // Key should be 16 right after "abcdefghij++kl++"
    assertEquals("Wrong position after record read", 16, key.get());
    assertFalse(reader.nextKeyValue());
    // Key should be 19 right after "abcdefghij++kl++mno"
    assertEquals("Wrong position after record read", 19, key.get());
    // after refresh should be empty
    key = reader.getCurrentKey();
    assertNull("Unexpected key returned", key);
    reader.close();
    split = new FileSplit(inputFile, splitLength, inputData.length() - splitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    // No record is in the second split because the second split dropped
    // the first record, which was already reported by the first split.
    assertFalse("Unexpected record returned", reader.nextKeyValue());
    key = reader.getCurrentKey();
    assertNull("Unexpected key returned", key);
    reader.close();

    // multi char delimiter with starting part of the delimiter in the data
    inputData = "abcd+efgh++ijk++mno";
    inputFile = createInputFile(conf, inputData);
    splitLength = 5;
    split = new FileSplit(inputFile, 0, splitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    // Get first record: "abcd+efgh"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    assertEquals("Wrong position after record read", 0, key.get());
    assertEquals("Wrong length for record value", 9, value.getLength());
    // should have jumped over the delimiter, no record
    assertFalse(reader.nextKeyValue());
    assertEquals("Wrong position after record read", 11, key.get());
    // after refresh should be empty
    key = reader.getCurrentKey();
    assertNull("Unexpected key returned", key);
    reader.close();
    // next split: check for duplicate or dropped records
    split = new FileSplit(inputFile, splitLength, inputData.length() - splitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    // Get second record: "ijk" first in this split
    assertEquals("Wrong position after record read", 11, key.get());
    assertEquals("Wrong length for record value", 3, value.getLength());
    // Get third record: "mno" second in this split
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    assertEquals("Wrong position after record read", 16, key.get());
    assertEquals("Wrong length for record value", 3, value.getLength());
    // should be at the end of the input
    assertFalse(reader.nextKeyValue());
    assertEquals("Wrong position after record read", 19, key.get());
    reader.close();

    inputData = "abcd|efgh|+|ij|kl|+|mno|pqr";
    inputFile = createInputFile(conf, inputData);
    delimiter = "|+|";
    recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    // walking over the buffer and split sizes checks for proper processing
    // of the ambiguous bytes of the delimiter
    // NOTE(review): context was created from conf before this loop; confirm
    // that the buffer-size change below actually reaches the reader.
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            // track where we are in the inputdata
            int keyPosition = 0;
            conf.setInt("io.file.buffer.size", bufferSize);
            // Use the inner loop's split size so that every buffer/split
            // combination is exercised, as the comment above intends.
            split = new FileSplit(inputFile, 0, splitSize, (String[]) null);
            reader = new LineRecordReader(recordDelimiterBytes);
            reader.initialize(split, context);
            // Get the first record: "abcd|efgh" always possible
            assertTrue("Expected record got nothing", reader.nextKeyValue());
            key = reader.getCurrentKey();
            value = reader.getCurrentValue();
            assertEquals("abcd|efgh", value.toString());
            // Position should be 0 right at the start
            assertEquals("Wrong position after record read", keyPosition, key.get());
            // Position should be 12 right after the first "|+|"
            keyPosition = 12;
            // get the next record: "ij|kl" if the split/buffer allows it
            if (reader.nextKeyValue()) {
                // check the record info: "ij|kl"
                assertEquals("ij|kl", value.toString());
                assertEquals("Wrong position after record read", keyPosition, key.get());
                // Position should be 20 after the second "|+|"
                keyPosition = 20;
            }
            // get the third record: "mno|pqr" if the split/buffer allows it
            if (reader.nextKeyValue()) {
                // check the record info: "mno|pqr"
                assertEquals("mno|pqr", value.toString());
                assertEquals("Wrong position after record read", keyPosition, key.get());
                // Position should be the end of the input
                keyPosition = inputData.length();
            }
            assertFalse("Unexpected record returned", reader.nextKeyValue());
            // no more records can be read we should be at the last position
            assertEquals("Wrong position after record read", keyPosition, key.get());
            // after refresh should be empty
            key = reader.getCurrentKey();
            assertNull("Unexpected key returned", key);
            reader.close();
        }
    }
}

From source file:example.TestLineRecordReader.java

License:Apache License

/**
 * Checks the key (byte position) and value reported by {@code LineRecordReader}
 * for uncompressed input using the default line delimiters (CR, LF, CRLF).
 *
 * Every read that is expected to produce a record is asserted to succeed,
 * and each reader is closed before the next one is created.
 *
 * @throws Exception if the input file cannot be created or read
 */
@Test
public void testUncompressedInputDefaultDelimiterPosValue() throws Exception {
    Configuration conf = new Configuration();
    String inputData = "1234567890\r\n12\r\n345";
    Path inputFile = createInputFile(conf, inputData);
    conf.setInt("io.file.buffer.size", 10);
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    FileSplit split = new FileSplit(inputFile, 0, 15, (String[]) null);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    LineRecordReader reader = new LineRecordReader(null);
    reader.initialize(split, context);
    LongWritable key;
    Text value;
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    // Get first record:"1234567890"
    assertEquals(10, value.getLength());
    assertEquals(0, key.get());
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    // Get second record:"12"
    assertEquals(2, value.getLength());
    // Key should be 12 right after "1234567890\r\n"
    assertEquals(12, key.get());
    assertFalse(reader.nextKeyValue());
    // Key should be 16 right after "1234567890\r\n12\r\n"
    assertEquals(16, key.get());
    reader.close();

    split = new FileSplit(inputFile, 15, 4, (String[]) null);
    reader = new LineRecordReader(null);
    reader.initialize(split, context);
    // The second split dropped the first record "\n"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    // Get third record:"345"
    assertEquals(3, value.getLength());
    // Key should be 16 right after "1234567890\r\n12\r\n"
    assertEquals(16, key.get());
    assertFalse(reader.nextKeyValue());
    // Key should be 19 right after "1234567890\r\n12\r\n345"
    assertEquals(19, key.get());
    reader.close();

    inputData = "123456789\r\r\n";
    inputFile = createInputFile(conf, inputData);
    split = new FileSplit(inputFile, 0, 12, (String[]) null);
    reader = new LineRecordReader(null);
    reader.initialize(split, context);
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    // Get first record:"123456789"
    assertEquals(9, value.getLength());
    assertEquals(0, key.get());
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    // Get second record:""
    assertEquals(0, value.getLength());
    // Key should be 10 right after "123456789\r"
    assertEquals(10, key.get());
    assertFalse(reader.nextKeyValue());
    // Key should be 12 right after "123456789\r\r\n"
    assertEquals(12, key.get());
    reader.close();
}

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.Summarize.java

License:Open Source License

/**
 * Folds the ranges for one key into the running summary groups.
 *
 * The reference ID is taken from the upper 32 bits of the key. When it
 * differs from the one currently being processed, the current ID is updated
 * and {@code doAllSummaries()} is called before any range is accumulated.
 * Each range is then added to either the reverse-strand or the forward-strand
 * groups, and a group is summarized as soon as its count reaches its level.
 */
@Override
protected void reduce(LongWritable key, Iterable<Range> ranges,
        Reducer<LongWritable, Range, NullWritable, RangeCount>.Context context)
        throws IOException, InterruptedException {
    final int referenceID = (int) (key.get() >>> 32);

    // A new reference sequence means everything gathered so far has to be
    // flushed out before starting from scratch.
    if (currentReferenceID != referenceID) {
        currentReferenceID = referenceID;
        doAllSummaries();
    }

    for (final Range range : ranges) {
        final int rangeBeg = range.beg.get();
        final int rangeEnd = range.end.get();

        // Pick the group list matching the strand of this range.
        final List<SummaryGroup> strandGroups;
        if (range.reverseStrand.get()) {
            strandGroups = summaryGroupsR;
        } else {
            strandGroups = summaryGroupsF;
        }

        for (SummaryGroup group : strandGroups) {
            group.sumBeg += rangeBeg;
            group.sumEnd += rangeEnd;
            if (++group.count == group.level) {
                doSummary(group);
            }
        }
    }
}

From source file:fileformats.AuctionVertexValue.java

License:Apache License

/**
 * Copies the value held by {@code l} into {@code colOwned}.
 *
 * @param l source of the new value; must not be null
 */
public void setColOwned(LongWritable l) {
    final long ownedColumn = l.get();
    colOwned.set(ownedColumn);
}

From source file:fileformats.AuctionVertexValue.java

License:Apache License

/**
 * Copies the value held by {@code l} into {@code rowOwnedBy}.
 *
 * @param l source of the new value; must not be null
 */
public void setRowOwnedBy(LongWritable l) {
    final long owningRow = l.get();
    rowOwnedBy.set(owningRow);
}

From source file:fm.last.darling.mapred.MapperWrapper.java

License:Apache License

/**
 * Runs the user's mapper on one input record and emits its measurements
 * once per requested projection and measurement unit.
 *
 * @param key       input key; its long value is passed to the user mapper
 * @param value     input line; passed to the user mapper as a string
 * @param collector receives one (point, measurement) pair per projection and unit
 * @param reporter  not used by this method
 * @throws IOException if the collector fails
 */
public void map(LongWritable key, Text value, OutputCollector<NSpacePoint, IntWritable> collector,
        Reporter reporter) throws IOException {
    // Let the user's mapper fill in timestamp, dimensions and measurements.
    final ZohmgOutputCollector userOutput = new ZohmgOutputCollector();
    usermapper.map(key.get(), value.toString(), userOutput);
    final long timestamp = userOutput.getTimestamp();
    final TreeMap<String, String> dimensions = userOutput.getDimensions();

    // Fan out: one projection per requested set of dimensions.
    for (final List<Dimension> requestedDimensions : rps) {
        // Keep only the dimensions this projection asks for.
        final TreeMap<String, String> projected = Projectionist.dimensionality_reduction(requestedDimensions,
                dimensions);
        // Emit once for every unit.
        for (final String unit : userOutput.measurementUnits()) {
            collector.collect(new NSpacePoint(timestamp, projected, unit), userOutput.getMeasurement(unit));
        }
    }
}