List of usage examples for the get() method of org.apache.hadoop.io.LongWritable
public long get()
From source file:edu.umd.cloud9.io.HashMapWritableTest.java
License:Apache License
/**
 * Round-trips a two-entry {@code HashMapWritable<Text, LongWritable>} through
 * {@code write} / {@code readFields} and checks that both entries are present
 * with their original values in the deserialized map.
 *
 * @throws IOException if serialization or deserialization fails
 */
@Test
public void testSerialize2() throws IOException {
    HashMapWritable<Text, LongWritable> origMap = new HashMapWritable<Text, LongWritable>();
    origMap.put(new Text("hi"), new LongWritable(52));
    origMap.put(new Text("there"), new LongWritable(77));

    // Serialize the map into an in-memory buffer.
    ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
    DataOutputStream dataOut = new DataOutputStream(bytesOut);
    origMap.write(dataOut);

    // Deserialize the bytes into a fresh map.
    HashMapWritable<Text, LongWritable> map = new HashMapWritable<Text, LongWritable>();
    map.readFields(new DataInputStream(new ByteArrayInputStream(bytesOut.toByteArray())));

    Text key;
    LongWritable value;

    // assertEquals takes (expected, actual); keeping that order makes the
    // failure message report the right value as "expected".
    assertEquals(2, map.size());

    key = new Text("hi");
    value = map.get(key);
    assertTrue(value != null);
    assertEquals(52, value.get());

    // Removing the first key must leave exactly the other entry behind.
    value = map.remove(key);
    assertEquals(1, map.size());

    key = new Text("there");
    value = map.get(key);
    assertTrue(value != null);
    assertEquals(77, value.get());
}
From source file:edu.umn.cs.spatialHadoop.operations.Sampler.java
License:Open Source License
private static <T extends TextSerializable> int sampleLocalWithSize(Path[] files, final ResultCollector<T> output, OperationsParams params) throws IOException { int average_record_size = 1024; // A wild guess for record size final LongWritable current_sample_size = new LongWritable(); int sample_count = 0; TextSerializable inObj1, outObj1;//from ww w . j ava 2 s . c o m inObj1 = OperationsParams.getTextSerializable(params, "shape", new Text2()); outObj1 = OperationsParams.getTextSerializable(params, "outshape", new Text2()); // Make the objects final to be able to use in the anonymous inner class final TextSerializable inObj = inObj1; final T outObj = (T) outObj1; final ResultCollector<TextSerializable> converter = createConverter(output, inObj, outObj); final ResultCollector<Text2> counter = new ResultCollector<Text2>() { @Override public void collect(Text2 r) { current_sample_size.set(current_sample_size.get() + r.getLength()); inObj.fromText(r); converter.collect(inObj); } }; long total_size = params.getLong("size", 0); long seed = params.getLong("seed", System.currentTimeMillis()); while (current_sample_size.get() < total_size) { int count = (int) ((total_size - current_sample_size.get()) / average_record_size); if (count < 10) count = 10; OperationsParams params2 = new OperationsParams(params); params2.setClass("shape", Text2.class, TextSerializable.class); params2.setClass("outshape", Text2.class, TextSerializable.class); params2.setInt("count", count); params2.setLong("seed", seed); sample_count += sampleLocalByCount(files, counter, params2); // Change the seed to get different sample next time. // Still we need to ensure that repeating the program will generate // the same value seed += sample_count; // Update average_records_size average_record_size = (int) (current_sample_size.get() / sample_count); } return sample_count; }
From source file:edu.utsa.sifter.som.MainSOM.java
License:Apache License
void makeSOM(final SifterConfig conf, final SequenceFile.Reader seqRdr, final IndexWriter writer, final Writer somJS) throws IOException, InterruptedException, ExecutionException { final IntArrayWritable docVec = new IntArrayWritable(TermIndices.size()); final LongWritable id = new LongWritable(); final SelfOrganizingMap som = new SelfOrganizingMap(conf.SOM_HEIGHT, conf.SOM_WIDTH, TermIndices.size()); final SOMBuilder builder = new SOMBuilder(som, conf); try {/*from w w w . j av a 2 s .co m*/ som.init(new Uniform(0.0, 1.0, conf.RANDOM_SEED)); final double alphaStep = conf.NUM_SOM_ITERATIONS > 1 ? (conf.MAX_ALPHA - conf.MIN_ALPHA) / (conf.NUM_SOM_ITERATIONS - 1) : 0; final double radiusStep = conf.NUM_SOM_ITERATIONS > 1 ? ((double) conf.MAX_NEIGHBOR_RADIUS - conf.MIN_NEIGHBOR_RADIUS) / (conf.NUM_SOM_ITERATIONS - 1) : 0; final long seqRdrStart = seqRdr.getPosition(); builder.setSteps(alphaStep, radiusStep); for (int i = 0; i < conf.NUM_SOM_ITERATIONS; ++i) { builder.iterate(seqRdr); System.out.println("Finished iteration " + i); seqRdr.seek(seqRdrStart); } System.out.println("Assigning documents to clusters"); final ArrayList<ArrayList<Long>> clusters = new ArrayList<ArrayList<Long>>(som.numCells()); for (int i = 0; i < som.numCells(); ++i) { clusters.add(new ArrayList<Long>()); } while (seqRdr.next(id, docVec)) { CellDistance winner = null; int cellID = -1; if (docVec.getLength() > 0) { winner = builder.findMin(id.get(), docVec); cellID = winner.ID; } if (cellID > -1) { clusters.get(cellID).add(id.get()); som.assignCell(cellID, winner.Distance); } addDoc(writer, som, id.get(), winner, cellID); // System.out.println("doc " + id.get() + " is closest to (" + winner.X + ", " + winner.Y + ")"); } System.out.println("Rescaling SOM vectors"); som.rescale(); // set weights[i] = f[i] * weights[i], f[i] = 1.0; for distance calcs between cells System.out.println("Assigning top terms to each cell"); som.assignTopTerms(conf.NUM_TOP_CELL_TERMS, Terms); 
System.out.println("Calculating greatest neighbor term difference"); som.assignTermDiffs(); System.out.println("Assigning cells to regions"); builder.assignRegions(); System.out.println("Writing final output"); somStats(conf, som, clusters, somJS); } finally { somJS.close(); builder.shutdown(); seqRdr.close(); } }
From source file:edu.utsa.sifter.som.SOMBuilder.java
License:Apache License
void iterate(final SequenceFile.Reader docs) throws IOException, InterruptedException, ExecutionException { System.out.println("CurAlpha = " + CurAlpha + ", CurRadius = " + CurRadius); // SOM.recalculateS2(); int numCycles = 0; int numMinsFound = 0; LongWritable id = new LongWritable(); final IntArrayWritable docVec = new IntArrayWritable(256); while (docs.next(id, docVec)) { if (docVec.getLength() > 0 && processDoc(id.get(), docVec)) { ++numMinsFound;//from w w w. j a va 2 s .c om } ++numCycles; if (numCycles % 1000 == 0) { System.out.println(numCycles + " cycles processed"); } } System.out.println( "processed " + numCycles + " docs in this iteration, " + numMinsFound + " had closest cells"); CurRadius -= RadiusStep; CurAlpha -= AlphaStep; }
From source file:example.TestLineRecordReader.java
License:Apache License
/**
 * Checks the key (byte position) and value length that {@code LineRecordReader}
 * reports for uncompressed input with a custom multi-byte record delimiter.
 *
 * <p>Three scenarios are exercised in order:
 * <ol>
 *   <li>delimiter {@code "++"} on {@code "abcdefghij++kl++mno"}, read as two splits;</li>
 *   <li>delimiter {@code "++"} on {@code "abcd+efgh++ijk++mno"}, where the data
 *       contains a lone first byte of the delimiter, read as two splits;</li>
 *   <li>delimiter {@code "|+|"} on {@code "abcd|efgh|+|ij|kl|+|mno|pqr"}, read
 *       repeatedly with varying buffer sizes.</li>
 * </ol>
 *
 * <p>After {@code nextKeyValue()} returns {@code false} the test still reads the
 * previously obtained {@code key} object to check the final position, and then
 * expects {@code getCurrentKey()} to return {@code null}.
 */
@Test
public void testUncompressedInputCustomDelimiterPosValue() throws Exception {
    Configuration conf = new Configuration();
    conf.setInt("io.file.buffer.size", 10);
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    String inputData = "abcdefghij++kl++mno";
    Path inputFile = createInputFile(conf, inputData);
    String delimiter = "++";
    byte[] recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    int splitLength = 15;
    FileSplit split = new FileSplit(inputFile, 0, splitLength, (String[]) null);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    LineRecordReader reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    // Get first record: "abcdefghij"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    LongWritable key = reader.getCurrentKey();
    Text value = reader.getCurrentValue();
    assertEquals("Wrong length for record value", 10, value.getLength());
    assertEquals("Wrong position after record read", 0, key.get());
    // Get second record: "kl"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    assertEquals("Wrong length for record value", 2, value.getLength());
    // Key should be 12 right after "abcdefghij++"
    assertEquals("Wrong position after record read", 12, key.get());
    // Get third record: "mno"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    assertEquals("Wrong length for record value", 3, value.getLength());
    // Key should be 16 right after "abcdefghij++kl++"
    assertEquals("Wrong position after record read", 16, key.get());
    assertFalse(reader.nextKeyValue());
    // Key should be 19 right after "abcdefghij++kl++mno"
    assertEquals("Wrong position after record read", 19, key.get());
    // after refresh should be empty
    key = reader.getCurrentKey();
    assertNull("Unexpected key returned", key);
    reader.close();

    // Second split of the same input: starts at splitLength and covers the rest.
    split = new FileSplit(inputFile, splitLength, inputData.length() - splitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    // No record is in the second split because the second split dropped
    // the first record, which was already reported by the first split.
    assertFalse("Unexpected record returned", reader.nextKeyValue());
    key = reader.getCurrentKey();
    assertNull("Unexpected key returned", key);
    reader.close();

    // multi char delimiter with starting part of the delimiter in the data
    inputData = "abcd+efgh++ijk++mno";
    inputFile = createInputFile(conf, inputData);
    splitLength = 5;
    split = new FileSplit(inputFile, 0, splitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    // Get first record: "abcd+efgh"
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    assertEquals("Wrong position after record read", 0, key.get());
    assertEquals("Wrong length for record value", 9, value.getLength());
    // should have jumped over the delimiter, no record
    assertFalse(reader.nextKeyValue());
    assertEquals("Wrong position after record read", 11, key.get());
    // after refresh should be empty
    key = reader.getCurrentKey();
    assertNull("Unexpected key returned", key);
    reader.close();

    // next split: check for duplicate or dropped records
    split = new FileSplit(inputFile, splitLength, inputData.length() - splitLength, (String[]) null);
    reader = new LineRecordReader(recordDelimiterBytes);
    reader.initialize(split, context);
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    key = reader.getCurrentKey();
    value = reader.getCurrentValue();
    // Get second record: "ijk" first in this split
    assertEquals("Wrong position after record read", 11, key.get());
    assertEquals("Wrong length for record value", 3, value.getLength());
    // Get third record: "mno" second in this split
    assertTrue("Expected record got nothing", reader.nextKeyValue());
    assertEquals("Wrong position after record read", 16, key.get());
    assertEquals("Wrong length for record value", 3, value.getLength());
    // should be at the end of the input
    assertFalse(reader.nextKeyValue());
    assertEquals("Wrong position after record read", 19, key.get());
    reader.close();

    inputData = "abcd|efgh|+|ij|kl|+|mno|pqr";
    inputFile = createInputFile(conf, inputData);
    delimiter = "|+|";
    recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    // walking over the buffer and split sizes checks for proper processing
    // of the ambiguous bytes of the delimiter
    // NOTE(review): splitSize is never referenced inside the inner loop; the
    // split below is built with bufferSize as its length, so every inner
    // iteration repeats the same work. Confirm whether splitSize was meant
    // to be the split length.
    for (int bufferSize = 1; bufferSize <= inputData.length(); bufferSize++) {
        for (int splitSize = 1; splitSize < inputData.length(); splitSize++) {
            // track where we are in the inputdata
            int keyPosition = 0;
            conf.setInt("io.file.buffer.size", bufferSize);
            split = new FileSplit(inputFile, 0, bufferSize, (String[]) null);
            reader = new LineRecordReader(recordDelimiterBytes);
            reader.initialize(split, context);
            // Get the first record: "abcd|efgh" always possible
            assertTrue("Expected record got nothing", reader.nextKeyValue());
            key = reader.getCurrentKey();
            value = reader.getCurrentValue();
            assertTrue("abcd|efgh".equals(value.toString()));
            // Position should be 0 right at the start
            assertEquals("Wrong position after record read", keyPosition, key.get());
            // Position should be 12 right after the first "|+|"
            keyPosition = 12;
            // get the next record: "ij|kl" if the split/buffer allows it
            if (reader.nextKeyValue()) {
                // check the record info: "ij|kl"
                assertTrue("ij|kl".equals(value.toString()));
                assertEquals("Wrong position after record read", keyPosition, key.get());
                // Position should be 20 after the second "|+|"
                keyPosition = 20;
            }
            // get the third record: "mno|pqr" if the split/buffer allows it
            if (reader.nextKeyValue()) {
                // check the record info: "mno|pqr"
                assertTrue("mno|pqr".equals(value.toString()));
                assertEquals("Wrong position after record read", keyPosition, key.get());
                // Position should be the end of the input
                keyPosition = inputData.length();
            }
            assertFalse("Unexpected record returned", reader.nextKeyValue());
            // no more records can be read we should be at the last position
            assertEquals("Wrong position after record read", keyPosition, key.get());
            // after refresh should be empty
            key = reader.getCurrentKey();
            assertNull("Unexpected key returned", key);
            reader.close();
        }
    }
}
From source file:example.TestLineRecordReader.java
License:Apache License
@Test public void testUncompressedInputDefaultDelimiterPosValue() throws Exception { Configuration conf = new Configuration(); String inputData = "1234567890\r\n12\r\n345"; Path inputFile = createInputFile(conf, inputData); conf.setInt("io.file.buffer.size", 10); conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE); FileSplit split = new FileSplit(inputFile, 0, 15, (String[]) null); TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); LineRecordReader reader = new LineRecordReader(null); reader.initialize(split, context);// ww w.j av a 2 s . c om LongWritable key; Text value; reader.nextKeyValue(); key = reader.getCurrentKey(); value = reader.getCurrentValue(); // Get first record:"1234567890" assertEquals(10, value.getLength()); assertEquals(0, key.get()); reader.nextKeyValue(); // Get second record:"12" assertEquals(2, value.getLength()); // Key should be 12 right after "1234567890\r\n" assertEquals(12, key.get()); assertFalse(reader.nextKeyValue()); // Key should be 16 right after "1234567890\r\n12\r\n" assertEquals(16, key.get()); split = new FileSplit(inputFile, 15, 4, (String[]) null); reader = new LineRecordReader(null); reader.initialize(split, context); // The second split dropped the first record "\n" reader.nextKeyValue(); key = reader.getCurrentKey(); value = reader.getCurrentValue(); // Get third record:"345" assertEquals(3, value.getLength()); // Key should be 16 right after "1234567890\r\n12\r\n" assertEquals(16, key.get()); assertFalse(reader.nextKeyValue()); // Key should be 19 right after "1234567890\r\n12\r\n345" assertEquals(19, key.get()); inputData = "123456789\r\r\n"; inputFile = createInputFile(conf, inputData); split = new FileSplit(inputFile, 0, 12, (String[]) null); reader = new LineRecordReader(null); reader.initialize(split, context); reader.nextKeyValue(); key = reader.getCurrentKey(); value = reader.getCurrentValue(); // Get first record:"123456789" 
assertEquals(9, value.getLength()); assertEquals(0, key.get()); reader.nextKeyValue(); // Get second record:"" assertEquals(0, value.getLength()); // Key should be 10 right after "123456789\r" assertEquals(10, key.get()); assertFalse(reader.nextKeyValue()); // Key should be 12 right after "123456789\r\r\n" assertEquals(12, key.get()); }
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.chipster.Summarize.java
License:Open Source License
@Override protected void reduce(LongWritable key, Iterable<Range> ranges, Reducer<LongWritable, Range, NullWritable, RangeCount>.Context context) throws IOException, InterruptedException { final int referenceID = (int) (key.get() >>> 32); // When the reference sequence changes we have to flush out everything // we've got and start from scratch again. if (referenceID != currentReferenceID) { currentReferenceID = referenceID; doAllSummaries();// ww w . ja v a2s .c om } for (final Range range : ranges) { final int beg = range.beg.get(), end = range.end.get(); final List<SummaryGroup> summaryGroups = range.reverseStrand.get() ? summaryGroupsR : summaryGroupsF; for (SummaryGroup group : summaryGroups) { group.sumBeg += beg; group.sumEnd += end; if (++group.count == group.level) doSummary(group); } } }
From source file:fileformats.AuctionVertexValue.java
License:Apache License
/**
 * Copies the numeric value of {@code l} into {@code colOwned}.
 *
 * <p>Only the value is copied; the passed-in object itself is not retained.
 *
 * @param l holder of the new value
 */
public void setColOwned(LongWritable l) {
    final long newValue = l.get();
    colOwned.set(newValue);
}
From source file:fileformats.AuctionVertexValue.java
License:Apache License
/**
 * Copies the numeric value of {@code l} into {@code rowOwnedBy}.
 *
 * <p>Only the value is copied; the passed-in object itself is not retained.
 *
 * @param l holder of the new value
 */
public void setRowOwnedBy(LongWritable l) {
    final long newValue = l.get();
    rowOwnedBy.set(newValue);
}
From source file:fm.last.darling.mapred.MapperWrapper.java
License:Apache License
public void map(LongWritable key, Text value, OutputCollector<NSpacePoint, IntWritable> collector, Reporter reporter) throws IOException { // call on user's mapper ZohmgOutputCollector o = new ZohmgOutputCollector(); usermapper.map(key.get(), value.toString(), o); long ts = o.getTimestamp(); TreeMap<String, String> points = o.getDimensions(); // fan out into projections, for (List<Dimension> requested : rps) { // reduce down to the dimensions we are interested in. TreeMap<String, String> projection = Projectionist.dimensionality_reduction(requested, points); // emit once for every unit. for (String unit : o.measurementUnits()) { NSpacePoint point = new NSpacePoint(ts, projection, unit); collector.collect(point, o.getMeasurement(unit)); }/* w w w . j a v a 2 s . co m*/ } }