List of usage examples for org.apache.hadoop.io.LongWritable.set
public void set(long value)
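For orientation, a minimal sketch (not taken from any of the source files below) of the reuse pattern that set enables: Hadoop record readers and mappers typically allocate a single LongWritable and overwrite it with set for each record, rather than constructing a new object per record.

    import org.apache.hadoop.io.LongWritable;

    public class LongWritableSetExample {
        public static void main(String[] args) {
            // One instance, reused across records; set(long) overwrites the
            // wrapped value in place. This is the pattern in every example below.
            LongWritable key = new LongWritable();
            for (long pos = 0; pos < 3; pos++) {
                key.set(pos);
                System.out.println(key.get()); // prints 0, 1, 2
            }
        }
    }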
From source file:edu.umn.cs.spatialHadoop.operations.Sampler.java
License:Open Source License
private static <T extends TextSerializable> int sampleLocalWithSize(Path[] files,
        final ResultCollector<T> output, OperationsParams params) throws IOException {
    int average_record_size = 1024; // A wild guess for record size
    final LongWritable current_sample_size = new LongWritable();
    int sample_count = 0;
    TextSerializable inObj1, outObj1;
    inObj1 = OperationsParams.getTextSerializable(params, "shape", new Text2());
    outObj1 = OperationsParams.getTextSerializable(params, "outshape", new Text2());

    // Make the objects final to be able to use them in the anonymous inner class
    final TextSerializable inObj = inObj1;
    final T outObj = (T) outObj1;

    final ResultCollector<TextSerializable> converter = createConverter(output, inObj, outObj);
    final ResultCollector<Text2> counter = new ResultCollector<Text2>() {
        @Override
        public void collect(Text2 r) {
            current_sample_size.set(current_sample_size.get() + r.getLength());
            inObj.fromText(r);
            converter.collect(inObj);
        }
    };

    long total_size = params.getLong("size", 0);
    long seed = params.getLong("seed", System.currentTimeMillis());

    while (current_sample_size.get() < total_size) {
        int count = (int) ((total_size - current_sample_size.get()) / average_record_size);
        if (count < 10)
            count = 10;

        OperationsParams params2 = new OperationsParams(params);
        params2.setClass("shape", Text2.class, TextSerializable.class);
        params2.setClass("outshape", Text2.class, TextSerializable.class);
        params2.setInt("count", count);
        params2.setLong("seed", seed);
        sample_count += sampleLocalByCount(files, counter, params2);
        // Change the seed to get a different sample next time, while ensuring
        // that repeating the program generates the same value.
        seed += sample_count;
        // Update average_record_size
        average_record_size = (int) (current_sample_size.get() / sample_count);
    }
    return sample_count;
}
From source file:edu.utsa.sifter.som.MainSOM.java
License:Apache License
void writeVectors(final SequenceFile.Writer file)
        throws IOException, CorruptIndexException, NoSuchFieldException {
    System.out.println("Creating document term vectors");
    final LongWritable id = new LongWritable();
    final IntArrayWritable vec = new IntArrayWritable(TermIndices.size());
    final HashSet<String> idFields = new HashSet<String>();
    idFields.add("ID");

    int max = Reader.maxDoc();
    int noTVs = 0;
    TermsEnum term = null;
    // iterate over the docs in the index
    for (int i = 0; i < max; ++i) {
        vec.clear();
        final Document doc = Reader.document(i, idFields);
        final IndexableField idField = doc.getField("ID");
        if (idField == null) {
            throw new NoSuchFieldException("document " + i + " does not have an ID field");
        }
        id.set(Long.parseLong(idField.stringValue()));
        // get the term vector for the body field
        final Terms terms = Reader.getTermVector(i, "body");
        if (terms != null) {
            // count the terms in the doc that appear in the term index
            int numTerms = 0;
            term = terms.iterator(term);
            while (term.next() != null) {
                Integer index = TermIndices.get(term.term().utf8ToString());
                if (index != null) {
                    vec.add(index);
                    ++numTerms;
                }
            }
            if (numTerms > 0) {
                MaxDocTerms = Math.max(MaxDocTerms, numTerms);
                SumDocTerms += numTerms;
            }
        } else {
            ++noTVs;
        }
        if (vec.getLength() == 0) {
            ++NumOutliers;
        }
        file.append(id, vec);
        ++NumDocsWritten;
    }
    System.out.println(noTVs + " docs had no term vectors");
}
From source file:edu.yale.cs.hadoopdb.connector.DBRecordReader.java
License:Apache License
/**
 * Reads the next record from the result set and passes the result set to the value object to
 * extract the necessary fields. Increments the number of rows read in.
 * @return false if no more rows exist.
 */
@Override
public boolean next(LongWritable key, T value) throws IOException {
    try {
        if (!results.next())
            return false;
        key.set(pos);
        value.readFields(results);
        pos++;
    } catch (SQLException e) {
        throw new IOException(e);
    }
    return true;
}
From source file:edu.yale.cs.hadoopdb.sms.connector.SMSRecordReader.java
License:Apache License
/**
 * Retrieves each row from the result set, serializes it
 * using {@link ParseSchema}, and increments the number of rows
 * read in.
 * @return false if no more rows exist.
 */
@Override
public boolean next(LongWritable key, Text value) throws IOException {
    try {
        if (!results.next())
            return false;
        key.set(pos);
        value.set(parseResults());
        pos++;
    } catch (SQLException e) {
        throw new IOException(e);
    }
    return true;
}
From source file:lennard.PiRecordReader.java
License:Apache License
/** Read a line. */
public synchronized boolean next(LongWritable key, LongWritable value) throws IOException {
    Text line = new Text();
    while (pos < end) {
        int newSize = in.readLine(line, maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
        if (newSize == 0) {
            return false;
        }
        pos += newSize;
        if (newSize < maxLineLength) {
            // Each line holds a tab-separated pair of longs: the key and the value.
            String[] s = line.toString().split("\t");
            key.set(Long.parseLong(s[0]));
            value.set(Long.parseLong(s[1]));
            return true;
        }
        // line too long. try again
        LOG.info("Skipped line of size " + newSize + " at pos " + (pos - newSize));
    }
    return false;
}
From source file:mr.MyFileRecordReader2.java
License:Apache License
/** Read a line. */
public synchronized boolean next(LongWritable key, Text value) throws IOException {
    // We always read one extra line, which lies outside the upper
    // split limit, i.e. (end - 1)
    while (getFilePosition() <= end) { // || in.needAdditionalRecordAfterSplit()
        key.set(pos);
        int newSize = 0;
        if (pos == 0) {
            newSize = skipUtfByteOrderMark(value);
        } else {
            newSize = in.readLine(value, maxLineLength, maxBytesToConsume(pos));
            pos += newSize;
        }
        if (newSize == 0) {
            return false;
        }
        if (newSize < maxLineLength) {
            return true;
        }
        // line too long. try again
        LOG.info("Skipped line of size " + newSize + " at pos " + (pos - newSize));
    }
    return false;
}
From source file:net.darkseraphim.webanalytics.hadoop.csv.CSVLineRecordReader.java
License:Apache License
public boolean next(LongWritable longWritable, List<Text> texts) throws IOException {
    if (!nextKeyValue()) {
        System.out.println("[LOG] Called next, was false");
        return false;
    }
    longWritable.set(this.getCurrentKey().get());
    texts.clear();
    for (Text text : this.getCurrentValue()) {
        texts.add(text);
    }
    return true;
}
From source file:newprotobuf.mapred.ProtobufRecordReader.java
License:Open Source License
public synchronized boolean next(LongWritable key, BytesWritable value) throws IOException {
    size = 0;
    boolean readend = readLittleEndianInt(in);
    if (readend) {
        LOG.info("read the pb file completely");
        return false;
    }
    if (size < 0) {
        LOG.info("Parse the pbfile error: " + file.toUri().toString());
        LOG.info("get size " + size);
        if (skipbad) {
            LOG.info("Skip the bad file");
            reporter.incrCounter(Counter.BADFORMAT_FILE_COUNT, 1);
            return false;
        }
        throw new IOException("Bad format pbfile");
    }
    pos += 2;
    if (size == 0) {
        value.set(buffer, 0, 0);
        return true;
    }
    pos += size;
    key.set(pos);
    // Read the record body into the shared buffer if it fits, otherwise into a
    // freshly allocated array of the exact size.
    byte[] dest = size < buffer.length ? buffer : new byte[size];
    int already_read = 0;
    while (already_read < size) {
        int readlen = in.read(dest, already_read, size - already_read);
        if (readlen == -1) {
            LOG.info("Parse the pbfile error: " + file.toUri().toString());
            LOG.info("read " + already_read + " bytes but expected size: " + size);
            if (skipbad) {
                LOG.info("Skip the bad file");
                reporter.incrCounter(Counter.BADFORMAT_FILE_COUNT, 1);
                return false;
            }
            throw new IOException("Bad format pbfile");
        }
        already_read += readlen;
    }
    value.set(dest, 0, size);
    return true;
}
From source file:nl.tudelft.graphalytics.mapreducev2.common.DirectedNodeNeighbourRecordReader.java
License:Apache License
public boolean next(LongWritable key, DirectedNodeNeighbourhood value) throws IOException {
    if (!lineReader.next(lineKey, lineValue)) {
        return false;
    }
    key.set(lineKey.get());
    DirectedNodeNeighbourhood tmp = new DirectedNodeNeighbourhood(this.textValueToObj(lineValue));
    value.setCentralNode(tmp.getCentralNode());
    value.setDirectedNodeNeighbourhood(tmp.getDirectedNodeNeighbourhood());
    return true;
}
From source file:nl.tudelft.graphalytics.mapreducev2.common.UndirectedNodeNeighbourRecordReader.java
License:Apache License
public boolean next(LongWritable key, UndirectedNodeNeighbourhood value) throws IOException {
    if (!lineReader.next(lineKey, lineValue)) {
        return false;
    }
    key.set(lineKey.get());
    UndirectedNodeNeighbourhood tmp = new UndirectedNodeNeighbourhood(this.textValueToObj(lineValue));
    value.setCentralNode(tmp.getCentralNode());
    value.setNodeNeighbourhood(tmp.getNodeNeighbourhood());
    return true;
}