Example usage for org.apache.hadoop.io LongWritable set

Introduction

This page collects example usages of org.apache.hadoop.io.LongWritable.set from open-source projects.

Prototype

public void set(long value) 

Document

Set the value of this LongWritable.
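
For orientation, here is a minimal self-contained sketch (not taken from any of the projects below) showing what set does:

import org.apache.hadoop.io.LongWritable;

public class LongWritableSetExample {
    public static void main(String[] args) {
        LongWritable w = new LongWritable();   // wraps 0 by default
        w.set(123L);                           // replace the wrapped value
        System.out.println(w.get());           // prints 123
        w.set(w.get() + 1);                    // mutate the same instance in place
        System.out.println(w);                 // toString() prints 124
    }
}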

Usage

From source file:edu.umn.cs.spatialHadoop.operations.Sampler.java

License:Open Source License

private static <T extends TextSerializable> int sampleLocalWithSize(Path[] files,
        final ResultCollector<T> output, OperationsParams params) throws IOException {

    int average_record_size = 1024; // A wild guess for record size
    final LongWritable current_sample_size = new LongWritable();
    int sample_count = 0;

    TextSerializable inObj1, outObj1;
    inObj1 = OperationsParams.getTextSerializable(params, "shape", new Text2());
    outObj1 = OperationsParams.getTextSerializable(params, "outshape", new Text2());

    // Make the objects final to be able to use in the anonymous inner class
    final TextSerializable inObj = inObj1;
    final T outObj = (T) outObj1;
    final ResultCollector<TextSerializable> converter = createConverter(output, inObj, outObj);

    final ResultCollector<Text2> counter = new ResultCollector<Text2>() {
        @Override
        public void collect(Text2 r) {
            current_sample_size.set(current_sample_size.get() + r.getLength());
            inObj.fromText(r);
            converter.collect(inObj);
        }
    };

    long total_size = params.getLong("size", 0);
    long seed = params.getLong("seed", System.currentTimeMillis());

    while (current_sample_size.get() < total_size) {
        int count = (int) ((total_size - current_sample_size.get()) / average_record_size);
        if (count < 10)
            count = 10;

        OperationsParams params2 = new OperationsParams(params);
        params2.setClass("shape", Text2.class, TextSerializable.class);
        params2.setClass("outshape", Text2.class, TextSerializable.class);
        params2.setInt("count", count);
        params2.setLong("seed", seed);
        sample_count += sampleLocalByCount(files, counter, params2);
        // Change the seed to get a different sample next time, while still
        // ensuring that re-running the program produces the same values.
        seed += sample_count;
        // Update average_record_size
        average_record_size = (int) (current_sample_size.get() / sample_count);
    }
    return sample_count;
}
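
In the example above, current_sample_size is declared final so the anonymous ResultCollector can capture it; calling set is what keeps the captured counter mutable anyway. A minimal sketch of the same idiom, with hypothetical names and no SpatialHadoop dependencies:

import org.apache.hadoop.io.LongWritable;

public class FinalHolderIdiom {
    public static void main(String[] args) {
        // A local captured by an anonymous class must be (effectively) final,
        // so the mutable state lives inside the LongWritable instead.
        final LongWritable total = new LongWritable(0);
        Runnable accumulate = new Runnable() {
            @Override
            public void run() {
                total.set(total.get() + 42);   // mutate through set()
            }
        };
        accumulate.run();
        System.out.println(total.get());       // prints 42
    }
}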

From source file:edu.utsa.sifter.som.MainSOM.java

License:Apache License

void writeVectors(final SequenceFile.Writer file)
        throws IOException, CorruptIndexException, NoSuchFieldException {
    System.out.println("Creating document term vectors");
    final LongWritable id = new LongWritable();
    final IntArrayWritable vec = new IntArrayWritable(TermIndices.size());
    final HashSet<String> idFields = new HashSet<String>();
    idFields.add("ID");

    int max = Reader.maxDoc();
    int noTVs = 0;

    TermsEnum term = null;
    // iterate docs
    for (int i = 0; i < max; ++i) {
        vec.clear();
        final Document doc = Reader.document(i, idFields);
        final IndexableField idField = doc.getField("ID");
        if (idField == null) {
            throw new NoSuchFieldException("document " + i + " does not have an ID field");
        }
        id.set(Long.parseLong(idField.stringValue()));

        // get term vector for body field
        final Terms terms = Reader.getTermVector(i, "body");
        if (terms != null) {
            // count terms in doc
            int numTerms = 0;
            term = terms.iterator(term);
            while (term.next() != null) {
                // System.out.println("doc " + i + " had term '" + term.term().utf8ToString() + "'");
                // System.out.println("doc freq: " + term.docFreq());
                // System.out.println("ord: " + term.ord());
                // System.out.println("totalTermFreq: " + term.totalTermFreq());
                Integer index = TermIndices.get(term.term().utf8ToString());
                if (index != null) {
                    vec.add(index);
                    ++numTerms;
                }
            }
            if (numTerms > 0) {
                // System.out.println("doc " + i + " had " + numTerms + " terms");
                MaxDocTerms = Math.max(MaxDocTerms, numTerms);
                SumDocTerms += numTerms;
            }
        } else {
            ++noTVs;
            // System.err.println("doc " + i + " had no term vector for body");
        }
        if (vec.getLength() == 0) {
            ++NumOutliers;
        }
        file.append(id, vec);
        ++NumDocsWritten;
    }
    System.out.println(noTVs + " docs had no term vectors");
}

From source file:edu.yale.cs.hadoopdb.connector.DBRecordReader.java

License:Apache License

/**
 * Reads the next record from the result set and passes the result set to the value Object to
 * extract necessary fields. Increments the number of rows read in.
 * @return false if no more rows exist.
 */
@Override
public boolean next(LongWritable key, T value) throws IOException {
    try {
        if (!results.next())
            return false;

        key.set(pos);
        value.readFields(results);

        pos++;
    } catch (SQLException e) {
        throw new IOException(e);
    }
    return true;
}
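
For context, this is how such a reader is driven under the old mapred API, which reuses the key/value instances it hands to next; that is why next updates the key with set instead of allocating a new LongWritable per record. A sketch only: the reader and its value type are assumed to come from the surrounding job.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapred.RecordReader;

public class ReaderDriverSketch {
    static <V> void drain(RecordReader<LongWritable, V> reader) throws IOException {
        LongWritable key = reader.createKey();
        V value = reader.createValue();
        while (reader.next(key, value)) {      // next() mutates key via set()
            System.out.println("row " + key.get());
        }
        reader.close();
    }
}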

From source file:edu.yale.cs.hadoopdb.sms.connector.SMSRecordReader.java

License:Apache License

/**
 * Retrieves each row from the result set, serializes it 
 * using {@link ParseSchema} and increments the number of rows
 * read in.
 * @return false if no more rows exist.
 */
@Override
public boolean next(LongWritable key, Text value) throws IOException {
    try {
        if (!results.next())
            return false;
        key.set(pos);
        value.set(parseResults());
        pos++;
    } catch (SQLException e) {
        throw new IOException(e);
    }
    return true;
}

From source file:lennard.PiRecordReader.java

License:Apache License

/** Read a line. */
public synchronized boolean next(LongWritable key, LongWritable value) throws IOException {

    Text line = new Text();
    while (pos < end) {
        int newSize = in.readLine(line, maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
        if (newSize == 0) {
            return false;
        }

        // Parse the tab-separated pair only after confirming a line was read.
        String[] s = line.toString().split("\t");
        key.set(Long.parseLong(s[0]));
        value.set(Long.parseLong(s[1]));

        pos += newSize;
        if (newSize < maxLineLength) {
            return true;
        }

        // line too long. try again
        LOG.info("Skipped line of size " + newSize + " at pos " + (pos - newSize));
    }

    return false;
}

From source file:mr.MyFileRecordReader2.java

License:Apache License

/** Read a line. */
public synchronized boolean next(LongWritable key, Text value) throws IOException {

    // We always read one extra line, which lies outside the upper
    // split limit i.e. (end - 1)
    while (getFilePosition() <= end) { // || in.needAdditionalRecordAfterSplit()
        key.set(pos);

        int newSize = 0;
        if (pos == 0) {
            newSize = skipUtfByteOrderMark(value);
        } else {
            newSize = in.readLine(value, maxLineLength, maxBytesToConsume(pos));
            pos += newSize;
        }

        if (newSize == 0) {
            return false;
        }
        if (newSize < maxLineLength) {
            return true;
        }

        // line too long. try again
        LOG.info("Skipped line of size " + newSize + " at pos " + (pos - newSize));
    }

    return false;
}

From source file:net.darkseraphim.webanalytics.hadoop.csv.CSVLineRecordReader.java

License:Apache License

public boolean next(LongWritable longWritable, List<Text> texts) throws IOException {
    if (!nextKeyValue()) {
        System.out.println("[LOG] Called next, was false");
        return false;
    }
    longWritable.set(this.getCurrentKey().get());
    texts.clear();
    for (Text text : this.getCurrentValue()) {
        texts.add(text);
    }
    return true;
}

From source file:newprotobuf.mapred.ProtobufRecordReader.java

License:Open Source License

public synchronized boolean next(LongWritable key, BytesWritable value) throws IOException {

    size = 0;
    boolean readend = readLittleEndianInt(in);
    if (readend) {
        LOG.info("read the pb file completely");
        return false;
    }

    if (size < 0) {
        LOG.info("Parse the pbfile error:" + file.toUri().toString());
        LOG.info("get size " + size);
        if (skipbad) {
            LOG.info("Skip the bad file");
            reporter.incrCounter(Counter.BADFORMAT_FILE_COUNT, 1);
            return false;
        } else {
            throw (new IOException("Bad format pbfile"));
        }
    }

    pos += 2;
    if (size == 0) {
        value.set(buffer, 0, 0);
        return true;
    }

    pos += size;
    key.set(pos);

    int readlen = 0;
    if (size < buffer.length) {
        int already_read = 0;
        while (already_read < size) {
            readlen = in.read(buffer, already_read, size - already_read);
            if (readlen == -1) {
                if (already_read < size) {
                    LOG.info("Parse the pbfile error:" + file.toUri().toString());
                    LOG.info("current read size" + readlen + " but expected size:" + size);
                    if (skipbad) {
                        LOG.info("Skip the bad file");
                        reporter.incrCounter(Counter.BADFORMAT_FILE_COUNT, 1);
                        return false;
                    } else {
                        throw (new IOException("Bad format pbfile"));
                    }
                } else
                    break;
            }

            already_read += readlen;
        }
        value.set(buffer, 0, size);
    } else {
        byte[] tmp = new byte[size];
        int already_read = 0;
        while (already_read < size) {
            readlen = in.read(tmp, already_read, size - already_read);
            if (readlen == -1) {
                if (already_read < size) {
                    LOG.info("Parse the pbfile error:" + file.toUri().toString());
                    LOG.info("current read size" + readlen + " but expected size:" + size);
                    if (skipbad) {
                        LOG.info("Skip the bad file");
                        reporter.incrCounter(Counter.BADFORMAT_FILE_COUNT, 1);
                        return false;
                    } else {
                        throw (new IOException("Bad format pbfile"));
                    }
                } else
                    break;
            }

            already_read += readlen;
        }
        value.set(tmp, 0, size);
    }

    return true;
}

From source file:nl.tudelft.graphalytics.mapreducev2.common.DirectedNodeNeighbourRecordReader.java

License:Apache License

public boolean next(LongWritable key, DirectedNodeNeighbourhood value) throws IOException {
    if (!lineReader.next(lineKey, lineValue)) {
        return false;
    }

    key.set(lineKey.get());
    DirectedNodeNeighbourhood tmp = new DirectedNodeNeighbourhood(this.textValueToObj(lineValue));
    value.setCentralNode(tmp.getCentralNode());
    value.setDirectedNodeNeighbourhood(tmp.getDirectedNodeNeighbourhood());

    return true;
}

From source file:nl.tudelft.graphalytics.mapreducev2.common.UndirectedNodeNeighbourRecordReader.java

License:Apache License

public boolean next(LongWritable key, UndirectedNodeNeighbourhood value) throws IOException {
    if (!lineReader.next(lineKey, lineValue)) {
        return false;
    }

    key.set(lineKey.get());
    UndirectedNodeNeighbourhood tmp = new UndirectedNodeNeighbourhood(this.textValueToObj(lineValue));
    value.setCentralNode(tmp.getCentralNode());
    value.setNodeNeighbourhood(tmp.getNodeNeighbourhood());

    return true;
}