List of usage examples for org.apache.hadoop.io.LongWritable.set
public void set(long value)
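set() overwrites the long wrapped by the writable in place, which is why the record readers below reuse a single LongWritable per split instead of allocating one per record. A minimal sketch of the call itself:

LongWritable key = new LongWritable(); // wraps 0L until set() is called
key.set(42L);                          // replace the wrapped value in place
assert key.get() == 42L;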
From source file: com.datascience.hadoop.CsvRecordReader.java
License: Apache License

@Override
public boolean next(LongWritable key, ListWritable<Text> value) throws IOException {
    value.clear();
    try {
        if (iterator.hasNext()) {
            CSVRecord record = iterator.next();
            position++;
            colLength = colLength == null ? record.size() : colLength;
            if ((!record.isConsistent() || record.size() != colLength) && strict) {
                String message = String.format("%s: %s", "inconsistent record at position", position);
                throw new CsvParseException(message);
            }
            key.set(record.getRecordNumber());
            for (int i = 0; i < record.size(); i++) {
                String item = record.get(i);
                if (item == null) {
                    value.add(null);
                } else {
                    Text text = cache[i];
                    if (text == null) {
                        text = new Text();
                        cache[i] = text;
                    }
                    text.set(item);
                    value.add(text);
                }
            }
            //position = record.getCharacterPosition();
            return true;
        }
    } catch (Exception e) {
        LOGGER.warn("failed to parse record at position: " + position);
        if (strict) {
            throw e;
        } else {
            return next(key, value);
        }
    }
    return false;
}
From source file: com.digitalpebble.behemoth.io.warc.WarcFileRecordReader.java
License: Open Source License

public boolean next(LongWritable key, WritableWarcRecord value) throws IOException {
    DataInputStream whichStream = null;
    if (compressionInput != null) {
        whichStream = compressionInput;
    } else if (currentFile != null) {
        whichStream = currentFile;
    }
    if (whichStream == null) {
        return false;
    }
    WarcRecord newRecord = WarcRecord.readNextWarcRecord(whichStream);
    if (newRecord == null) {
        // try advancing the file
        if (openNextFile()) {
            newRecord = WarcRecord.readNextWarcRecord(whichStream);
        }
        if (newRecord == null) {
            return false;
        }
    }
    totalNumBytesRead += (long) newRecord.getTotalRecordLength();
    newRecord.setWarcFilePath(filePathList[currentFilePath].toString());
    // now, set our output variables
    value.setRecord(newRecord);
    key.set(recordNumber);
    recordNumber++;
    return true;
}
From source file: com.ds.lzo.DeprecatedLzoLineRecordReaderForCombined.java
License: Open Source License

@Override
public boolean next(LongWritable key, Text value) throws IOException {
    // Since the LZOP codec reads everything in LZO blocks, we can't stop if pos == end.
    // Instead, wait for the next block to be read in when pos will be > end.
    while (pos <= end) {
        key.set(pos);
        int newSize = in.readLine(value);
        if (newSize == 0) {
            return false;
        }
        pos = fileIn.getPos();
        return true;
    }
    return false;
}
From source file: com.facebook.hive.orc.lazy.LazyLongDictionaryTreeReader.java
License: Open Source License

private LongWritable createWritable(Object previous, long v) throws IOException {
    LongWritable result = null;
    if (previous == null) {
        result = new LongWritable();
    } else {
        result = (LongWritable) previous;
    }
    result.set(v);
    return result;
}
From source file: com.facebook.presto.hive.S3SelectLineRecordReader.java
License: Apache License

@Override
public synchronized boolean next(LongWritable key, Text value) throws IOException {
    while (true) {
        int bytes = readLine(value);
        if (bytes <= 0) {
            if (!selectClient.isRequestComplete()) {
                throw new IOException("S3 Select request was incomplete as End Event was not received");
            }
            return false;
        }
        recordsFromS3++;
        if (recordsFromS3 > processedRecords) {
            position += bytes;
            processedRecords++;
            key.set(processedRecords);
            return true;
        }
    }
}
From source file: com.gotometrics.orderly.example.FixedLongExample.java
License: Apache License

public void serializationExamples() throws Exception {
    FixedLongWritableRowKey l = new FixedLongWritableRowKey();
    LongWritable w = new LongWritable();
    ImmutableBytesWritable buffer = new ImmutableBytesWritable();
    byte[] b;

    /* Serialize and deserialize into an ImmutableBytesWritable */
    w.set(-93214);
    b = new byte[l.getSerializedLength(w)];
    buffer.set(b);
    l.serialize(w, buffer);
    buffer.set(b, 0, b.length);
    System.out.println("deserialize(serialize(-93214)) = " + ((LongWritable) l.deserialize(buffer)).get());

    /* Serialize and deserialize into a byte array (descending sort). */
    l.setOrder(Order.DESCENDING);
    w.set(0);
    System.out.println("deserialize(serialize(0)) = " + ((LongWritable) l.deserialize(l.serialize(w))).get());
}
From source file: com.gotometrics.orderly.FixedUnsignedLongWritableRowKey.java
License: Apache License

protected LongWritable invertSign(LongWritable lw) {
    lw.set(lw.get() ^ Long.MIN_VALUE);
    return lw;
}
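XOR-ing with Long.MIN_VALUE flips the sign bit, biasing signed longs into unsigned space so that lexicographic comparison of the serialized bytes agrees with numeric order. A small check of that property (illustrative, not from the source):

long a = -1L ^ Long.MIN_VALUE; // 0x7fffffffffffffffL
long b = 1L ^ Long.MIN_VALUE;  // 0x8000000000000001L
assert Long.compareUnsigned(a, b) < 0; // -1 still sorts before 1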
From source file: com.hadoop.mapred.DeprecatedLzoLineRecordReader.java
License: Open Source License

public boolean next(LongWritable key, Text value) throws IOException {
    // Since the LZOP codec reads everything in LZO blocks, we can't stop if pos == end.
    // Instead, wait for the next block to be read in when pos will be > end.
    while (pos <= end) {
        key.set(pos);
        int newSize = in.readLine(value);
        if (newSize == 0) {
            return false;
        }
        pos = fileIn.getPos();
        return true;
    }
    return false;
}
From source file: com.hdfs.concat.crush.KeyValuePreservingRecordReaderNextTest.java
License: Apache License

@Override
public boolean next(LongWritable key, Text value) throws IOException {
    if (next) {
        key.set(offset);
        value.set(line);
    }
    return next;
}
From source file: com.ibm.jaql.lang.expr.system.RJaqlInterface.java
License: Apache License

/**
 * Saves simple R objects into HDFS in one of the formats supported by Jaql
 * so that they can be read directly into Jaql.
 *
 * @param localPath    path of the local file to import
 * @param hdfsPath     destination path in HDFS
 * @param schemaString Jaql schema for the records, or null
 * @param format       serialization format to use
 * @param header       whether the first line is a header row
 * @param vector       whether to import as plain lines rather than delimited records
 * @return true on success, false if saving failed
 */
public boolean jaqlSave(String localPath, String hdfsPath, String schemaString, String format, boolean header,
        boolean vector) {
    if (format.equalsIgnoreCase(FORMAT_DELIM)) {
        LOG.info("Format: " + FORMAT_DELIM + ", saving to HDFS loc: " + hdfsPath);
        return RUtil.saveToHDFS(localPath, hdfsPath);
    }
    try {
        JobConf conf = new JobConf();
        int DEFAULT_BUFFER_SIZE = 64 * 1024;
        int bufferSize = conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
        BufferedReader reader = new BufferedReader(new FileReader(localPath), bufferSize);
        LongWritable key = new LongWritable(0);
        long count = 0;
        Text value = new Text();
        BufferedJsonRecord options = new BufferedJsonRecord(2);
        BufferedJsonArray headerArray = null;
        if (header) {
            String headerString = reader.readLine();
            String[] headers = splitPattern.split(headerString);
            headerArray = new BufferedJsonArray(headers.length);
            for (int i = 0; i < headers.length; i++) {
                headerArray.set(i, new JsonString(StringUtils.strip(headers[i], "\"")));
            }
            count++;
        }

        Schema schema = null;
        if (schemaString != null) {
            schema = SchemaFactory.parse(schemaString);
        }

        if (headerArray != null) {
            RecordSchema recordSchema = (RecordSchema) schema;

            // construct new matching schema
            List<Field> fields = new LinkedList<Field>();
            for (JsonValue fieldName : headerArray) {
                Field field;
                if (recordSchema == null) {
                    field = new Field((JsonString) fieldName, SchemaFactory.stringSchema(), false);
                } else {
                    field = recordSchema.getField((JsonString) fieldName);
                    if (field == null)
                        throw new NullPointerException("header field not in schema: " + fieldName);
                    // FIXME: schema fields that are not in the header are currently considered OK
                }
                fields.add(field);
            }

            // and set it
            schema = new RecordSchema(fields, null);
        }

        if (schema != null)
            options.add(DelOptionParser.SCHEMA_NAME, new JsonSchema(schema));

        KeyValueImport<LongWritable, Text> converter = null;
        if (vector) {
            converter = new FromLinesConverter();
        } else {
            converter = new FromDelConverter();
        }
        LOG.info("Initializing Converter with options: " + options);
        converter.init(options);
        Schema tmpSchema = converter.getSchema();
        tmpSchema = SchemaTransformation.removeNullability(tmpSchema);
        if (!tmpSchema.is(JsonType.ARRAY, JsonType.RECORD, JsonType.BOOLEAN, JsonType.DECFLOAT,
                JsonType.DOUBLE, JsonType.LONG, JsonType.STRING).always()) {
            throw new IOException("Unrecognized schema type: " + schema.getSchemaType());
        }
        JsonValue outValue = converter.createTarget();
        JsonHolder outKeyHolder;
        JsonHolder outValueHolder;
        if (format.equalsIgnoreCase(FORMAT_DEFAULT)) {
            HadoopSerializationDefault.register(conf);
            outKeyHolder = new JsonHolderDefault();
            outValueHolder = new JsonHolderDefault(outValue);
            LOG.info("Registered serializer for Default format.");
        } else if (format.equalsIgnoreCase(FORMAT_TEMP)) {
            // TODO: There should be a better way of doing this. HadoopSerializationTemp
            // now does it in an ugly way.
            BufferedJsonRecord tmpOptions = new BufferedJsonRecord();
            BufferedJsonRecord outOptions = new BufferedJsonRecord();
            outOptions.add(new JsonString("schema"), new JsonSchema(schema));
            tmpOptions.add(new JsonString("options"), outOptions);
            conf.set(ConfSetter.CONFOUTOPTIONS_NAME, tmpOptions.toString());
            HadoopSerializationTemp.register(conf);
            outKeyHolder = new JsonHolderTempKey(null);
            outValueHolder = new JsonHolderTempValue();
            LOG.info("Registered serializer for HadoopTemp format.");
        } else {
            throw new IOException("Unrecognized serialization format requested: " + format);
        }
        FileSystem fs = FileSystem.get(conf);
        Path outputPath = new Path(hdfsPath);
        Writer writer = SequenceFile.createWriter(fs, conf, outputPath, outKeyHolder.getClass(),
                outValueHolder.getClass());
        String line;
        while ((line = reader.readLine()) != null) {
            key.set(count++);
            value.set(line);
            outValue = converter.convert(key, value, outValue);
            outValueHolder.value = outValue;
            writer.append(outKeyHolder, outValueHolder);
        }
        LOG.info("Transferred " + count + " line(s).");
        reader.close();
        writer.close();
    } catch (IOException e) {
        LOG.info("Error in saving object.", e);
        return false;
    }
    return true;
}