List of usage examples for org.apache.hadoop.io.LongWritable.set
public void set(long value)
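set() overwrites the long wrapped by the writable in place, which is why the record readers below reuse a single LongWritable per split instead of allocating one per record. A minimal sketch of the call itself:

LongWritable key = new LongWritable(); // wraps 0L until set() is called
key.set(42L);                          // replace the wrapped value in place
assert key.get() == 42L;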
From source file: com.datascience.hadoop.CsvRecordReader.java
License: Apache License

@Override
public boolean next(LongWritable key, ListWritable<Text> value) throws IOException {
    value.clear();
    try {
        if (iterator.hasNext()) {
            CSVRecord record = iterator.next();
            position++;
            colLength = colLength == null ? record.size() : colLength;
            if ((!record.isConsistent() || record.size() != colLength) && strict) {
                String message = String.format("%s: %s", "inconsistent record at position", position);
                throw new CsvParseException(message);
            }
            key.set(record.getRecordNumber());
            for (int i = 0; i < record.size(); i++) {
                String item = record.get(i);
                if (item == null) {
                    value.add(null);
                } else {
                    Text text = cache[i];
                    if (text == null) {
                        text = new Text();
                        cache[i] = text;
                    }
                    text.set(item);
                    value.add(text);
                }
            }
            //position = record.getCharacterPosition();
            return true;
        }
    } catch (Exception e) {
        LOGGER.warn("failed to parse record at position: " + position);
        if (strict) {
            throw e;
        } else {
            return next(key, value);
        }
    }
    return false;
}
From source file: com.digitalpebble.behemoth.io.warc.WarcFileRecordReader.java
License: Open Source License

public boolean next(LongWritable key, WritableWarcRecord value) throws IOException {
    DataInputStream whichStream = null;
    if (compressionInput != null) {
        whichStream = compressionInput;
    } else if (currentFile != null) {
        whichStream = currentFile;
    }
    if (whichStream == null) {
        return false;
    }
    WarcRecord newRecord = WarcRecord.readNextWarcRecord(whichStream);
    if (newRecord == null) {
        // try advancing the file
        if (openNextFile()) {
            newRecord = WarcRecord.readNextWarcRecord(whichStream);
        }
        if (newRecord == null) {
            return false;
        }
    }
    totalNumBytesRead += (long) newRecord.getTotalRecordLength();
    newRecord.setWarcFilePath(filePathList[currentFilePath].toString());
    // now, set our output variables
    value.setRecord(newRecord);
    key.set(recordNumber);
    recordNumber++;
    return true;
}
From source file: com.ds.lzo.DeprecatedLzoLineRecordReaderForCombined.java
License: Open Source License

@Override
public boolean next(LongWritable key, Text value) throws IOException {
    // Since the LZOP codec reads everything in LZO blocks, we can't stop if pos == end.
    // Instead, wait for the next block to be read in when pos will be > end.
    while (pos <= end) {
        key.set(pos);
        int newSize = in.readLine(value);
        if (newSize == 0) {
            return false;
        }
        pos = fileIn.getPos();
        return true;
    }
    return false;
}
From source file: com.facebook.hive.orc.lazy.LazyLongDictionaryTreeReader.java
License: Open Source License

private LongWritable createWritable(Object previous, long v) throws IOException {
    LongWritable result = null;
    if (previous == null) {
        result = new LongWritable();
    } else {
        result = (LongWritable) previous;
    }
    result.set(v);
    return result;
}
From source file: com.facebook.presto.hive.S3SelectLineRecordReader.java
License: Apache License

@Override
public synchronized boolean next(LongWritable key, Text value) throws IOException {
    while (true) {
        int bytes = readLine(value);
        if (bytes <= 0) {
            if (!selectClient.isRequestComplete()) {
                throw new IOException("S3 Select request was incomplete as End Event was not received");
            }
            return false;
        }
        recordsFromS3++;
        if (recordsFromS3 > processedRecords) {
            position += bytes;
            processedRecords++;
            key.set(processedRecords);
            return true;
        }
    }
}
From source file: com.gotometrics.orderly.example.FixedLongExample.java
License: Apache License

public void serializationExamples() throws Exception {
    FixedLongWritableRowKey l = new FixedLongWritableRowKey();
    LongWritable w = new LongWritable();
    ImmutableBytesWritable buffer = new ImmutableBytesWritable();
    byte[] b;

    /* Serialize and deserialize into an ImmutableBytesWritable */
    w.set(-93214);
    b = new byte[l.getSerializedLength(w)];
    buffer.set(b);
    l.serialize(w, buffer);
    buffer.set(b, 0, b.length);
    System.out.println("deserialize(serialize(-93214)) = " + ((LongWritable) l.deserialize(buffer)).get());

    /* Serialize and deserialize into a byte array (descending sort). */
    l.setOrder(Order.DESCENDING);
    w.set(0);
    System.out.println("deserialize(serialize(0)) = " + ((LongWritable) l.deserialize(l.serialize(w))).get());
}
From source file: com.gotometrics.orderly.FixedUnsignedLongWritableRowKey.java
License: Apache License

protected LongWritable invertSign(LongWritable lw) {
    lw.set(lw.get() ^ Long.MIN_VALUE);
    return lw;
}
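XOR-ing with Long.MIN_VALUE flips the sign bit, biasing signed longs into unsigned space so that lexicographic comparison of the serialized bytes agrees with numeric order. A small check of that property (illustrative, not from the source):

long a = -1L ^ Long.MIN_VALUE; // 0x7fffffffffffffffL
long b = 1L ^ Long.MIN_VALUE;  // 0x8000000000000001L
assert Long.compareUnsigned(a, b) < 0; // -1 still sorts before 1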
From source file: com.hadoop.mapred.DeprecatedLzoLineRecordReader.java
License: Open Source License

public boolean next(LongWritable key, Text value) throws IOException {
    // Since the LZOP codec reads everything in LZO blocks, we can't stop if pos == end.
    // Instead, wait for the next block to be read in when pos will be > end.
    while (pos <= end) {
        key.set(pos);
        int newSize = in.readLine(value);
        if (newSize == 0) {
            return false;
        }
        pos = fileIn.getPos();
        return true;
    }
    return false;
}
From source file: com.hdfs.concat.crush.KeyValuePreservingRecordReaderNextTest.java
License: Apache License

@Override
public boolean next(LongWritable key, Text value) throws IOException {
    if (next) {
        key.set(offset);
        value.set(line);
    }
    return next;
}
From source file: com.ibm.jaql.lang.expr.system.RJaqlInterface.java
License: Apache License

/**
 * Saves simple R objects into HDFS in one of the formats supported by Jaql
 * so that they can be read directly into Jaql.
 *
 * @param localPath    path of the local file to import
 * @param hdfsPath     destination path in HDFS
 * @param schemaString Jaql schema for the records, or null
 * @param format       serialization format to use
 * @param header       whether the first line is a header row
 * @param vector       whether to import as plain lines rather than delimited records
 * @return true on success, false if saving failed
 */
public boolean jaqlSave(String localPath, String hdfsPath, String schemaString, String format, boolean header,
        boolean vector) {
    if (format.equalsIgnoreCase(FORMAT_DELIM)) {
        LOG.info("Format: " + FORMAT_DELIM + ", saving to HDFS loc: " + hdfsPath);
        return RUtil.saveToHDFS(localPath, hdfsPath);
    }
    try {
        JobConf conf = new JobConf();
        int DEFAULT_BUFFER_SIZE = 64 * 1024;
        int bufferSize = conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
        BufferedReader reader = new BufferedReader(new FileReader(localPath), bufferSize);
        LongWritable key = new LongWritable(0);
        long count = 0;
        Text value = new Text();
        BufferedJsonRecord options = new BufferedJsonRecord(2);
        BufferedJsonArray headerArray = null;
        if (header) {
            String headerString = reader.readLine();
            String[] headers = splitPattern.split(headerString);
            headerArray = new BufferedJsonArray(headers.length);
            for (int i = 0; i < headers.length; i++) {
                headerArray.set(i, new JsonString(StringUtils.strip(headers[i], "\"")));
            }
            count++;
        }

        Schema schema = null;
        if (schemaString != null) {
            schema = SchemaFactory.parse(schemaString);
        }

        if (headerArray != null) {
            RecordSchema recordSchema = (RecordSchema) schema;

            // construct new matching schema
            List<Field> fields = new LinkedList<Field>();
            for (JsonValue fieldName : headerArray) {
                Field field;
                if (recordSchema == null) {
                    field = new Field((JsonString) fieldName, SchemaFactory.stringSchema(), false);
                } else {
                    field = recordSchema.getField((JsonString) fieldName);
                    if (field == null)
                        throw new NullPointerException("header field not in schema: " + fieldName);
                    // FIXME: schema fields that are not in the header are currently considered OK
                }
                fields.add(field);
            }

            // and set it
            schema = new RecordSchema(fields, null);
        }

        if (schema != null)
            options.add(DelOptionParser.SCHEMA_NAME, new JsonSchema(schema));

        KeyValueImport<LongWritable, Text> converter = null;
        if (vector) {
            converter = new FromLinesConverter();
        } else {
            converter = new FromDelConverter();
        }
        LOG.info("Initializing Converter with options: " + options);
        converter.init(options);
        Schema tmpSchema = converter.getSchema();
        tmpSchema = SchemaTransformation.removeNullability(tmpSchema);
        if (!tmpSchema.is(JsonType.ARRAY, JsonType.RECORD, JsonType.BOOLEAN, JsonType.DECFLOAT,
                JsonType.DOUBLE, JsonType.LONG, JsonType.STRING).always()) {
            throw new IOException("Unrecognized schema type: " + schema.getSchemaType());
        }
        JsonValue outValue = converter.createTarget();
        JsonHolder outKeyHolder;
        JsonHolder outValueHolder;
        if (format.equalsIgnoreCase(FORMAT_DEFAULT)) {
            HadoopSerializationDefault.register(conf);
            outKeyHolder = new JsonHolderDefault();
            outValueHolder = new JsonHolderDefault(outValue);
            LOG.info("Registered serializer for Default format.");
        } else if (format.equalsIgnoreCase(FORMAT_TEMP)) {
            // TODO: There should be a better way of doing this. HadoopSerializationTemp
            // now does it in an ugly way.
            BufferedJsonRecord tmpOptions = new BufferedJsonRecord();
            BufferedJsonRecord outOptions = new BufferedJsonRecord();
            outOptions.add(new JsonString("schema"), new JsonSchema(schema));
            tmpOptions.add(new JsonString("options"), outOptions);
            conf.set(ConfSetter.CONFOUTOPTIONS_NAME, tmpOptions.toString());
            HadoopSerializationTemp.register(conf);
            outKeyHolder = new JsonHolderTempKey(null);
            outValueHolder = new JsonHolderTempValue();
            LOG.info("Registered serializer for HadoopTemp format.");
        } else {
            throw new IOException("Unrecognized serialization format requested: " + format);
        }
        FileSystem fs = FileSystem.get(conf);
        Path outputPath = new Path(hdfsPath);
        Writer writer = SequenceFile.createWriter(fs, conf, outputPath, outKeyHolder.getClass(),
                outValueHolder.getClass());
        String line;
        while ((line = reader.readLine()) != null) {
            key.set(count++);
            value.set(line);
            outValue = converter.convert(key, value, outValue);
            outValueHolder.value = outValue;
            writer.append(outKeyHolder, outValueHolder);
        }
        LOG.info("Transferred " + count + " line(s).");
        reader.close();
        writer.close();
    } catch (IOException e) {
        LOG.info("Error in saving object.", e);
        return false;
    }
    return true;
}