List of usage examples for org.apache.hadoop.io.Text.getLength()
@Override public int getLength()
From source file:com.ricemap.spateDB.io.TextSerializerHelper.java
License:Apache License
public static void serializeLong(long i, Text t, char toAppend) { // Calculate number of bytes needed to serialize the given long int bytes_needed = 0; long temp;// w w w . j ava 2 s . c o m if (i < 0) { bytes_needed++; // An additional temp = -i; } else { temp = i; } do { bytes_needed += 1; temp /= 10; } while (temp != 0); if (toAppend != '\0') bytes_needed++; // Reserve the bytes needed in the text t.append(ToAppend, 0, bytes_needed); // Extract the underlying buffer array and fill it directly byte[] buffer = t.getBytes(); // Position of the next character to write in the text int position = t.getLength() - 1; if (toAppend != '\0') buffer[position--] = (byte) toAppend; // Negative sign is prepended separately for negative numbers boolean negative = false; if (i < 0) { i = -i; negative = true; } do { int digit = (int) (i % 10); buffer[position--] = digits[digit]; i /= 10; } while (i != 0); if (negative) buffer[position--] = '-'; }
From source file:com.ricemap.spateDB.io.TextSerializerHelper.java
License:Apache License
public static long consumeLong(Text text, char separator) { int i = 0;// w w w.j a v a 2 s. c o m byte[] bytes = text.getBytes(); // Skip until the separator or end of text while (i < text.getLength() && DecimalChars[bytes[i]]) i++; long l = deserializeLong(bytes, 0, i); // If the first char after the long is the separator, skip it if (i < text.getLength() && bytes[i] == separator) i++; // Shift bytes after the long System.arraycopy(bytes, i, bytes, 0, text.getLength() - i); text.set(bytes, 0, text.getLength() - i); return l; }
From source file:com.ricemap.spateDB.io.TextSerializerHelper.java
License:Apache License
public static void serializeInt(int i, Text t, char toAppend) { // Calculate number of bytes needed to serialize the given long int bytes_needed = 0; int temp;/*from ww w. j ava2s . co m*/ if (i < 0) { bytes_needed++; // An additional temp = -i; } else { temp = i; } do { bytes_needed += 1; temp /= 10; } while (temp != 0); if (toAppend != '\0') bytes_needed++; // Reserve the bytes needed in the text t.append(ToAppend, 0, bytes_needed); // Extract the underlying buffer array and fill it directly byte[] buffer = t.getBytes(); // Position of the next character to write in the text int position = t.getLength() - 1; if (toAppend != '\0') buffer[position--] = (byte) toAppend; // Negative sign is prepended separately for negative numbers boolean negative = false; if (i < 0) { i = -i; negative = true; } do { int digit = i % 10; buffer[position--] = digits[digit]; i /= 10; } while (i != 0); if (negative) buffer[position--] = '-'; }
From source file:com.ricemap.spateDB.io.TextSerializerHelper.java
License:Apache License
public static int consumeInt(Text text, char separator) { int i = 0;/*from w w w . j av a 2 s. c om*/ byte[] bytes = text.getBytes(); // Skip until the separator or end of text while (i < text.getLength() && DecimalChars[bytes[i]]) i++; int l = deserializeInt(bytes, 0, i); // If the first char after the long is the separator, skip it if (i < text.getLength() && bytes[i] == separator) i++; // Shift bytes after the long System.arraycopy(bytes, i, bytes, 0, text.getLength() - i); text.set(bytes, 0, text.getLength() - i); return l; }
From source file:com.ricemap.spateDB.io.TextSerializerHelper.java
License:Apache License
/**
 * Parses a serialized map from the beginning of the given text into
 * {@code tags} and removes the parsed entries from the text.
 *
 * The map is always cleared first. Nothing else happens when the text is
 * empty or does not start with the map-start separator. Otherwise entries
 * are read as key, key/value separator, value, delimited by the field
 * separator, until the map-end separator or the end of the text is reached.
 * The text is then reset to the bytes starting at the position where
 * parsing stopped (the map-end separator itself is not skipped).
 *
 * @param text the text to parse; modified in place
 * @param tags the map that receives the parsed entries
 */
public static void consumeMap(Text text, Map<String, String> tags) {
    tags.clear();
    if (text.getLength() <= 0) {
        return;
    }

    final byte[] raw = text.getBytes();
    if (raw[0] != Separators[MapStart]) {
        return;
    }

    final int length = text.getLength();
    int cursor = 1;
    while (cursor < length && raw[cursor] != Separators[MapEnd]) {
        // Key: the scan starts one byte past the key's first byte
        int keyEnd = cursor + 1;
        while (keyEnd < length && raw[keyEnd] != Separators[KeyValueSeparator]) {
            keyEnd++;
        }
        final String key = new String(raw, cursor, keyEnd - cursor);

        // Value: begins right after the key/value separator and runs up to
        // the next field separator or the end of the map
        final int valueStart = keyEnd + 1;
        int valueEnd = valueStart + 1;
        while (valueEnd < length
                && raw[valueEnd] != Separators[FieldSeparator]
                && raw[valueEnd] != Separators[MapEnd]) {
            valueEnd++;
        }
        final String value = new String(raw, valueStart, valueEnd - valueStart);
        tags.put(key, value);

        // Step over the field separator so the next entry starts at its key
        cursor = valueEnd;
        if (cursor < length && raw[cursor] == Separators[FieldSeparator]) {
            cursor++;
        }
    }

    // Keep only the unparsed remainder in the text
    text.set(raw, cursor, length - cursor);
}
From source file:com.ricemap.spateDB.mapred.SpatialRecordReader.java
License:Apache License
/** * Reads the next line from input and return true if a line was read. * If no more lines are available in this split, a false is returned. * @param value// w w w. j a va 2s .co m * @return * @throws IOException */ protected boolean nextLine(Text value) throws IOException { if (blockType == BlockType.RTREE && pos == 8) { // File is positioned at the RTree header // Skip the header and go to first data object in file pos += RTree.skipHeader(in); LOG.info("Skipped R-tree to position: " + pos); // Reinitialize record reader at the new position lineReader = new LineReader(in); } while (getFilePosition() <= end) { value.clear(); int b = 0; if (buffer != null) { // Read the first line encountered in buffer int eol = RTree.skipToEOL(buffer, 0); b += eol; value.append(buffer, 0, eol); if (eol < buffer.length) { // There are still some bytes remaining in buffer byte[] tmp = new byte[buffer.length - eol]; System.arraycopy(buffer, eol, tmp, 0, tmp.length); } else { buffer = null; } // Check if a complete line has been read from the buffer byte last_byte = value.getBytes()[value.getLength() - 1]; if (last_byte == '\n' || last_byte == '\r') return true; } // Read the first line from stream Text temp = new Text(); b += lineReader.readLine(temp); if (b == 0) { // Indicates an end of stream return false; } pos += b; // Append the part read from stream to the part extracted from buffer value.append(temp.getBytes(), 0, temp.getLength()); if (value.getLength() > 1) { // Read a non-empty line. Note that end-of-line character is included return true; } } // Reached end of file return false; }
From source file:com.ricemap.spateDB.operations.RecordCount.java
License:Apache License
/** * Counts the exact number of lines in a file by opening the file and * reading it line by line/* ww w . j a va 2 s . com*/ * @param fs * @param file * @return * @throws IOException */ public static long recordCountLocal(FileSystem fs, Path file) throws IOException { LineReader lineReader = new LineReader(fs.open(file)); Text line = new Text(); long lineCount = 0; while (lineReader.readLine(line) > 0) { if (line.getLength() > 0) lineCount++; } lineReader.close(); return lineCount; }
From source file:com.ricemap.spateDB.operations.Tail.java
License:Apache License
/** * Reads a maximum of n lines from the stream starting from its current * position and going backward./*from ww w . j a va 2 s . c om*/ * * @param in - An input stream. It'll be scanned from its current position * backward till position 0 * @param n - Maximum number of lines to return * @param stockObject - An object used to deserialize lines read. It can * be set to <code>null</code> if output is also <code>null</code>. In this * case, nothing is reported to the output. * @param output - An output collector used to report lines read. * @return - The position of the beginning of the earliest line read from * buffer. * @throws IOException */ public static <T extends TextSerializable> long tail(FSDataInputStream in, int n, T stockObject, ResultCollector<T> output) throws IOException { int lines_read = 0; long end = in.getPos(); long offset_of_last_eol = end; long last_read_byte = end; LongWritable line_offset = new LongWritable(); Text read_line = new Text(); Text remainder_from_last_buffer = new Text(); byte[] buffer = new byte[4096]; while (last_read_byte > 0 && lines_read < n) { // Read next chunk from the back long first_byte_to_read = (last_read_byte - 1) - (last_read_byte - 1) % buffer.length; in.seek(first_byte_to_read); int bytes_to_read = (int) (last_read_byte - first_byte_to_read); in.read(buffer, 0, bytes_to_read); last_read_byte = first_byte_to_read; // Iterate over bytes in this buffer int i_last_byte_consumed_in_buffer = bytes_to_read; int i_last_byte_examined_in_buffer = bytes_to_read; while (i_last_byte_examined_in_buffer > 0 && lines_read < n) { byte byte_examined = buffer[--i_last_byte_examined_in_buffer]; if (byte_examined == '\n' || byte_examined == '\r') { // Found an end of line character // Report this to output unless it's empty long offset_of_this_eol = first_byte_to_read + i_last_byte_examined_in_buffer; if (offset_of_last_eol - offset_of_this_eol > 1) { if (output != null) { read_line.clear(); // +1 is to skip the EOL at the beginning 
read_line.append(buffer, i_last_byte_examined_in_buffer + 1, i_last_byte_consumed_in_buffer - (i_last_byte_examined_in_buffer + 1)); // Also append bytes remaining from last buffer if (remainder_from_last_buffer.getLength() > 0) { read_line.append(remainder_from_last_buffer.getBytes(), 0, remainder_from_last_buffer.getLength()); } line_offset.set(offset_of_this_eol + 1); stockObject.fromText(read_line); output.collect(stockObject); } lines_read++; remainder_from_last_buffer.clear(); } i_last_byte_consumed_in_buffer = i_last_byte_examined_in_buffer; offset_of_last_eol = offset_of_this_eol; } } if (i_last_byte_consumed_in_buffer > 0) { // There are still some bytes not consumed in buffer if (remainder_from_last_buffer.getLength() == 0) { // Store whatever is remaining in remainder_from_last_buffer remainder_from_last_buffer.append(buffer, 0, i_last_byte_consumed_in_buffer); } else { // Prepend remaining bytes to Text Text t = new Text(); t.append(buffer, 0, i_last_byte_consumed_in_buffer); t.append(remainder_from_last_buffer.getBytes(), 0, remainder_from_last_buffer.getLength()); remainder_from_last_buffer = t; } } } if (lines_read < n && remainder_from_last_buffer.getLength() > 0) { // There is still one last line needs to be reported lines_read++; if (output != null) { read_line = remainder_from_last_buffer; line_offset.set(0); stockObject.fromText(read_line); output.collect(stockObject); } offset_of_last_eol = -1; } return offset_of_last_eol + 1; }
From source file:com.ricemap.spateDB.util.CommandLineArguments.java
License:Apache License
/** * //from w ww. java2s. c o m * @param autodetect - Automatically detect shape type from input file * if shape is not explicitly set by user * @return */ public Shape getShape(boolean autodetect) { String shapeTypeStr = get("shape"); final Text shapeType = new Text(); if (shapeTypeStr != null) shapeType.set(shapeTypeStr.toLowerCase().getBytes()); if (autodetect && shapeType.getLength() == 0 && getPath() != null) { // Shape type not found in parameters. Try to infer from a line in input // file Path in_file = getPath(); try { Sampler.sampleLocal(in_file.getFileSystem(new Configuration()), in_file, 1, 0, new ResultCollector<Text2>() { @Override public void collect(Text2 value) { String val = value.toString(); String[] parts = val.split(","); if (parts.length == 2) { shapeType.set("point".getBytes()); } else if (parts.length == 4) { shapeType.set("rect".getBytes()); } else if (parts.length > 4) { shapeType.set("tiger".getBytes()); } } }, new Text2(), new Text2()); } catch (IOException e) { e.printStackTrace(); } } Shape stockShape = null; if (shapeType.toString().startsWith("rect")) { stockShape = new Prism(); } else if (shapeType.toString().startsWith("point")) { stockShape = new Point3d(); } else if (shapeType.toString().startsWith("datapoint")) { stockShape = new DataPoint(); } else if (shapeTypeStr != null) { // Use the shapeType as a class name and try to instantiate it dynamically try { Class<? extends Shape> shapeClass = Class.forName(shapeTypeStr).asSubclass(Shape.class); stockShape = shapeClass.newInstance(); } catch (ClassNotFoundException e) { } catch (InstantiationException e) { } catch (IllegalAccessException e) { } } if (stockShape == null) LOG.warn("unknown shape type: " + shapeTypeStr); return stockShape; }
From source file:com.rramos.bigdata.utils.GenericUDFSha2.java
License:Apache License
/**
 * Hashes the first argument with the configured digest and returns the
 * result as a hex string.
 *
 * The first argument is read as text when {@code isStr} is set and as binary
 * otherwise; only the valid portion of the backing array is hashed.
 *
 * @param arguments the deferred UDF arguments; only index 0 is read here
 * @return {@code output} holding the hex-encoded digest, or null when no
 *         digest is configured or the argument value is null
 * @throws HiveException declared by the overridden method
 */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    if (digest == null) {
        return null;
    }
    digest.reset();

    // Resolve the input bytes and their valid length for either input kind
    final byte[] payload;
    final int payloadLength;
    if (isStr) {
        Text text = GenericUDFParamUtils.getTextValue(arguments, 0, converters);
        if (text == null) {
            return null;
        }
        payload = text.getBytes();
        payloadLength = text.getLength();
    } else {
        BytesWritable binary = GenericUDFParamUtils.getBinaryValue(arguments, 0, converters);
        if (binary == null) {
            return null;
        }
        payload = binary.getBytes();
        payloadLength = binary.getLength();
    }

    // The backing array may be longer than the value, hence the explicit length
    digest.update(payload, 0, payloadLength);
    output.set(Hex.encodeHexString(digest.digest()));
    return output;
}