List of usage examples for org.apache.hadoop.io.Text.getLength()
@Override public int getLength()
From source file:com.naltel.hadoop.hive.genericUDF.examples.GenericUDFInstr.java
License:Apache License
@Override public Object evaluate(DeferredObject[] arguments) throws HiveException { if (arguments[0].get() == null || arguments[1].get() == null) { return null; }/* w ww . j a v a 2s . co m*/ Text text = (Text) converters[0].convert(arguments[0].get()); Text subtext = (Text) converters[1].convert(arguments[1].get()); int startIndex = (arguments.length >= 3) ? ((IntWritable) converters[2].convert(arguments[2].get())).get() : DEFAULT_START_INDEX; int nth = (arguments.length == 4) ? ((IntWritable) converters[3].convert(arguments[3].get())).get() : DEFAULT_NTH; // argument checking if (startIndex < 0) { // if startIndex is negative, // the function counts back startIndex number of characters from the // end of text and then searches // towards the beginning of text. startIndex = text.getLength() + startIndex; } if (startIndex <= 0 || startIndex > text.getLength()) { intWritable.set(0); return intWritable; } int index = 0; int currentIndex = startIndex; for (int i = 0; i < nth; i++) { index = GenericUDFUtils.findText(text, subtext, currentIndex - 1) + 1; if (index == 0) {// not found intWritable.set(0); return intWritable; } currentIndex = index + 1; } intWritable.set(index); return intWritable; }
From source file:com.pagerankcalculator.calculation.PageRankCalculationMapper.java
@Override public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { int tabIdx1 = value.find("\t"); int tabIdx2 = value.find("\t", tabIdx1 + 1); String userID = Text.decode(value.getBytes(), 0, tabIdx1); String pageRank = Text.decode(value.getBytes(), tabIdx1 + 1, tabIdx2 - (tabIdx1 + 1)); String CSVFollowingIDs = Text.decode(value.getBytes(), tabIdx2 + 1, value.getLength() - (tabIdx2 + 1)); // System.out.print(userID); // System.out.print("\t"); // System.out.print(pageRank); // System.out.print("\t"); // System.out.println(CSVFollowingIDs); String[] followingIDs = CSVFollowingIDs.split(TwitterPageRank.FOLLOWING_LIST_DELIMETER); Integer totalFollowingIDs = followingIDs.length; for (String followingID : followingIDs) { String pageRankWithTotalFollowing = pageRank + "\t" + totalFollowingIDs.toString(); context.write(new Text(followingID), new Text(pageRankWithTotalFollowing)); }//from w ww . j ava 2 s.c o m context.write(new Text(userID), new Text(TwitterPageRank.FOLLOWING_LIST_TAG + CSVFollowingIDs)); }
From source file:com.pagerankcalculator.graphparsing.GraphParsingMapper.java
/**
 * Splits the input line at its first tab into a user ID (before the tab) and
 * a follower ID (after the tab), stores both in the corresponding fields and
 * emits the pair with the follower ID as key and the user ID as value.
 *
 * @param key     input key supplied by the framework (not used)
 * @param value   the input line
 * @param context the output collector
 * @throws IOException          if decoding or writing fails
 * @throws InterruptedException if writing is interrupted
 */
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    final int separatorAt = value.find("\t");
    final int afterSeparator = separatorAt + 1;
    final byte[] line = value.getBytes();

    userID = Text.decode(line, 0, separatorAt);
    followerID = Text.decode(line, afterSeparator, value.getLength() - afterSeparator);

    final Text outKey = new Text(followerID);
    final Text outValue = new Text(userID);
    context.write(outKey, outValue);
}
From source file:com.qq.pig.udf.CustomJsonLoader.java
License:Apache License
public Tuple parseTuple(Text val) throws IOException { // Create a parser specific for this input line. This may not be the // most efficient approach. //TODO why make a byte copy? byte[] newBytes = new byte[val.getLength()]; System.arraycopy(val.getBytes(), 0, newBytes, 0, val.getLength()); ByteArrayInputStream bais = new ByteArrayInputStream(newBytes); JsonParser p = jsonFactory.createJsonParser(bais); // Create the tuple we will be returning. We create it with the right // number of fields, as the Tuple object is optimized for this case. ResourceFieldSchema[] fields = schema.getFields(); Tuple t = tupleFactory.newTuple(fields.length); // Read the start object marker. Throughout this file if the parsing // isn't what we expect we return a tuple with null fields rather than // throwing an exception. That way a few mangled lines don't fail the // job./*ww w .j a va 2 s. com*/ if (p.nextToken() != JsonToken.START_OBJECT) { warn("Bad record, could not find start of record " + val.toString(), PigWarning.UDF_WARNING_1); return t; } readFields(p, t); p.close(); return t; }
From source file:com.ricemap.spateDB.core.GridInfo.java
License:Apache License
@Override public void fromText(Text text) { super.fromText(text); if (text.getLength() > 0) { // Remove the first comma System.arraycopy(text.getBytes(), 1, text.getBytes(), 0, text.getLength() - 1); layers = (int) TextSerializerHelper.consumeInt(text, ','); columns = (int) TextSerializerHelper.consumeInt(text, ','); rows = (int) TextSerializerHelper.consumeInt(text, '\0'); }/*from w ww . j a va 2 s . c o m*/ }
From source file:com.ricemap.spateDB.core.GridRecordWriter.java
License:Apache License
/** * Close the given cell freeing all memory reserved by it. * Once a cell is closed, we should not write more data to it. * @param cellInfo/* w ww .ja v a 2 s. c om*/ * @throws IOException */ protected void closeCellBackground(final Path intermediateCellPath, final Path finalCellPath, final OutputStream intermediateCellStream, final OutputStream masterFile, final Prism cellMbr) throws IOException { Thread closingThread = new Thread() { @Override public void run() { try { Path finalfinalCellPath = flushAllEntries(intermediateCellPath, intermediateCellStream, finalCellPath); // Write an entry to the master file // Write a line to the master file including file name and cellInfo if (masterFile != null) { Partition partition = new Partition(finalfinalCellPath.getName(), cellMbr); Text line = partition.toText(new Text()); masterFile.write(line.getBytes(), 0, line.getLength()); masterFile.write(NEW_LINE); } } catch (IOException e) { throw new RuntimeException("Error closing thread", e); } } }; closingThreads.add(closingThread); // Remove previously terminated threads while (!closingThreads.isEmpty() && closingThreads.get(0).getState() == Thread.State.TERMINATED) { closingThreads.remove(0); } // Start first thread (if exists) if (!closingThreads.isEmpty() && closingThreads.get(0).getState() == Thread.State.NEW) closingThreads.get(0).start(); }
From source file:com.ricemap.spateDB.core.Partition.java
License:Apache License
@Override public void fromText(Text text) { super.fromText(text); // Skip the comma and read filename filename = new String(text.getBytes(), 1, text.getLength() - 1); }
From source file:com.ricemap.spateDB.io.TextSerializerHelper.java
License:Apache License
/** * Appends hex representation of the given number to the given string. * If append is set to true, a comma is also appended to the text. * @param i// www.j a v a 2 s . c om * @param t * @param appendComma */ public static void serializeHexLong(long i, Text t, char toAppend) { // Calculate number of bytes needed to serialize the given long int bytes_needed = 0; long temp; if (i < 0) { bytes_needed++; // An additional temp = -i; } else { temp = i; } do { bytes_needed += 1; temp >>>= 4; } while (temp != 0); if (toAppend != '\0') bytes_needed++; // Reserve the bytes needed in the text t.append(ToAppend, 0, bytes_needed); // Extract the underlying buffer array and fill it directly byte[] buffer = t.getBytes(); // Position of the next character to write in the text int position = t.getLength() - 1; if (toAppend != '\0') buffer[position--] = (byte) toAppend; final int shift = 4; final int radix = 1 << shift; final long mask = radix - 1; // Negative sign is prepended separately for negative numbers boolean negative = false; if (i < 0) { i = -i; negative = true; } do { buffer[position--] = digits[(int) (i & mask)]; i >>>= shift; } while (i != 0); if (negative) buffer[position--] = '-'; }
From source file:com.ricemap.spateDB.io.TextSerializerHelper.java
License:Apache License
/** * Deserializes and consumes a long from the given text. Consuming means all * characters read for deserialization are removed from the given text. * If separator is non-zero, a long is read and consumed up to the first * occurrence of this separator. The separator is also consumed. * @param text/*from w w w.jav a2 s. c om*/ * @param separator * @return */ public static long consumeHexLong(Text text, char separator) { int i = 0; byte[] bytes = text.getBytes(); // Skip until the separator or end of text while (i < text.getLength() && HexadecimalChars[bytes[i]]) i++; long l = deserializeHexLong(bytes, 0, i); // If the first char after the long is the separator, skip it if (i < text.getLength() && bytes[i] == separator) i++; // Shift bytes after the long System.arraycopy(bytes, i, bytes, 0, text.getLength() - i); text.set(bytes, 0, text.getLength() - i); return l; }
From source file:com.ricemap.spateDB.io.TextSerializerHelper.java
License:Apache License
/** * Deserializes and consumes a double from the given text. Consuming means all * characters read for deserialization are removed from the given text. * If separator is non-zero, a double is read and consumed up to the first * occurrence of this separator. The separator is also consumed. * @param text//ww w . j a va 2 s. c om * @param separator * @return */ public static double consumeDouble(Text text, char separator) { int i = 0; byte[] bytes = text.getBytes(); // Skip until the separator or end of text while (i < text.getLength() && ((bytes[i] >= '0' && bytes[i] <= '9') || bytes[i] == 'e' || bytes[i] == 'E' || bytes[i] == '-' || bytes[i] == '+' || bytes[i] == '.')) i++; double d = deserializeDouble(bytes, 0, i); if (i < text.getLength() && bytes[i] == separator) i++; System.arraycopy(bytes, i, bytes, 0, text.getLength() - i); text.set(bytes, 0, text.getLength() - i); return d; }