List of usage examples for org.apache.hadoop.io.Text#getLength()
@Override public int getLength()
From source file:com.asakusafw.runtime.value.StringOption.java
License:Apache License
/**
 * Checks two {@link Text} values for equality by delegating to {@code ByteArrayUtil.equals}.
 * Each text contributes its byte array together with its own {@code getLength()}, starting at offset 0.
 * @param a the first text
 * @param b the second text
 * @return the result reported by {@code ByteArrayUtil.equals} for the two byte ranges
 */
private static boolean equalsTexts(Text a, Text b) {
    final byte[] leftBytes = a.getBytes();
    final int leftLength = a.getLength();
    final byte[] rightBytes = b.getBytes();
    final int rightLength = b.getLength();
    return ByteArrayUtil.equals(leftBytes, 0, leftLength, rightBytes, 0, rightLength);
}
From source file:com.asakusafw.runtime.value.StringOption.java
License:Apache License
/**
 * Orders two {@link Text} values by delegating to {@code ByteArrayUtil.compare}.
 * Each text contributes its byte array together with its own {@code getLength()}, starting at offset 0.
 * @param a the first text
 * @param b the second text
 * @return the comparison result reported by {@code ByteArrayUtil.compare}
 */
private static int compareTexts(Text a, Text b) {
    final byte[] leftBytes = a.getBytes();
    final int leftLength = a.getLength();
    final byte[] rightBytes = b.getBytes();
    final int rightLength = b.getLength();
    return ByteArrayUtil.compare(leftBytes, 0, leftLength, rightBytes, 0, rightLength);
}
From source file:com.asakusafw.runtime.value.StringOptionUtil.java
License:Apache License
/** * Returns a {@link Reader} to read the text contents in the {@link StringOption}. * @param option the target {@link StringOption} * @return the created reader/*from ww w.jav a2 s. co m*/ * @throws NullPointerException if the {@link StringOption} is/represents {@code null} */ public static Reader asReader(StringOption option) { Text text = option.get(); InputBuffer buffer = new InputBuffer(); buffer.reset(text.getBytes(), 0, text.getLength()); return new InputStreamReader(buffer, ENCODING); }
From source file:com.asakusafw.runtime.value.StringOptionUtil.java
License:Apache License
/**
 * Strips classical whitespace from both ends of the {@link StringOption} contents.
 * Exactly the following characters are treated as whitespace:
 * <ul>
 * <li> {@code "\t" (HT:U+0009)} </li>
 * <li> {@code "\n" (LF:U+000a)} </li>
 * <li> {@code "\r" (CR:U+000d)} </li>
 * <li> {@code " " (SP:U+0020)} </li>
 * </ul>
 * The target {@link StringOption} is modified in place; it is left untouched
 * when there is nothing to strip.
 * @param option the target {@link StringOption}
 * @throws NullPointerException if the {@link StringOption} is/represents {@code null}
 */
public static void trim(StringOption option) {
    Text entity = option.get();
    byte[] contents = entity.getBytes();
    int size = entity.getLength();
    int head = 0;
    int tail = size - 1;
    // advance past leading trim targets
    while (head <= tail && isTrimTarget(contents[head])) {
        head++;
    }
    // retreat past trailing trim targets
    while (tail >= head && isTrimTarget(contents[tail])) {
        tail--;
    }
    boolean changed = head != 0 || tail != size - 1;
    if (changed) {
        entity.set(contents, head, tail + 1 - head);
    }
}
From source file:com.asakusafw.runtime.value.StringOptionUtil.java
License:Apache License
/**
 * Appends the first {@code getLength()} bytes of {@code text} to the {@link Text}
 * held by {@code target}.
 * @param target the option whose text receives the bytes
 * @param text the text to append
 */
private static void append(StringOption target, Text text) {
    final Text destination = target.get();
    final byte[] source = text.getBytes();
    final int sourceLength = text.getLength();
    destination.append(source, 0, sourceLength);
}
From source file:com.ashishpaliwal.hadoop.utils.inputformat.CsvLineReader.java
License:Apache License
/** * Read from the InputStream into the given Text. * * @param txt the object to store the given line * @param maxLineLength the maximum number of bytes to store into txt. * @param maxBytesToConsume the maximum number of bytes to consume in this * call.// w w w . j a v a 2 s . c o m * @return the number of bytes read including the newline * @throws IOException if the underlying stream throws */ public int readLine(Text txt, int maxLineLength, int maxBytesToConsume) throws IOException { txt.clear(); boolean hadFinalNewline = false; boolean hadFinalReturn = false; boolean hitEndOfFile = false; int startPosn = bufferPosn; long bytesConsumed = 0; boolean inQuote = false; boolean isLastCharEscapeChar = false; outerLoop: while (true) { if (bufferPosn >= bufferLength) { if (!backfill()) { hitEndOfFile = true; break; } } startPosn = bufferPosn; for (; bufferPosn < bufferLength; ++bufferPosn) { switch (buffer[bufferPosn]) { case '\\': isLastCharEscapeChar = !isLastCharEscapeChar; break; case '"': if (!inQuote && hadFinalReturn) { break outerLoop; } if (!isLastCharEscapeChar) { inQuote = !inQuote; } isLastCharEscapeChar = false; break; case '\n': isLastCharEscapeChar = false; if (!inQuote) { hadFinalNewline = true; bufferPosn += 1; break outerLoop; } break; case '\r': isLastCharEscapeChar = false; if (!inQuote) { if (hadFinalReturn) { // leave this \r in the stream, so we'll get it next time break outerLoop; } hadFinalReturn = true; } break; default: isLastCharEscapeChar = false; if (!inQuote && hadFinalReturn) { break outerLoop; } } } bytesConsumed += bufferPosn - startPosn; int length = bufferPosn - startPosn - (hadFinalReturn ? 1 : 0); length = Math.min(length, maxLineLength - txt.getLength()); if (length >= 0) txt.append(buffer, startPosn, length); if (bytesConsumed >= maxBytesToConsume) return (int) Math.min(bytesConsumed, (long) Integer.MAX_VALUE); } int newlineLength = (hadFinalNewline ? 1 : 0) + (hadFinalReturn ? 
1 : 0); if (!hitEndOfFile) { bytesConsumed += bufferPosn - startPosn; int length = bufferPosn - startPosn - newlineLength; length = Math.min(length, maxLineLength - txt.getLength()); if (length > 0) txt.append(buffer, startPosn, length); } return (int) Math.min(bytesConsumed, (long) Integer.MAX_VALUE); }
From source file:com.asp.tranlog.TsvImporterMapper.java
License:Apache License
/** * Convert a line of TSV text into an HBase table row. *//* w w w . ja va 2 s . c om*/ @Override public void map(LongWritable offset, Text value, Context context) throws IOException { byte[] lineBytes = value.getBytes(); // String lineStr = new String(value.getBytes(), 0, value.getLength(), // "gb18030"); // byte[] lineBytes = new Text(lineStr).getBytes(); int i = 0; try { ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, value.getLength()); // ImportTsv.TsvParser.ParsedLine parsed = parser.parse( // lineBytes, Text.utf8Length(lineStr)); byte[] rowKeyBytes = createRowkeyByteArray(lineBytes, parsed); ImmutableBytesWritable rowKey = new ImmutableBytesWritable(rowKeyBytes); Put put = new Put(rowKeyBytes); put.setWriteToWAL(false); for (i = 0; i < parsed.getColumnCount(); i++) { KeyValue kv = null; if (columnTypes[i] == ImportTsv.COL_TYPE_STRING) { kv = new KeyValue(rowKeyBytes, parser.getFamily(i), parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); } else { byte[] colBytes = convertColBytes(lineBytes, parsed, i); if (colBytes == null) throw new ImportTsv.TsvParser.BadTsvLineException("Failed to get bytes for column " + i); kv = new KeyValue(rowKeyBytes, parser.getFamily(i), parser.getQualifier(i), ts, colBytes); } if (kv == null) throw new ImportTsv.TsvParser.BadTsvLineException("Failed to get bytes for column " + i); put.add(kv); } context.write(rowKey, put); } catch (ImportTsv.TsvParser.BadTsvLineException badLine) { if (skipBadLines) { System.err.println("Bad line: " + new String(lineBytes, "gb18030") + ":" + i + "\n"); LOG.error("Bad line: " + new String(lineBytes, "gb18030") + "," + i); incrementBadLineCount(1); return; } else { throw new IOException(badLine); } } catch (IllegalArgumentException e) { if (skipBadLines) { System.err.println("Bad line: " + new String(lineBytes, "gb18030") + ":" + i + "\n"); LOG.error("Bad line: " + new String(lineBytes, 
"gb18030") + "," + i); incrementBadLineCount(1); return; } else { throw new IOException(e); } } catch (InterruptedException e) { e.printStackTrace(); } }
From source file:com.chinamobile.bcbsp.partition.HashWithBalancerWritePartition.java
License:Apache License
/**
 * Partitions the graph vertices read from a split and writes each vertex to
 * its partition.
 * <p>
 * The method works in three phases, all visible in the body below:
 * </p>
 * <ol>
 * <li>Every record is serialized and spooled to two temporary files under
 * {@code /tmp/bcbsp/<jobID>/<staffID>}: {@code data.txt} receives the
 * serialized key followed by the serialized value, and {@code pidandsize.txt}
 * receives, per record, the serialized record size followed by the partition
 * id returned by the partitioner. A per-id record count is kept in
 * {@code counter}.</li>
 * <li>The counts are handed to {@code loadDataInBalancerBarrier}, which returns
 * the map from the ids computed in phase 1 to the final partition.</li>
 * <li>The spooled files are read back. Records whose final partition is this
 * staff's partition are parsed and added to the local graph data; the others
 * are collected per partition and handed to a thread taken from the
 * {@code ThreadPool}.</li>
 * </ol>
 * The temporary files and directories are deleted on both the normal and the
 * exceptional path.
 *
 * @param recordReader The record reader of the split.
 * @throws IOException The I/O exception
 * @throws InterruptedException The interrupted exception
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
    // Statistics reported at the end of the method.
    int headNodeNum = 0;
    int local = 0;
    int send = 0;
    int lost = 0;
    ThreadPool tpool = new ThreadPool(this.sendThreadNum);
    int staffNum = this.staff.getStaffNum();
    // Scratch holders for the serialized key and value of the current record.
    BytesWritable kbytes = new BytesWritable();
    int ksize = 0;
    BytesWritable vbytes = new BytesWritable();
    int vsize = 0;
    // All three serializers below write into this one buffer; it is reset before each use.
    DataOutputBuffer bb = new DataOutputBuffer();
    int bufferSize = (int) ((this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER) * PART);
    int dataBufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
            / (this.staff.getStaffNum() + this.sendThreadNum);
    // In-memory staging area for data.txt.
    byte[] buffer = new byte[bufferSize];
    int bufindex = 0;
    SerializationFactory sFactory = new SerializationFactory(new Configuration());
    Serializer<IntWritable> psserializer = sFactory.getSerializer(IntWritable.class);
    // In-memory staging area for pidandsize.txt.
    byte[] pidandsize = new byte[TIME * CONTAINERNUMBER * CONTAINERNUMBER];
    int psindex = 0;
    BytesWritable pidbytes = new BytesWritable();
    int psize = 0;
    BytesWritable sizebytes = new BytesWritable();
    int ssize = 0;
    try {
        this.keyserializer.open(bb);
        this.valueserializer.open(bb);
        psserializer.open(bb);
    } catch (IOException e) {
        // NOTE(review): catch-and-rethrow with no added handling.
        throw e;
    }
    String path = "/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID();
    // Create the job directory first, then the staff directory inside it.
    File dir = new File("/tmp/bcbsp/" + this.staff.getJobID());
    dir.mkdir();
    dir = new File("/tmp/bcbsp/" + this.staff.getJobID() + "/"
            + this.staff.getStaffID());
    dir.mkdir();
    // Temporary files to delete when the method finishes.
    ArrayList<File> files = new ArrayList<File>();
    try {
        File file = new File(path + "/" + "data" + ".txt");
        files.add(file);
        // NOTE(review): none of the four streams below is closed if an exception is
        // thrown before the explicit close() calls further down — consider closing in finally.
        DataOutputStream dataWriter = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(path + "/" + "data" + ".txt", true)));
        DataInputStream dataReader = new DataInputStream(
                new BufferedInputStream(new FileInputStream(path + "/" + "data" + ".txt")));
        File filet = new File(path + "/" + "pidandsize" + ".txt");
        files.add(filet);
        DataOutputStream psWriter = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(path + "/" + "pidandsize" + ".txt", true)));
        DataInputStream psReader = new DataInputStream(
                new BufferedInputStream(new FileInputStream(path + "/" + "pidandsize" + ".txt")));
        // ---- Phase 1: spool every record and its (size, pid) index entry to disk ----
        while (recordReader != null && recordReader.nextKeyValue()) {
            headNodeNum++;
            Text key = new Text(recordReader.getCurrentKey().toString());
            Text value = new Text(recordReader.getCurrentValue().toString());
            int pid = -1;
            Text vertexID = this.recordParse.getVertexID(key);
            if (vertexID != null) {
                pid = this.partitioner.getPartitionID(vertexID);
            } else {
                // No vertex id could be extracted: count the record as lost and skip it.
                lost++;
                continue;
            }
            // Count the records seen for this pid.
            if (this.counter.containsKey(pid)) {
                this.counter.put(pid, (this.counter.get(pid) + 1));
            } else {
                this.counter.put(pid, 1);
            }
            // Serialize key, value, total size and pid one after another through bb.
            bb.reset();
            this.keyserializer.serialize(key);
            kbytes.set(bb.getData(), 0, bb.getLength());
            ksize = kbytes.getLength();
            bb.reset();
            this.valueserializer.serialize(value);
            vbytes.set(bb.getData(), 0, bb.getLength());
            vsize = vbytes.getLength();
            bb.reset();
            psserializer.serialize(new IntWritable(ksize + vsize));
            sizebytes.set(bb.getData(), 0, bb.getLength());
            ssize = sizebytes.getLength();
            bb.reset();
            psserializer.serialize(new IntWritable(pid));
            pidbytes.set(bb.getData(), 0, bb.getLength());
            psize = pidbytes.getLength();
            // Stage the index entry (size first, then pid); flush the staging array first if it is full.
            if ((pidandsize.length - psindex) > (ssize + psize)) {
                System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
                psindex += ssize;
                System.arraycopy(pidbytes.getBytes(), 0, pidandsize,
                        psindex, psize);
                psindex += psize;
            } else {
                psWriter.write(pidandsize, 0, psindex);
                psindex = 0;
                System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
                psindex += ssize;
                System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
                psindex += psize;
            }
            // Stage the record itself (key first, then value).
            if ((buffer.length - bufindex) > (ksize + vsize)) {
                System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
                bufindex += ksize;
                System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
                bufindex += vsize;
            } else if (buffer.length < (ksize + vsize)) {
                // The record is larger than the whole staging buffer: flush what is
                // staged, then write the record straight to the file.
                dataWriter.write(buffer, 0, bufindex);
                bufindex = 0;
                LOG.info("This is a super record");
                dataWriter.write(kbytes.getBytes(), 0, ksize);
                dataWriter.write(vbytes.getBytes(), 0, vsize);
            } else {
                // The record fits in an empty buffer: flush, then stage it.
                dataWriter.write(buffer, 0, bufindex);
                bufindex = 0;
                System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
                bufindex += ksize;
                System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
                bufindex += vsize;
            }
        }
        // Flush whatever is still staged and finish writing both files.
        if (psindex != 0) {
            psWriter.write(pidandsize, 0, psindex);
        }
        if (bufindex != 0) {
            dataWriter.write(buffer, 0, bufindex);
            bufindex = 0;
        }
        dataWriter.close();
        dataWriter = null;
        psWriter.close();
        psWriter = null;
        buffer = null;
        pidandsize = null;
        // ---- Phase 2: publish the per-pid counts and obtain the id -> partition map ----
        this.ssrc.setDirFlag(new String[] { "3" });
        this.ssrc.setCounter(this.counter);
        HashMap<Integer, Integer> hashBucketToPartition = this.sssc.loadDataInBalancerBarrier(ssrc,
                Constants.PARTITION_TYPE.HASH);
        this.staff.setHashBucketToPartition(hashBucketToPartition);
        // ---- Phase 3: read the spool files back and route each record ----
        // One staging buffer and one fill index per target partition.
        byte[][] databuf = new byte[staffNum][dataBufferSize];
        int[] databufindex = new int[staffNum];
        try {
            IntWritable pid = new IntWritable();
            IntWritable size = new IntWritable();
            int belongPid = 0;
            // No explicit exit condition: the loop is left through the EOFException
            // caught below (presumably thrown by readFields once psReader is
            // exhausted — confirm).
            while (true) {
                size.readFields(psReader);
                pid.readFields(psReader);
                belongPid = hashBucketToPartition.get(pid.get());
                if (belongPid != this.staff.getPartition()) {
                    send++;
                } else {
                    local++;
                }
                if ((databuf[belongPid].length - databufindex[belongPid]) > size.get()) {
                    // The record fits behind what is already staged for this partition.
                    // NOTE(review): the return value of read() is ignored here and in the
                    // other read() calls below; InputStream.read may return fewer bytes than
                    // requested — consider readFully.
                    dataReader.read(databuf[belongPid], databufindex[belongPid],
                            size.get());
                    databufindex[belongPid] += size.get();
                } else if (databuf[belongPid].length < size.get()) {
                    // The record is larger than a whole partition buffer: handle it on
                    // its own without touching the staged data.
                    LOG.info("This is a super record");
                    byte[] tmp = new byte[size.get()];
                    dataReader.read(tmp, 0, size.get());
                    if (belongPid == this.staff.getPartition()) {
                        // Local record: decode key/value pairs and add the vertices directly.
                        DataInputStream reader = new DataInputStream(
                                new BufferedInputStream(new ByteArrayInputStream(tmp)));
                        try {
                            // Despite its name, 'stop' is true while the loop should keep going.
                            boolean stop = true;
                            while (stop) {
                                Text key = new Text();
                                key.readFields(reader);
                                Text value = new Text();
                                value.readFields(reader);
                                if (key.getLength() > 0 && value.getLength() > 0) {
                                    Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                                    if (vertex == null) {
                                        lost++;
                                        continue;
                                    }
                                    this.staff.getGraphData().addForAll(vertex);
                                } else {
                                    stop = false;
                                }
                            }
                        } catch (IOException e) {
                            // Presumably the usual way out of the loop above (end of the
                            // in-memory stream) — confirm.
                            // NOTE(review): concatenating e.getStackTrace() appends the
                            // array's toString(), not the stack trace.
                            LOG.info("IO exception: " + e.getStackTrace());
                        }
                    } else {
                        // Remote record: hand the bytes to a sender thread.
                        ThreadSignle t = tpool.getThread();
                        // NOTE(review): spins until getThread() returns non-null.
                        while (t == null) {
                            t = tpool.getThread();
                        }
                        t.setWorker(
                                this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid));
                        t.setJobId(staff.getJobID());
                        t.setTaskId(staff.getStaffID());
                        t.setBelongPartition(belongPid);
                        BytesWritable data = new BytesWritable();
                        data.set(tmp, 0, size.get());
                        t.setData(data);
                        LOG.info("Using Thread is: " + t.getThreadNumber());
                        t.setStatus(true);
                    }
                    tmp = null;
                } else {
                    // The partition buffer is too full for this record: drain the buffer
                    // first, then stage the record at its start.
                    if (belongPid == this.staff.getPartition()) {
                        DataInputStream reader = new DataInputStream(new BufferedInputStream(
                                new ByteArrayInputStream(databuf[belongPid], 0, databufindex[belongPid])));
                        try {
                            // Despite its name, 'stop' is true while the loop should keep going.
                            boolean stop = true;
                            while (stop) {
                                Text key = new Text();
                                key.readFields(reader);
                                Text value = new Text();
                                value.readFields(reader);
                                if (key.getLength() > 0 && value.getLength() > 0) {
                                    Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                                    if (vertex == null) {
                                        lost++;
                                        continue;
                                    }
                                    this.staff.getGraphData().addForAll(vertex);
                                } else {
                                    stop = false;
                                }
                            }
                        } catch (IOException e) {
                            // NOTE(review): same logging issue as above (array toString()).
                            LOG.info("IO exception: " + e.getStackTrace());
                        }
                    } else {
                        ThreadSignle t = tpool.getThread();
                        while (t == null) {
                            t = tpool.getThread();
                        }
                        t.setWorker(
                                this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid));
                        t.setJobId(staff.getJobID());
                        t.setTaskId(staff.getStaffID());
                        t.setBelongPartition(belongPid);
                        BytesWritable data = new BytesWritable();
                        data.set(databuf[belongPid], 0, databufindex[belongPid]);
                        t.setData(data);
                        LOG.info("Using Thread is: " + t.getThreadNumber());
                        t.setStatus(true);
                    }
                    databufindex[belongPid] = 0;
                    dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
                    databufindex[belongPid] += size.get();
                }
            }
        } catch (EOFException ex) {
            // NOTE(review): this is the only way out of the while (true) loop above,
            // yet it is logged at error level — confirm whether that is intended.
            LOG.error("[write]", ex);
        }
        // Drain whatever is still staged for each partition.
        for (int i = 0; i < staffNum; i++) {
            if (databufindex[i] != 0) {
                if (i == this.staff.getPartition()) {
                    DataInputStream reader = new DataInputStream(
                            new BufferedInputStream(new ByteArrayInputStream(databuf[i], 0, databufindex[i])));
                    try {
                        // Despite its name, 'stop' is true while the loop should keep going.
                        boolean stop = true;
                        while (stop) {
                            Text key = new Text();
                            key.readFields(reader);
                            Text value = new Text();
                            value.readFields(reader);
                            if (key.getLength() > 0 && value.getLength() > 0) {
                                Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                                if (vertex == null) {
                                    lost++;
                                    continue;
                                }
                                this.staff.getGraphData().addForAll(vertex);
                            } else {
                                stop = false;
                            }
                        }
                    } catch (IOException e) {
                        // NOTE(review): same logging issue as above (array toString()).
                        LOG.info("IO exception: " + e.getStackTrace());
                    }
                } else {
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(i);
                    BytesWritable data = new BytesWritable();
                    data.set(databuf[i], 0, databufindex[i]);
                    t.setData(data);
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    t.setStatus(true);
                }
            }
        }
        dataReader.close();
        dataReader = null;
        psReader.close();
        psReader = null;
        // Remove the spool files, then the staff directory, then the job directory.
        for (File f : files) {
            f.delete();
        }
        dir.delete();
        dir = new File(path.substring(0, path.lastIndexOf('/')));
        dir.delete();
        tpool.cleanup();
        tpool = null;
        databuf = null;
        databufindex = null;
        this.counter = null;
        LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
        LOG.info("The number of vertices that were put into the partition: " + local);
        LOG.info("The number of vertices that were sent to other partitions: " + send);
        LOG.info("The number of verteices in the partition that cound not be " + "parsed:" + lost);
    } catch (IOException e) {
        throw e;
    } catch (InterruptedException e) {
        throw e;
    } finally {
        // Repeats the cleanup so the temporary files are also removed on failure.
        for (File f : files) {
            f.delete();
        }
        dir.delete();
        dir = new File(path.substring(0, path.lastIndexOf('/')));
        dir.delete();
    }
}
From source file:com.cloudera.castagna.logparser.pig.LogLoader.java
License:Apache License
@Override public Tuple getNext() throws IOException { try {//w w w . ja v a 2 s .c o m boolean notDone = in.nextKeyValue(); if (!notDone) { return null; } Text value = (Text) in.getCurrentValue(); byte[] ba = value.getBytes(); // make a copy of the bytes representing the input since // TextInputFormat will reuse the byte array return mTupleFactory.newTuple(new DataByteArray(ba, 0, value.getLength())); } catch (InterruptedException e) { throw new IOException("Error getting input"); } }
From source file:com.cloudera.recordservice.examples.terasort.TeraValidate.java
License:Apache License
/**
 * Renders the first {@code getLength()} bytes of a {@link Text} using the
 * string form of a {@link BytesWritable} holding those bytes.
 * @param t the text to render
 * @return the {@code toString()} of the {@link BytesWritable} populated from {@code t}
 */
private static String textifyBytes(Text t) {
    final BytesWritable holder = new BytesWritable();
    final byte[] raw = t.getBytes();
    final int validLength = t.getLength();
    holder.set(raw, 0, validLength);
    return holder.toString();
}