List of usage examples for org.apache.hadoop.io Text getLength
@Override public int getLength()
From source file:mvm.rya.accumulo.pig.AccumuloStorage.java
License:Apache License
/**
 * Converts one tuple into a single-column {@link Mutation} and hands it to the writer.
 *
 * <p>Tuple layout as read by this method: field 0 is passed to the {@code Mutation}
 * constructor, field 1 is the column family and field 2 the column qualifier. When the tuple
 * has more than four fields, field 3 is the column visibility and field 4 the value;
 * otherwise field 3 is the value and no visibility is set.
 *
 * @param t the tuple to store
 * @throws ExecException if a tuple field cannot be read
 * @throws IOException if the write fails or the writing thread is interrupted
 */
public void putNext(Tuple t) throws ExecException, IOException {
    Mutation mut = new Mutation(objToText(t.get(0)));
    Text cf = objToText(t.get(1));
    Text cq = objToText(t.get(2));

    if (t.size() > 4) {
        Text cv = objToText(t.get(3));
        Value val = new Value(objToBytes(t.get(4)));
        if (cv.getLength() == 0) {
            // An empty visibility is treated the same as no visibility at all.
            mut.put(cf, cq, val);
        } else {
            mut.put(cf, cq, new ColumnVisibility(cv), val);
        }
    } else {
        Value val = new Value(objToBytes(t.get(3)));
        mut.put(cf, cq, val);
    }

    try {
        writer.write(tableName, mut);
    } catch (InterruptedException e) {
        // Restore the interrupt status so callers further up can still observe the
        // interruption after it has been translated into an IOException.
        Thread.currentThread().interrupt();
        throw new IOException(e);
    }
}
From source file:mvm.rya.indexing.accumulo.freetext.ColumnPrefixes.java
License:Apache License
/**
 * Returns a new {@link Text} holding the content of {@code termWithPrefix} without its first
 * two bytes.
 *
 * @param termWithPrefix the term whose two leading bytes are dropped
 * @return a fresh {@code Text} with the remaining bytes
 */
public static Text removePrefix(Text termWithPrefix) {
    final int prefixLength = 2;
    final Text stripped = new Text();
    // Only the first getLength() bytes of the backing array are valid, so the copy length is
    // derived from getLength() rather than from the array size.
    stripped.set(termWithPrefix.getBytes(), prefixLength, termWithPrefix.getLength() - prefixLength);
    return stripped;
}
From source file:net.mooncloud.hadoop.hive.ql.udf.UDFMd5.java
License:Apache License
/** * Convert String to md5/*from w ww . j a v a 2 s. c om*/ */ public Text evaluate(Text n) { if (n == null) { return null; } digest.reset(); digest.update(n.getBytes(), 0, n.getLength()); byte[] md5Bytes = digest.digest(); String md5Hex = Hex.encodeHexString(md5Bytes); result.set(md5Hex); return result; }
From source file:net.mooncloud.hadoop.hive.ql.udf.UDFRSASign.java
License:Apache License
/**
 * Signs the content of {@code n} with a Base64-encoded private key.
 *
 * @param n the message to sign; may be {@code null}
 * @param privateKey the Base64-encoded private key; may be {@code null}
 * @return the signature, or {@code null} when either argument is {@code null} or signing
 *         fails
 */
public BytesWritable evaluate(Text n, Text privateKey) {
    if (n == null || privateKey == null) {
        return null;
    }
    try {
        // Text.getBytes() exposes the backing array, which is only valid up to getLength().
        // Copy exactly the valid region so stale trailing bytes are never signed or decoded.
        byte[] message = java.util.Arrays.copyOf(n.getBytes(), n.getLength());
        byte[] encodedKey = java.util.Arrays.copyOf(privateKey.getBytes(), privateKey.getLength());
        byte[] decoded = Base64.decodeBase64(encodedKey);
        result = new BytesWritable(RSAUtils.sign(message, decoded));
        return result;
    } catch (Exception e) {
        e.printStackTrace();
        // Do not fall through to the result field: it may still hold the signature produced
        // for a previous row.
        return null;
    }
}
From source file:net.mooncloud.hadoop.hive.ql.udf.UDFRSAVerify.java
License:Apache License
public BooleanWritable evaluate(Text n, Text sign, Text publicKey) { if (n == null || sign == null || publicKey == null) { return null; }//from w w w .j a v a 2s. c o m try { byte[] publicKeybytes = new byte[publicKey.getLength()]; System.arraycopy(publicKey.getBytes(), 0, publicKeybytes, 0, publicKey.getLength()); byte[] publicKeydecoded = Base64.decodeBase64(publicKeybytes); byte[] signbytes = new byte[sign.getLength()]; System.arraycopy(sign.getBytes(), 0, signbytes, 0, sign.getLength()); byte[] signdecoded = Base64.decodeBase64(signbytes); result = new BooleanWritable(RSAUtils.verify(n.getBytes(), publicKeydecoded, signdecoded)); } catch (Exception e) { e.printStackTrace(); } return result; }
From source file:net.mooncloud.hadoop.hive.ql.udf.UDFUnbase64.java
License:Apache License
/**
 * Base64-decodes the content of {@code value}.
 *
 * @param value the Base64-encoded text; may be {@code null}
 * @return the shared {@code result} object filled with the decoded bytes, or {@code null}
 *         when {@code value} is {@code null}
 */
public BytesWritable evaluate(Text value) {
    if (value != null) {
        final int encodedLength = value.getLength();
        final byte[] encoded = new byte[encodedLength];
        // Copy only the valid region of the backing array before decoding.
        System.arraycopy(value.getBytes(), 0, encoded, 0, encodedLength);
        final byte[] decoded = Base64.decodeBase64(encoded);
        result.set(decoded, 0, decoded.length);
        return result;
    }
    return null;
}
From source file:nl.basjes.hadoop.io.compress.TestSplittableCodecSeams.java
License:Apache License
/** * This test checks if reading the file in a splitted way results * in the same lines as reading the file as a single 'split'. *//* www . j ava 2 s . c om*/ private void validateSplitSeams(final Configuration conf, final FileSystem fs, final Path filename, final Class<? extends SplittableCompressionCodec> codecClass, final long splitSize, final long recordsInFile, final long lastSplitSizeLimit) throws IOException { // To make the test predictable conf.setInt("io.file.buffer.size", BUFFER_SIZE); final FileStatus infile = fs.getFileStatus(filename); final long inputLength = infile.getLen(); if (inputLength > Integer.MAX_VALUE) { fail("Bad test file length."); } LOG.info("Input is " + inputLength + " bytes. " + "making a split every " + splitSize + " bytes."); if (inputLength <= splitSize) { fail("The compressed test file is too small to do any useful testing."); } final SplittableCompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf); /* * The validation is done as follows: * 1) We open the entire file as a single split as the reference * 2) We create a sequence of splits and validate each line with the * reference split. * The lines from these two must match 100%. 
*/ final Text refLine = new Text(); final Decompressor refDcmp = CodecPool.getDecompressor(codec); assertNotNull("Unable to load the decompressor for codec \"" + codec.getClass().getName() + "\"", refDcmp); final SplitCompressionInputStream refStream = codec.createInputStream(fs.open(infile.getPath()), refDcmp, 0, inputLength, SplittableCompressionCodec.READ_MODE.BYBLOCK); final LineReader refReader = new LineReader(refStream, conf); final Text line = new Text(); final Decompressor dcmp = CodecPool.getDecompressor(codec); assertNotNull("Unable to load the decompressor for codec \"" + codec.getClass().getName() + "\"", refDcmp); try { long start = 0; long end = splitSize; int splitCount = 0; long refLineNumber = 0; long splitLineNumber; while (end <= inputLength) { splitLineNumber = 0; ++splitCount; LOG.debug("-------------------------------------------------------"); dcmp.reset(); // Reset the Decompressor for reuse with the new stream final SplitCompressionInputStream splitStream = codec.createInputStream(fs.open(infile.getPath()), dcmp, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK); final long adjustedStart = splitStream.getAdjustedStart(); final long adjustedEnd = splitStream.getAdjustedEnd(); if (LOG.isDebugEnabled()) { LOG.debug("Doing split " + splitCount + " on range " + " (" + start + "-" + end + ")" + " adjusted to (" + adjustedStart + "-" + adjustedEnd + ")"); } final LineReader lreader = new LineReader(splitStream, conf); if (start != 0) { // Not the first split so we discard the first (incomplete) line. 
int readChars = lreader.readLine(line); if (LOG.isTraceEnabled()) { LOG.trace("DISCARD LINE " + 0 + " in split " + splitCount + " pos=" + splitStream.getPos() + " length=" + readChars + ": \"" + line + "\""); } } // Now read until the end of this split while (nextKeyValue(splitStream, lreader, adjustedEnd, line)) { ++splitLineNumber; // Get the reference value if (!nextKeyValue(refStream, refReader, inputLength, refLine)) { LOG.error(String.format("S>%05d: %s", splitLineNumber, line)); fail("Split goes beyond the end of the reference with line number " + splitLineNumber); } ++refLineNumber; if (LOG.isDebugEnabled() && refLineNumber > (recordsInFile - 10)) { LOG.debug(String.format("R<%05d: %s", refLineNumber, refLine)); LOG.debug(String.format("S>%05d: %s", splitLineNumber, line)); } assertEquals("Line must be same in reference and in split at line " + refLineNumber, refLine, line); if (LOG.isTraceEnabled()) { LOG.trace("LINE " + splitLineNumber + " in split " + splitCount + " (" + refLineNumber + ") pos=" + splitStream.getPos() + " length=" + line.getLength() + ": \"" + line + "\""); } } // We just read through the entire split LOG.debug("Checked split " + splitCount + " (" + adjustedStart + "-" + adjustedEnd + ") " + "containing " + splitLineNumber + " lines."); if (end == inputLength) { LOG.info("====================> Finished the last split <===================="); break; // We've reached the end of the last split } // Determine start and end for the next split start = end; if ((end + lastSplitSizeLimit) > inputLength) { end = inputLength; LOG.info("====================> Starting the last split (" + start + " - " + end + ") <===================="); } else { end += splitSize; LOG.info("====================> Starting the next split (" + start + " - " + end + ") <===================="); } } if (nextKeyValue(refStream, refReader, inputLength, refLine)) { ++refLineNumber; LOG.error(String.format("R<%05d: %s", refLineNumber, refLine)); fail("The reference is at least 
one line longer than the last split ( " + "splitSize=" + splitSize + ", " + "inputLength= " + inputLength + ", " + "split start=" + start + ", " + "split end=" + end + ", " + "line=" + refLineNumber + ")"); } LOG.info("Verified " + refLineNumber + " lines in " + splitCount + " splits."); } finally { CodecPool.returnDecompressor(dcmp); CodecPool.returnDecompressor(refDcmp); } }
From source file:nl.bioinf.wvanhelvoirt.HadoopPhredCalculator.NReadRecordReader.java
License:Open Source License
/** * Override method that if the file has not already been read, reads it into memory, so that a call to * getCurrentValue() will return the lines this file as Text. Then, returns true. If it has already been read, * then returns false without updating any internal state. * * @return Boolean whether the file was read or not. * @throws IOException If there is an error reading the file. * @throws InterruptedException If there is an error. *///w w w . j av a 2s . c o m @Override public boolean nextKeyValue() throws IOException, InterruptedException { // Initialize key and value. if (this.key == null) { this.key = new LongWritable(); } if (this.value == null) { this.value = new Text(); } // Get the key and value. this.key.set(this.pos); this.value.clear(); Text endline = new Text("\n"); int newSize = 0; for (int i = 0; i < this.NLINESTOPROCESS; i++) { Text v = new Text(); while (this.pos < this.end) { newSize = this.in.readLine(v, this.maxLineLength, Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.pos), this.maxLineLength)); this.value.append(v.getBytes(), 0, v.getLength()); this.value.append(endline.getBytes(), 0, endline.getLength()); if (newSize == 0) { break; } this.pos += newSize; if (newSize < this.maxLineLength) { break; } } } // If newSize is still zero, return false, else true. if (newSize == 0) { this.key = null; this.value = null; return false; } else { return true; } }
From source file:org.apache.accumulo.core.client.admin.FindMax.java
License:Apache License
private static Text findMidPoint(Text minBS, Text maxBS) { ByteArrayOutputStream startOS = new ByteArrayOutputStream(); startOS.write(0); // add a leading zero so bigint does not think its negative startOS.write(minBS.getBytes(), 0, minBS.getLength()); ByteArrayOutputStream endOS = new ByteArrayOutputStream(); endOS.write(0);// add a leading zero so bigint does not think its negative endOS.write(maxBS.getBytes(), 0, maxBS.getLength()); // make the numbers of the same magnitude if (startOS.size() < endOS.size()) appendZeros(startOS, endOS.size() - startOS.size()); else if (endOS.size() < startOS.size()) appendZeros(endOS, startOS.size() - endOS.size()); BigInteger min = new BigInteger(startOS.toByteArray()); BigInteger max = new BigInteger(endOS.toByteArray()); BigInteger mid = max.subtract(min).divide(BigInteger.valueOf(2)).add(min); byte[] ba = mid.toByteArray(); Text ret = new Text(); if (ba.length == startOS.size()) { if (ba[0] != 0) throw new RuntimeException(); // big int added a zero so it would not be negative, drop it ret.set(ba, 1, ba.length - 1);//ww w .jav a 2s .co m } else { int expLen = Math.max(minBS.getLength(), maxBS.getLength()); // big int will drop leading 0x0 bytes for (int i = ba.length; i < expLen; i++) { ret.append(new byte[] { 0 }, 0, 1); } ret.append(ba, 0, ba.length); } // remove trailing 0x0 bytes while (ret.getLength() > 0 && ret.getBytes()[ret.getLength() - 1] == 0 && ret.compareTo(minBS) > 0) { Text t = new Text(); t.set(ret.getBytes(), 0, ret.getLength() - 1); ret = t; } return ret; }
From source file:org.apache.accumulo.core.client.admin.FindMax.java
License:Apache License
/**
 * Finds an initial upper bound for the scan by starting with four {@code 0xff} bytes and
 * doubling that key for as long as the scanner still returns entries from it onwards.
 *
 * @param scanner the scanner to probe; its range is modified by this method
 * @return the first candidate key from which the scanner returns no entries
 */
private static Text findInitialEnd(Scanner scanner) {
    Text candidate = new Text(new byte[] { (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff });
    scanner.setRange(new Range(candidate, null));

    while (scanner.iterator().hasNext()) {
        // Double the candidate by concatenating its valid bytes with themselves.
        final byte[] currentBytes = candidate.getBytes();
        final int currentLength = candidate.getLength();

        Text doubled = new Text();
        doubled.append(currentBytes, 0, currentLength);
        doubled.append(currentBytes, 0, currentLength);

        candidate = doubled;
        scanner.setRange(new Range(candidate, null));
    }

    return candidate;
}