Example usage for org.apache.hadoop.io Text append

List of usage examples for org.apache.hadoop.io Text append

Introduction

On this page you can find example usages of org.apache.hadoop.io Text append.

Prototype

public void append(byte[] utf8, int start, int len) 

Source Link

Document

Append a range of bytes to the end of the given text

Usage

From source file:mr.MyLineReader.java

License:Apache License

/**
 * Read a line terminated by a custom delimiter.
 *
 * <p>Bytes are pulled from {@code in} through {@code buffer} and copied into
 * {@code str} until the full {@code recordDelimiterBytes} sequence has been
 * matched, the input is exhausted, or {@code maxBytesToConsume} has been
 * reached. Matched delimiter bytes are counted as consumed but are not
 * appended to {@code str}.
 *
 * @param str               receives the line content; it is cleared first
 * @param maxLineLength     maximum number of bytes stored in {@code str};
 *                          bytes beyond it are consumed but dropped
 * @param maxBytesToConsume soft limit on consumed bytes; it is only checked
 *                          after each pass over the buffer
 * @return the number of bytes consumed, delimiter bytes included
 * @throws IOException propagated from the underlying read, or thrown when
 *                     more than {@code Integer.MAX_VALUE} bytes were consumed
 */
private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    str.clear();
    int txtLength = 0; // tracks str.getLength(), as an optimization
    long bytesConsumed = 0;
    int delPosn = 0; // number of leading delimiter bytes matched so far
    do {
        int startPosn = bufferPosn; // starting from where we left off the last time
        if (bufferPosn >= bufferLength) {
            // Buffer exhausted: refill it and restart at its beginning.
            startPosn = bufferPosn = 0;
            bufferLength = in.read(buffer);
            if (bufferLength <= 0)
                break; // EOF
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) {
            if (buffer[bufferPosn] == recordDelimiterBytes[delPosn]) {
                delPosn++;
                if (delPosn >= recordDelimiterBytes.length) {
                    // Whole delimiter matched: step past its last byte and stop scanning.
                    bufferPosn++;
                    break;
                }
            } else {
                // NOTE(review): the mismatching byte is not re-tested against the first
                // delimiter byte, so a delimiter that begins inside a failed partial
                // match (e.g. "aab" with delimiter "ab") is not detected.
                delPosn = 0;
            }
        }
        int readLength = bufferPosn - startPosn;
        bytesConsumed += readLength;
        // Exclude the (possibly partial) delimiter match from the appended data.
        // NOTE(review): if a partial match at the end of one buffer turns out not to
        // be a delimiter once the next buffer is read, the bytes held back here are
        // never appended to str.
        int appendLength = readLength - delPosn;
        if (appendLength > maxLineLength - txtLength) {
            appendLength = maxLineLength - txtLength;
        }
        if (appendLength > 0) {
            str.append(buffer, startPosn, appendLength);
            txtLength += appendLength;
        }
    } while (delPosn < recordDelimiterBytes.length && bytesConsumed < maxBytesToConsume);
    if (bytesConsumed > (long) Integer.MAX_VALUE)
        throw new IOException("Too many bytes before delimiter: " + bytesConsumed);
    return (int) bytesConsumed;
}

From source file:mvm.rya.indexing.accumulo.freetext.ColumnPrefixes.java

License:Apache License

/**
 * Builds a new {@code Text} consisting of {@code prefix} followed by the
 * encoded bytes of {@code str}. Neither argument is modified.
 *
 * @param prefix leading content, copied into the result
 * @param str    string encoded with {@code Text.encode(str, false)} and appended
 * @return a new {@code Text} holding prefix + encoded string
 * @throws IllegalArgumentException if {@code str} cannot be encoded
 */
private static Text concat(Text prefix, String str) {
    Text combined = new Text(prefix);

    final ByteBuffer encoded;
    try {
        encoded = Text.encode(str, false);
    } catch (CharacterCodingException cce) {
        throw new IllegalArgumentException(cce);
    }

    // Only the first limit() bytes of the backing array hold encoded data.
    byte[] backing = encoded.array();
    int validBytes = encoded.limit();
    combined.append(backing, 0, validBytes);
    return combined;
}

From source file:mvm.rya.indexing.KeyParts.java

License:Apache License

/**
 * Appends an entire byte array to the end of a row key.
 *
 * @param bytes   the bytes to append; all of them are used
 * @param keyText the text that is extended in place
 */
private static void appendBytes(byte[] bytes, Text keyText) {
    final int count = bytes.length;
    keyText.append(bytes, 0, count);
}

From source file:mvm.rya.indexing.KeyParts.java

License:Apache License

/**
 * Appends a delimiter byte and a hash of the serialized statement to the key.
 * Equal statements therefore produce equal key suffixes, while different
 * statements that occur at the same time are very unlikely to collide.
 * Should collisions ever become a concern (for example with a very large
 * number of statements at the exact same time), the hash could be upgraded
 * to a stronger one such as SHA-1.
 *
 * @param statement the statement whose serialized form is hashed
 * @param keyText   the key that is extended in place
 */
public static void appendUniqueness(Statement statement, Text keyText) {
    // The first byte of HASH_PREFIX separates the key from the hash.
    keyText.append(HASH_PREFIX, 0, 1);

    String serialized = StatementSerializer.writeStatement(statement);
    byte[] digest = Md5Hash.md5Binary(new Value(StringUtils.getBytesUtf8(serialized)));
    keyText.append(digest, 0, digest.length);
}

From source file:net.darkseraphim.webanalytics.hadoop.csv.CSVLineRecordReader.java

License:Apache License

/**
 * Helper function that adds a new value to the values list passed as
 * argument.
 *
 * <p>A trailing line feed is dropped from {@code sb}; when requested, the
 * trailing {@code separator} is dropped too; and if the remaining value both
 * starts and ends with {@code delimiter}, that enclosing pair is stripped.
 * The result is stored as UTF-8 in a new {@code Text}, which is added to
 * {@code values}. {@code sb} is emptied before returning.
 *
 * @param sb
 *            StringBuffer that has the value to be added
 * @param values
 *            values list
 * @param takeDelimiterOut
 *            should be true when called in the middle of the line, when a
 *            delimiter was found, and false when sb contains the line
 *            ending
 * @throws UnsupportedEncodingException
 *             if the UTF-8 charset is not available
 */
protected void foundDelimiter(StringBuffer sb, List<Text> values, boolean takeDelimiterOut)
        throws UnsupportedEncodingException {

    // remove trailing LF
    if (sb.length() > 0 && sb.charAt(sb.length() - 1) == '\n') {
        sb.deleteCharAt(sb.length() - 1);
    }

    // Found a real delimiter
    String val = (takeDelimiterOut) ? sb.substring(0, sb.length() - separator.length()) : sb.toString();
    if (val.startsWith(delimiter) && val.endsWith(delimiter)) {
        val = (val.length() - (2 * delimiter.length()) > 0)
                ? val.substring(delimiter.length(), val.length() - delimiter.length())
                : "";
    }

    // Use the encoded byte count, not val.length(): the latter counts UTF-16 chars
    // and would truncate any value containing multi-byte UTF-8 characters.
    byte[] utf8 = val.getBytes("UTF-8");
    Text text = new Text();
    text.append(utf8, 0, utf8.length);
    values.add(text);

    // Empty string buffer
    sb.setLength(0);
}

From source file:org.apache.accumulo.core.client.admin.FindMax.java

License:Apache License

/**
 * Computes a value halfway between two byte strings by treating each as a
 * non-negative big-endian integer.
 *
 * @param minBS lower bound
 * @param maxBS upper bound
 * @return the midpoint, with trailing zero bytes removed for as long as the
 *         result still compares greater than {@code minBS}
 */
private static Text findMidPoint(Text minBS, Text maxBS) {
    // A leading zero byte keeps BigInteger from reading the value as negative.
    ByteArrayOutputStream lowBytes = new ByteArrayOutputStream();
    lowBytes.write(0);
    lowBytes.write(minBS.getBytes(), 0, minBS.getLength());

    ByteArrayOutputStream highBytes = new ByteArrayOutputStream();
    highBytes.write(0);
    highBytes.write(maxBS.getBytes(), 0, maxBS.getLength());

    // Bring both numbers to the same magnitude by padding the shorter one
    // (appendZeros is presumably a zero-byte padder; defined outside this block).
    int sizeGap = highBytes.size() - lowBytes.size();
    if (sizeGap > 0) {
        appendZeros(lowBytes, sizeGap);
    } else if (sizeGap < 0) {
        appendZeros(highBytes, -sizeGap);
    }
    int paddedSize = lowBytes.size();

    BigInteger low = new BigInteger(lowBytes.toByteArray());
    BigInteger high = new BigInteger(highBytes.toByteArray());
    BigInteger halfSpan = high.subtract(low).divide(BigInteger.valueOf(2));
    byte[] midBytes = halfSpan.add(low).toByteArray();

    Text result = new Text();
    if (midBytes.length == paddedSize) {
        if (midBytes[0] != 0) {
            throw new RuntimeException();
        }
        // BigInteger emitted a sign byte of zero; skip it.
        result.set(midBytes, 1, midBytes.length - 1);
    } else {
        // BigInteger drops leading 0x0 bytes; restore them up to the expected length.
        int expectedLength = Math.max(minBS.getLength(), maxBS.getLength());
        int missing = expectedLength - midBytes.length;
        if (missing > 0) {
            result.append(new byte[missing], 0, missing);
        }
        result.append(midBytes, 0, midBytes.length);
    }

    // Strip trailing 0x0 bytes while the result stays above the lower bound.
    while (true) {
        int length = result.getLength();
        if (length == 0 || result.getBytes()[length - 1] != 0 || result.compareTo(minBS) <= 0) {
            break;
        }
        Text shorter = new Text();
        shorter.set(result.getBytes(), 0, length - 1);
        result = shorter;
    }

    return result;
}

From source file:org.apache.accumulo.core.client.admin.FindMax.java

License:Apache License

/**
 * Searches for a row value with no data at or after it. Starts with four
 * 0xff bytes and doubles the candidate (by appending it to itself) for as
 * long as a scan starting at the candidate still returns an entry.
 *
 * @param scanner scanner whose range is repeatedly reset during the search
 * @return the first candidate for which the scan returned nothing
 */
private static Text findInitialEnd(Scanner scanner) {
    Text candidate = new Text(new byte[] { (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0xff });
    scanner.setRange(new Range(candidate, null));

    while (scanner.iterator().hasNext()) {
        byte[] current = candidate.getBytes();
        int currentLength = candidate.getLength();

        Text doubled = new Text();
        doubled.append(current, 0, currentLength);
        doubled.append(current, 0, currentLength);

        candidate = doubled;
        scanner.setRange(new Range(candidate, null));
    }

    return candidate;
}

From source file:org.apache.accumulo.core.client.impl.BulkImport.java

License:Apache License

/**
 * Collects the extents that contain data from {@code reader}, walking
 * forward from {@code startRow}.
 *
 * <p>On each pass the reader is positioned at the current row, the extent
 * for the top key's row is looked up and recorded, and the walk resumes just
 * after that extent's end row. The walk ends when the reader has no more
 * data, the extent has no end row, or the extent's end row is not before
 * {@code endRow}.
 *
 * @param context     client context (not used by this method's body)
 * @param extentCache resolves a row to its extent
 * @param startRow    first row to consider; {@code null} means an empty row
 * @param endRow      end row bound; {@code null} means unbounded
 * @param reader      iterator over the file being examined
 * @return the extents found, in the order they were encountered
 */
public static List<KeyExtent> findOverlappingTablets(ClientContext context, KeyExtentCache extentCache,
        Text startRow, Text endRow, FileSKVIterator reader)
        throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException {
    List<KeyExtent> overlapping = new ArrayList<>();
    Collection<ByteSequence> columnFamilies = Collections.emptyList();

    Text row = (startRow == null) ? new Text() : startRow;
    while (true) {
        reader.seek(new Range(row, null), columnFamilies, false);
        if (!reader.hasTop()) {
            break;
        }

        KeyExtent extent = extentCache.lookup(reader.getTopKey().getRow());
        overlapping.add(extent);

        Text extentEnd = extent.getEndRow();
        if (extentEnd == null || (endRow != null && extentEnd.compareTo(endRow) >= 0)) {
            break;
        }

        // Continue from the smallest row that sorts after this extent's end row.
        row = new Text(extentEnd);
        row.append(byte0, 0, byte0.length);
    }

    return overlapping;
}

From source file:org.apache.accumulo.core.client.impl.TabletLocatorImpl.java

License:Apache License

/**
 * Assigns each range to every tablet location it overlaps, recording the
 * assignments in {@code binnedRanges}.
 *
 * <p>For each range, the tablet holding its start row is located first, then
 * following tablets are located until one has no end row or the range ends
 * at or before the tablet's end. If any location cannot be found, the range
 * is reported as a failure and none of its tablets are binned.
 *
 * @param context      client context passed to non-cached lookups
 * @param ranges       ranges to bin
 * @param binnedRanges output map that receives the assignments
 * @param useCache     when true, only the cache is consulted; when false,
 *                     the first failed lookup of a range's start row
 *                     suppresses further start-row lookups in this call
 * @param lcSession    lock checker applied to cached locations
 * @return the ranges that could not be fully binned
 */
private List<Range> binRanges(ClientContext context, List<Range> ranges,
        Map<String, Map<KeyExtent, List<Range>>> binnedRanges, boolean useCache, LockCheckerSession lcSession)
        throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
    List<Range> unbinned = new ArrayList<>();
    List<TabletLocation> covering = new ArrayList<>();
    boolean lookupFailed = false;

    nextRange: for (Range range : ranges) {
        covering.clear();

        Text firstRow = (range.getStartKey() == null) ? new Text() : range.getStartKey().getRow();

        TabletLocation current = null;
        if (useCache) {
            current = lcSession.checkLock(locateTabletInCache(firstRow));
        } else if (!lookupFailed) {
            current = _locateTablet(context, firstRow, false, false, false, lcSession);
        }

        if (current == null) {
            unbinned.add(range);
            lookupFailed |= !useCache;
            continue;
        }
        covering.add(current);

        // Walk forward tablet by tablet until the range no longer extends past the tablet.
        while (true) {
            Text tabletEnd = current.tablet_extent.getEndRow();
            if (tabletEnd == null || range.afterEndKey(new Key(tabletEnd).followingKey(PartialKey.ROW))) {
                break;
            }

            if (useCache) {
                // Smallest row that sorts after the current tablet's end row.
                Text nextRow = new Text(tabletEnd);
                nextRow.append(new byte[] { 0 }, 0, 1);
                current = lcSession.checkLock(locateTabletInCache(nextRow));
            } else {
                current = _locateTablet(context, tabletEnd, true, false, false, lcSession);
            }

            if (current == null) {
                unbinned.add(range);
                lookupFailed |= !useCache;
                continue nextRange;
            }
            covering.add(current);
        }

        for (TabletLocation location : covering) {
            TabletLocatorImpl.addRange(binnedRanges, location.tablet_location, location.tablet_extent, range);
        }
    }

    return unbinned;
}

From source file:org.apache.accumulo.core.client.impl.TabletLocatorImpl.java

License:Apache License

/**
 * Looks up tablet locations for {@code row} through the parent locator and
 * stores them in the cache.
 *
 * <p>The metadata row is built as {@code tableId + ';' + row}. If the tablet
 * returned for it contains no matching entries, following tablets are tried
 * until entries are found, the lookup returns nothing, or the tablet's end
 * row is not before {@code lastTabletRow}.
 *
 * @param context   client context
 * @param row       row whose tablet location is wanted
 * @param retry     forwarded to the parent locator
 * @param lcSession forwarded to the cache update
 */
private void lookupTabletLocation(ClientContext context, Text row, boolean retry, LockCheckerSession lcSession)
        throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
    Text metadataRow = new Text(tableId);
    metadataRow.append(new byte[] { ';' }, 0, 1);
    metadataRow.append(row.getBytes(), 0, row.getLength());

    TabletLocation ptl = parent.locateTablet(context, metadataRow, false, retry);
    if (ptl == null) {
        return;
    }

    TabletLocations locations = locationObtainer.lookupTablet(context, ptl, metadataRow, lastTabletRow,
            parent);
    while (locations != null && locations.getLocations().isEmpty()
            && locations.getLocationless().isEmpty()) {
        // The current tablet has nothing overlapping the row; move on to the next one.
        Text er = ptl.tablet_extent.getEndRow();
        if (er == null || er.compareTo(lastTabletRow) >= 0) {
            break;
        }
        ptl = parent.locateTablet(context, er, true, retry);
        if (ptl == null) {
            break;
        }
        locations = locationObtainer.lookupTablet(context, ptl, metadataRow, lastTabletRow, parent);
    }

    if (locations == null) {
        return;
    }

    // The list is not guaranteed to hold contiguous key extents, so each
    // extent is handled on its own.
    Text previousEnd = null;
    for (TabletLocation found : locations.getLocations()) {
        KeyExtent extent = found.tablet_extent;

        // Rebuild the location when this extent's prevEndRow equals the previous end row.
        boolean followsPrevious = (previousEnd != null) && (extent.getPrevEndRow() != null)
                && extent.getPrevEndRow().equals(previousEnd);

        TabletLocation toCache;
        if (followsPrevious) {
            KeyExtent rebuilt = new KeyExtent(extent.getTableId(), extent.getEndRow(), previousEnd);
            toCache = new TabletLocation(rebuilt, found.tablet_location, found.tablet_session);
        } else {
            toCache = found;
        }

        // Remember this end row for the next iteration.
        previousEnd = toCache.tablet_extent.getEndRow();

        updateCache(toCache, lcSession);
    }
}