Example usage for org.apache.hadoop.io Text getBytes

List of usage examples for org.apache.hadoop.io Text getBytes

Introduction

On this page you can find example usages of the getBytes() method of org.apache.hadoop.io.Text.

Prototype

@Override
public byte[] getBytes() 

Source Link

Document

Returns the raw backing byte array; only the first getLength() bytes of it are valid data.

Usage

From source file:org.apache.accumulo.proxy.SimpleProxyBase.java

License:Apache License

/**
 * Wraps the valid contents of a {@link Text} in a {@link ByteBuffer} without copying.
 *
 * <p>{@code Text.getBytes()} returns the raw backing array, of which only the first
 * {@code getLength()} bytes are valid, so the buffer is limited to that range.
 *
 * @param t the text to wrap
 * @return a buffer over the backing array whose remaining bytes are exactly the text's contents
 */
static private ByteBuffer t2bb(Text t) {
    return ByteBuffer.wrap(t.getBytes(), 0, t.getLength());
}

From source file:org.apache.accumulo.server.tabletserver.Tablet.java

License:Apache License

/**
 * Picks the row at which this tablet should be split.
 *
 * <p>Returns {@code null} (do not split now) when the tablet is the root tablet, when its estimated
 * size does not exceed the configured split threshold, when a previously seen big row is still
 * believed to be present, when the midpoint or last key cannot be read, or when a single row
 * occupies the tablet from the midpoint to the end.
 *
 * @param files the files handed to the midpoint and last-key lookups
 * @return the chosen split point, or {@code null} when no split should be attempted now
 * @throws IllegalStateException if the midpoint lookup yields no key at .5
 */
private SplitRowSpec findSplitRow(Collection<FileRef> files) {

    // never split the root tablet
    // check if we already decided that we can never split
    // check to see if we're big enough to split

    long splitThreshold = acuTableConf.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD);
    if (extent.isRootTablet() || estimateTabletSize() <= splitThreshold) {
        return null;
    }

    // have seen a big row before, do not bother checking unless a minor compaction or map file import has occurred.
    if (sawBigRow) {
        if (timeOfLastMinCWhenBigFreakinRowWasSeen != lastMinorCompactionFinishTime
                || timeOfLastImportWhenBigFreakinRowWasSeen != lastMapFileImportTime) {
            // a minor compaction or map file import has occurred... check again
            sawBigRow = false;
        } else {
            // nothing changed, do not split
            return null;
        }
    }

    SortedMap<Double, Key> keys = null;

    try {
        // we should make .25 below configurable
        keys = FileUtil.findMidPoint(fs, tabletServer.getSystemConfiguration(), extent.getPrevEndRow(),
                extent.getEndRow(), files, .25);
    } catch (IOException e) {
        // pass the exception itself so the stack trace is not lost
        log.error("Failed to find midpoint " + e.getMessage(), e);
        return null;
    }

    // check to see if one row takes up most of the tablet, in which case we can not split
    try {

        Text lastRow;
        if (extent.getEndRow() == null) {
            Key lastKey = (Key) FileUtil.findLastKey(fs, tabletServer.getSystemConfiguration(), files);
            lastRow = lastKey.getRow();
        } else {
            lastRow = extent.getEndRow();
        }

        // A midpoint is required for everything below; fail with a clear message instead of a bare
        // NullPointerException when the lookup did not produce one.
        final Key mid = keys.get(.5);
        if (mid == null) {
            throw new IllegalStateException("Could not determine midpoint for files");
        }

        // check to see that the midPoint is not equal to the end key
        if (mid.compareRow(lastRow) == 0) {
            if (keys.firstKey() < .5) {
                Key candidate = keys.get(keys.firstKey());
                if (candidate.compareRow(lastRow) != 0) {
                    // we should use this ratio in split size estimations
                    if (log.isTraceEnabled())
                        log.trace(String.format(
                                "Splitting at %6.2f instead of .5, row at .5 is same as end row%n",
                                keys.firstKey()));
                    return new SplitRowSpec(keys.firstKey(), candidate.getRow());
                }

            }

            log.warn("Cannot split tablet " + extent + " it contains a big row : " + lastRow);

            // remember when the big row was seen so the check is skipped until new data arrives
            sawBigRow = true;
            timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime;
            timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime;

            return null;
        }

        Text text = mid.getRow();
        SortedMap<Double, Key> firstHalf = keys.headMap(.5);
        if (firstHalf.size() > 0) {
            // shorten the split row to its common prefix with the preceding sampled row plus one byte;
            // only the first getLength() bytes of the backing array are valid
            Text beforeMid = firstHalf.get(firstHalf.lastKey()).getRow();
            Text shorter = new Text();
            int trunc = longestCommonLength(text, beforeMid);
            shorter.set(text.getBytes(), 0, Math.min(text.getLength(), trunc + 1));
            text = shorter;
        }
        return new SplitRowSpec(.5, text);
    } catch (IOException e) {
        // don't split now, but check again later
        log.error("Failed to find lastkey " + e.getMessage(), e);
        return null;
    }
}

From source file:org.apache.accumulo.server.tabletserver.Tablet.java

License:Apache License

/**
 * Counts the number of leading bytes two rows have in common.
 *
 * <p>Only the first {@code getLength()} bytes of each backing array are compared.
 *
 * @param text the first row
 * @param beforeMid the second row
 * @return the length of the shared byte prefix
 */
private static int longestCommonLength(Text text, Text beforeMid) {
    final int limit = Math.min(text.getLength(), beforeMid.getLength());
    final byte[] left = text.getBytes();
    final byte[] right = beforeMid.getBytes();

    int shared = 0;
    for (; shared < limit; shared++) {
        if (left[shared] != right[shared]) {
            break;
        }
    }
    return shared;
}

From source file:org.apache.accumulo.server.util.VerifyTabletAssignments.java

License:Apache License

/**
 * Runs a multi-scan against one tablet server, covering a small range for each extent listed for
 * it, and passes every continued scan result to {@code checkFailures}.
 *
 * <p>The client obtained from {@code ThriftUtil} is always handed back, even when an RPC throws.
 *
 * @param context client context used to obtain the client and the RPC credentials
 * @param entry the server address and the extents to check on that server
 * @param failures collection passed through to {@code checkFailures}
 * @throws ThriftSecurityException propagated from the scan RPCs
 * @throws TException propagated from the scan RPCs
 * @throws NoSuchScanIDException propagated from the scan RPCs
 */
private static void checkTabletServer(ClientContext context, Entry<HostAndPort, List<KeyExtent>> entry,
        HashSet<KeyExtent> failures) throws ThriftSecurityException, TException, NoSuchScanIDException {
    TabletClientService.Iface client = ThriftUtil.getTServerClient(entry.getKey(), context);

    try {
        Map<TKeyExtent, List<TRange>> batch = new TreeMap<>();

        for (KeyExtent keyExtent : entry.getValue()) {
            Text row = keyExtent.getEndRow();
            Text row2 = null;

            if (row == null) {
                // no end row: build a start row from the previous end row (or a fixed value)
                row = keyExtent.getPrevEndRow();

                if (row != null) {
                    row = new Text(row);
                    row.append(new byte[] { 'a' }, 0, 1);
                } else {
                    row = new Text("1234567890");
                }

                row2 = new Text(row);
                row2.append(new byte[] { '!' }, 0, 1);
            } else {
                // work on copies; the start row is the end row with its last valid byte decremented
                row = new Text(row);
                row2 = new Text(row);

                byte[] rowBytes = row.getBytes();
                int lastIndex = row.getLength() - 1;
                rowBytes[lastIndex] = (byte) (rowBytes[lastIndex] - 1);
            }

            Range r = new Range(row, true, row2, false);
            batch.put(keyExtent.toThrift(), Collections.singletonList(r.toThrift()));
        }
        TInfo tinfo = Tracer.traceInfo();
        Map<String, Map<String, String>> emptyMapSMapSS = Collections.emptyMap();
        List<IterInfo> emptyListIterInfo = Collections.emptyList();
        List<TColumn> emptyListColumn = Collections.emptyList();
        InitialMultiScan is = client.startMultiScan(tinfo, context.rpcCreds(), batch, emptyListColumn,
                emptyListIterInfo, emptyMapSMapSS, Authorizations.EMPTY.getAuthorizationsBB(), false, null, 0L,
                null, null);
        if (is.result.more) {
            // keep pulling results until the server reports there are no more
            MultiScanResult result;
            do {
                result = client.continueMultiScan(tinfo, is.scanID);
                checkFailures(entry.getKey(), failures, result);
            } while (result.more);
        }

        client.closeMultiScan(tinfo, is.scanID);
    } finally {
        // hand the client back on every path so it is not leaked when an RPC fails
        ThriftUtil.returnClient((TServiceClient) client);
    }
}

From source file:org.apache.accumulo.tserver.tablet.Tablet.java

License:Apache License

/**
 * Picks the row at which this tablet should be split.
 *
 * <p>Returns {@code null} (do not split now) when the tablet is the root tablet, when its estimated
 * size does not exceed the configured split threshold, when a previously seen big row is still
 * believed to be present, when the midpoint or last key cannot be read, when a single row occupies
 * the tablet from the midpoint to the end, or when the selected split point is longer than the
 * configured maximum end row size.
 *
 * @param files the files handed to the midpoint and last-key lookups
 * @return the chosen split point, or {@code null} when no split should be attempted now
 * @throws IllegalStateException if the midpoint lookup yields no key at .5
 */
private SplitRowSpec findSplitRow(Collection<FileRef> files) {

    // never split the root tablet
    // check if we already decided that we can never split
    // check to see if we're big enough to split

    long splitThreshold = tableConfiguration.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD);
    long maxEndRow = tableConfiguration.getMemoryInBytes(Property.TABLE_MAX_END_ROW_SIZE);

    if (extent.isRootTablet() || estimateTabletSize() <= splitThreshold) {
        return null;
    }

    // have seen a big row before, do not bother checking unless a minor compaction or map file import has occurred.
    if (sawBigRow) {
        if (timeOfLastMinCWhenBigFreakinRowWasSeen != lastMinorCompactionFinishTime
                || timeOfLastImportWhenBigFreakinRowWasSeen != lastMapFileImportTime) {
            // a minor compaction or map file import has occurred... check again
            sawBigRow = false;
        } else {
            // nothing changed, do not split
            return null;
        }
    }

    SortedMap<Double, Key> keys = null;

    try {
        // we should make .25 below configurable
        keys = FileUtil.findMidPoint(getTabletServer().getFileSystem(), getTabletServer().getConfiguration(),
                extent.getPrevEndRow(), extent.getEndRow(), FileUtil.toPathStrings(files), .25);
    } catch (IOException e) {
        // pass the exception itself so the stack trace is not lost
        log.error("Failed to find midpoint " + e.getMessage(), e);
        return null;
    }

    // check to see if one row takes up most of the tablet, in which case we can not split
    try {

        Text lastRow;
        if (extent.getEndRow() == null) {
            Key lastKey = (Key) FileUtil.findLastKey(getTabletServer().getFileSystem(),
                    getTabletServer().getConfiguration(), files);
            lastRow = lastKey.getRow();
        } else {
            lastRow = extent.getEndRow();
        }

        // We expect to get a midPoint for this set of files. If we don't get one, we have a problem.
        final Key mid = keys.get(.5);
        if (null == mid) {
            throw new IllegalStateException("Could not determine midpoint for files");
        }

        // check to see that the midPoint is not equal to the end key
        if (mid.compareRow(lastRow) == 0) {
            if (keys.firstKey() < .5) {
                Key candidate = keys.get(keys.firstKey());
                if (candidate.getLength() > maxEndRow) {
                    log.warn("Cannot split tablet " + extent + ", selected split point too long.  Length :  "
                            + candidate.getLength());

                    // remember when this was seen so the check is skipped until new data arrives
                    sawBigRow = true;
                    timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime;
                    timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime;

                    return null;
                }
                if (candidate.compareRow(lastRow) != 0) {
                    // we should use this ratio in split size estimations
                    if (log.isTraceEnabled())
                        log.trace(String.format(
                                "Splitting at %6.2f instead of .5, row at .5 is same as end row%n",
                                keys.firstKey()));
                    return new SplitRowSpec(keys.firstKey(), candidate.getRow());
                }

            }

            log.warn("Cannot split tablet " + extent + " it contains a big row : " + lastRow);

            sawBigRow = true;
            timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime;
            timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime;

            return null;
        }

        Text text = mid.getRow();
        SortedMap<Double, Key> firstHalf = keys.headMap(.5);
        if (firstHalf.size() > 0) {
            // shorten the split row to its common prefix with the preceding sampled row plus one byte;
            // only the first getLength() bytes of the backing array are valid
            Text beforeMid = firstHalf.get(firstHalf.lastKey()).getRow();
            Text shorter = new Text();
            int trunc = longestCommonLength(text, beforeMid);
            shorter.set(text.getBytes(), 0, Math.min(text.getLength(), trunc + 1));
            text = shorter;
        }

        if (text.getLength() > maxEndRow) {
            log.warn("Cannot split tablet " + extent + ", selected split point too long.  Length :  "
                    + text.getLength());

            sawBigRow = true;
            timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime;
            timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime;

            return null;
        }

        return new SplitRowSpec(.5, text);
    } catch (IOException e) {
        // don't split now, but check again later
        log.error("Failed to find lastkey " + e.getMessage(), e);
        return null;
    }

}

From source file:org.apache.accumulo.tserver.Tablet.java

License:Apache License

/**
 * Picks the row at which this tablet should be split.
 *
 * <p>Returns {@code null} (do not split now) when the tablet is the root tablet, when its estimated
 * size does not exceed the configured split threshold, when a previously seen big row is still
 * believed to be present, when the midpoint or last key cannot be read, or when a single row
 * occupies the tablet from the midpoint to the end.
 *
 * @param files the files handed to the midpoint and last-key lookups
 * @return the chosen split point, or {@code null} when no split should be attempted now
 * @throws IllegalStateException if the midpoint lookup yields no key at .5
 */
private SplitRowSpec findSplitRow(Collection<FileRef> files) {

    // never split the root tablet
    // check if we already decided that we can never split
    // check to see if we're big enough to split

    long splitThreshold = acuTableConf.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD);
    if (extent.isRootTablet() || estimateTabletSize() <= splitThreshold) {
        return null;
    }

    // have seen a big row before, do not bother checking unless a minor compaction or map file import has occurred.
    if (sawBigRow) {
        if (timeOfLastMinCWhenBigFreakinRowWasSeen != lastMinorCompactionFinishTime
                || timeOfLastImportWhenBigFreakinRowWasSeen != lastMapFileImportTime) {
            // a minor compaction or map file import has occurred... check again
            sawBigRow = false;
        } else {
            // nothing changed, do not split
            return null;
        }
    }

    SortedMap<Double, Key> keys = null;

    try {
        // we should make .25 below configurable
        keys = FileUtil.findMidPoint(fs, tabletServer.getSystemConfiguration(), extent.getPrevEndRow(),
                extent.getEndRow(), FileUtil.toPathStrings(files), .25);
    } catch (IOException e) {
        // pass the exception itself so the stack trace is not lost
        log.error("Failed to find midpoint " + e.getMessage(), e);
        return null;
    }

    // check to see if one row takes up most of the tablet, in which case we can not split
    try {

        Text lastRow;
        if (extent.getEndRow() == null) {
            Key lastKey = (Key) FileUtil.findLastKey(fs, tabletServer.getSystemConfiguration(), files);
            lastRow = lastKey.getRow();
        } else {
            lastRow = extent.getEndRow();
        }

        // A midpoint is required for everything below; fail with a clear message instead of a bare
        // NullPointerException when the lookup did not produce one.
        final Key mid = keys.get(.5);
        if (mid == null) {
            throw new IllegalStateException("Could not determine midpoint for files");
        }

        // check to see that the midPoint is not equal to the end key
        if (mid.compareRow(lastRow) == 0) {
            if (keys.firstKey() < .5) {
                Key candidate = keys.get(keys.firstKey());
                if (candidate.compareRow(lastRow) != 0) {
                    // we should use this ratio in split size estimations
                    if (log.isTraceEnabled())
                        log.trace(String.format(
                                "Splitting at %6.2f instead of .5, row at .5 is same as end row%n",
                                keys.firstKey()));
                    return new SplitRowSpec(keys.firstKey(), candidate.getRow());
                }

            }

            log.warn("Cannot split tablet " + extent + " it contains a big row : " + lastRow);

            // remember when the big row was seen so the check is skipped until new data arrives
            sawBigRow = true;
            timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime;
            timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime;

            return null;
        }

        Text text = mid.getRow();
        SortedMap<Double, Key> firstHalf = keys.headMap(.5);
        if (firstHalf.size() > 0) {
            // shorten the split row to its common prefix with the preceding sampled row plus one byte;
            // only the first getLength() bytes of the backing array are valid
            Text beforeMid = firstHalf.get(firstHalf.lastKey()).getRow();
            Text shorter = new Text();
            int trunc = longestCommonLength(text, beforeMid);
            shorter.set(text.getBytes(), 0, Math.min(text.getLength(), trunc + 1));
            text = shorter;
        }
        return new SplitRowSpec(.5, text);
    } catch (IOException e) {
        // don't split now, but check again later
        log.error("Failed to find lastkey " + e.getMessage(), e);
        return null;
    }
}

From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java

License:Apache License

/**
 * Turns one delimited input line into a record mutation and writes it to the context.
 *
 * <p>The row id and record id are either read from the leading fields of the line or generated as
 * a digest of the line's valid bytes, depending on the mapper's flags. The family is read from the
 * line unless it is taken from the path. The remaining fields must match the column names defined
 * for the family one-to-one.
 *
 * @param k the input key (not used)
 * @param value the input line
 * @param context the context the mutation is written to
 * @throws IOException if the family has no column definition or the field count does not match it
 * @throws InterruptedException declared by the overridden method
 */
@Override
protected void map(Writable k, Text value, Context context) throws IOException, InterruptedException {
    BlurRecord record = _mutate.getRecord();
    record.clearColumns();
    String line = value.toString();

    List<String> fields = toList(_splitter.split(line));

    int offset = 0;
    boolean rowIdGenerated = false;
    if (_autoGenerateRowIdAsHashOfData) {
        // hash only the valid portion of the Text's backing array
        _digest.reset();
        _digest.update(value.getBytes(), 0, value.getLength());
        record.setRowId(new BigInteger(_digest.digest()).toString(Character.MAX_RADIX));
        rowIdGenerated = true;
    } else {
        record.setRowId(fields.get(offset++));
    }

    if (!_autoGenerateRecordIdAsHashOfData) {
        record.setRecordId(fields.get(offset++));
    } else if (rowIdGenerated) {
        // the row id is already the digest of this line, so reuse it
        record.setRecordId(record.getRowId());
    } else {
        _digest.reset();
        _digest.update(value.getBytes(), 0, value.getLength());
        record.setRecordId(new BigInteger(_digest.digest()).toString(Character.MAX_RADIX));
    }

    String family = _familyNotInFile ? _familyFromPath : fields.get(offset++);
    record.setFamily(family);

    List<String> columnNames = _columnNameMap.get(family);
    if (columnNames == null) {
        throw new IOException("Family [" + family + "] is missing in the definition.");
    }
    if (fields.size() - offset != columnNames.size()) {
        // list the leading fields that were expected in the line, for the error message
        StringBuilder options = new StringBuilder();
        if (!_autoGenerateRowIdAsHashOfData) {
            options.append("rowid,");
        }
        if (!_autoGenerateRecordIdAsHashOfData) {
            options.append("recordid,");
        }
        if (!_familyNotInFile) {
            options.append("family,");
        }
        throw new IOException("Record [" + line + "] does not match defined record [" + options
                + getColumnNames(columnNames) + "].");
    }

    for (int col = 0; col < columnNames.size(); col++) {
        String columnValue = handleHiveNulls(fields.get(col + offset));
        if (columnValue == null) {
            continue;
        }
        record.addColumn(columnNames.get(col), columnValue);
        _columnCounter.increment(1);
    }
    _key.set(record.getRowId());
    _mutate.setMutateType(MUTATE_TYPE.REPLACE);
    context.write(_key, _mutate);
    _recordCounter.increment(1);
    context.progress();
}

From source file:org.apache.drill.exec.store.text.DrillTextRecordReader.java

License:Apache License

/**
 * Finds the first occurrence of a delimiter byte that is not inside double quotes.
 *
 * <p>Scanning starts at {@code start} with the quote state "outside quotes" and covers only the
 * first {@code getLength()} bytes of the text. Every double-quote byte toggles the quote state
 * before the delimiter comparison is made.
 *
 * @param  text  the text being searched
 * @param  delimiter the delimiter byte
 * @param  start the index to start searching
 * @return the index of the first unquoted delimiter at or after {@code start}, or -1 if none
 */
public int find(Text text, byte delimiter, int start) {
    final int end = text.getLength();
    final byte[] data = text.getBytes();
    boolean quoted = false;
    for (int pos = start; pos < end; pos++) {
        final byte current = data[pos];
        if (current == '\"') {
            quoted = !quoted;
        }
        if (!quoted && current == delimiter) {
            return pos;
        }
    }
    return -1;
}

From source file:org.apache.flume.sink.hdfs.HDFSTextFormatter.java

License:Apache License

/**
 * Renders an event as text followed by a single newline byte.
 *
 * @param e the event to format
 * @return a new array holding exactly the valid bytes of the record, newline included
 */
@Override
public byte[] getBytes(Event e) {
    Text record = makeText(e);
    // append a literal 0x0A rather than "\n".getBytes(), which depends on the platform charset
    record.append(new byte[] { '\n' }, 0, 1);
    // the backing array may be longer than the contents, so copy only getLength() bytes
    return Arrays.copyOf(record.getBytes(), record.getLength());
}

From source file:org.apache.fluo.core.util.ByteUtil.java

License:Apache License

/**
 * Convert from Hadoop Text to Bytes/* w  w w  . j  a  v a 2 s .  c  o m*/
 */
public static Bytes toBytes(Text t) {
    return Bytes.of(t.getBytes(), 0, t.getLength());
}