Example usage for org.apache.hadoop.io Text equals

List of usage examples for org.apache.hadoop.io Text equals

Introduction

This page collects usage examples for the equals(Object) method of org.apache.hadoop.io.Text.

Prototype

@Override
public boolean equals(Object o) 

Source Link

Document

Returns true iff o is a Text with the same contents.

Usage

From source file:be.uantwerpen.adrem.disteclat.PrefixComputerReducer.java

License:Apache License

/**
 * Collects the tid lists received for {@code prefix}, drops every item whose support is below
 * {@code minSup}, and hands the surviving items to {@code assignToBucket}.
 *
 * <p>When {@code prefix} equals {@code PrefixItemTIDsReporter.ShortKey}, the values are first
 * passed to {@code printShorts}.
 *
 * @param prefix  the key being reduced
 * @param values  one matrix per item; the first row of each matrix holds the item id
 * @param context the reducer context (not used directly in this method)
 */
@Override
public void reduce(Text prefix, Iterable<IntMatrixWritable> values, Context context)
        throws IOException, InterruptedException {

    if (prefix.equals(PrefixItemTIDsReporter.ShortKey)) {
        printShorts(values);
    }

    Map<Integer, IntArrayWritable[]> tidListsByItem = newHashMap();

    // NOTE(review): this counter is never incremented in this method, so the guard below
    // cannot fire as written. Confirm whether multiple values per prefix are expected.
    int size = 0;
    for (IntMatrixWritable matrix : values) {
        if (size > 0) {
            throw new IllegalStateException("More than one tid list for a prefix");
        }

        IntArrayWritable[] rows = (IntArrayWritable[]) matrix.get();

        // This is a hack: row 0 is a 1-length array that carries the id of the item.
        int item = ((IntWritable) rows[0].get()[0]).get();

        // Everything after row 0 is an actual tid list.
        IntArrayWritable[] tidLists = new IntArrayWritable[rows.length - 1];
        System.arraycopy(rows, 1, tidLists, 0, tidLists.length);
        tidListsByItem.put(item, tidLists);
    }

    int totalTids = 0;
    Iterator<IntArrayWritable[]> remaining = tidListsByItem.values().iterator();
    while (remaining.hasNext()) {
        int support = 0;
        for (IntArrayWritable tids : remaining.next()) {
            support += tids.get().length;
        }
        if (support < minSup) {
            // Infrequent item: remove it from the map through the iterator.
            remaining.remove();
            continue;
        }
        totalTids += support;
    }

    if (totalTids > 0) {
        assignToBucket(prefix, tidListsByItem, totalTids);
    }
}

From source file:cn.com.diditaxi.hive.cf.UDFStrToDate.java

License:Apache License

/**
 * Parses {@code dateText} with the pattern given in {@code patternText} and re-renders it with
 * {@code standardFormatter}.
 *
 * <p>The pattern is only re-applied to {@code formatter} when it differs from
 * {@code lastPatternText}.
 *
 * @param dateText    the date string to parse
 * @param patternText the pattern describing {@code dateText}
 * @return the {@code result} object holding the re-formatted date, or {@code null} when either
 *         argument is {@code null}, applying the pattern throws, or parsing fails
 */
public Text evaluate(Text dateText, Text patternText) {
    if (null == dateText || null == patternText) {
        return null;
    }

    try {
        boolean samePattern = patternText.equals(lastPatternText);
        if (!samePattern) {
            formatter.applyPattern(patternText.toString());
            lastPatternText.set(patternText);
        }
    } catch (Exception e) {
        // Any failure while switching patterns yields a null result.
        return null;
    }

    try {
        Date parsed = formatter.parse(dateText.toString());
        result.set(standardFormatter.format(parsed));
    } catch (ParseException e) {
        return null;
    }
    return result;
}

From source file:cn.com.diditaxi.hive.cf.UDFToChar.java

License:Apache License

/**
 * Parses {@code dateText} with {@code standardFormatter} and renders it using the pattern given
 * in {@code patternText}.
 *
 * <p>When the trimmed input is exactly 10 characters long it is parsed with the date-only
 * pattern {@code yyyy-MM-dd}. That pattern is applied only for the duration of this call;
 * {@code standardFormatter}'s previous pattern is restored afterwards so that a date-only input
 * does not change how later inputs are parsed.
 *
 * @param dateText    the date string to parse
 * @param patternText the output pattern
 * @return the {@code result} object holding the formatted date, or {@code null} when either
 *         argument is {@code null}, applying the output pattern throws, or parsing fails
 */
public Text evaluate(Text dateText, Text patternText) {
    if (dateText == null || patternText == null) {
        return null;
    }

    try {
        // Re-apply the output pattern only when it changed since the previous call.
        if (!patternText.equals(lastPatternText)) {
            formatter.applyPattern(patternText.toString());
            lastPatternText.set(patternText);
        }
    } catch (Exception e) {
        return null;
    }

    final String dateString = dateText.toString();
    final boolean dateOnly = dateString.trim().length() == 10;
    final String previousPattern = standardFormatter.toPattern();
    try {
        if (dateOnly) {
            standardFormatter.applyPattern("yyyy-MM-dd");
        }
        Date date = standardFormatter.parse(dateString);
        result.set(formatter.format(date));
        return result;
    } catch (ParseException e) {
        return null;
    } finally {
        if (dateOnly) {
            // Undo the temporary date-only pattern so it does not leak into later calls.
            standardFormatter.applyPattern(previousPattern);
        }
    }
}

From source file:com.axiomine.largecollections.turboutil.TextList.java

License:Apache License

/**
 * Returns the position of the first element that equals {@code o}, walking the list with its
 * iterator.
 *
 * @param o the object to look for
 * @return the zero-based index of the first match, or {@code -1} if there is none
 */
@Override
public int indexOf(Object o) {
    int position = 0;
    for (Iterator<Text> cursor = this.iterator(); cursor.hasNext(); position++) {
        if (cursor.next().equals(o)) {
            return position;
        }
    }
    return -1;
}

From source file:com.axiomine.largecollections.turboutil.TextList.java

License:Apache License

/**
 * Returns the position of the last element that equals {@code o}. The whole list is traversed
 * with its iterator and the most recent match is remembered.
 *
 * @param o the object to look for
 * @return the zero-based index of the last match, or {@code -1} if there is none
 */
@Override
public int lastIndexOf(Object o) {
    int lastMatch = -1;
    int position = 0;
    Iterator<Text> cursor = this.iterator();
    while (cursor.hasNext()) {
        if (cursor.next().equals(o)) {
            lastMatch = position;
        }
        position++;
    }
    return lastMatch;
}

From source file:com.bah.bahdit.main.plugins.fulltextindex.iterators.RankCalculator.java

License:Apache License

/**
 * Checks that {@code currentKey} has the same row and the same column family as
 * {@code firstKey}, and that {@code source} still has a top entry.
 *
 * @param currentKey the key to test; may be {@code null}, in which case the result is
 *                   {@code false}
 * @param firstKey   the key to compare against
 * @return {@code true} only if {@code currentKey} is non-null, both row and column family
 *         match, and {@code source.hasTop()} is true
 */
private boolean inSameDocumentAndTerm(Key currentKey, Key firstKey) {
    // Guard first: the comparisons below dereference currentKey.
    if (currentKey == null) {
        return false;
    }

    boolean sameRow = currentKey.getRow().equals(firstKey.getRow());
    Text currentCF = currentKey.getColumnFamily();
    Text firstCF = firstKey.getColumnFamily();
    boolean sameCF = currentCF.equals(firstCF);

    return sameRow && sameCF && source.hasTop();
}

From source file:com.boozallen.cognition.ingest.storm.vo.AccumuloItem.java

License:Apache License

/**
 * Consumes entries from the supplied <code>PeekingIterator</code> for as long as they share the
 * row of the first entry consumed, and returns them as a <code>SortedMap</code>. The first entry
 * with a different row is left in the iterator.
 *
 * @param row iterator over key-value pairs, positioned at the start of the row to read
 * @return the consumed entries keyed by their <code>Key</code>; empty if the iterator has no
 *         more entries
 */
public static SortedMap<Key, Value> buildRowMap(PeekingIterator<Entry<Key, Value>> row) {
    TreeMap<Key, Value> collected = new TreeMap<>();
    Text currentRowId = null;

    while (row.hasNext()) {
        // Once the row id is known, stop as soon as the upcoming entry belongs to another row.
        if (currentRowId != null && !currentRowId.equals(row.peek().getKey().getRow())) {
            break;
        }

        Entry<Key, Value> next = row.next();
        if (currentRowId == null) {
            currentRowId = next.getKey().getRow();
        }
        collected.put(next.getKey(), next.getValue());
    }

    return collected;
}

From source file:com.datatorrent.stram.security.StramDelegationTokenSelector.java

License:Apache License

/**
 * Picks, from {@code clctn}, a token whose kind equals
 * {@code StramDelegationTokenIdentifier.IDENTIFIER_KIND} and whose service equals {@code text}.
 * The whole collection is scanned, so when several tokens match the last one is returned.
 *
 * @param text  the service to match; when {@code null} no token is selected
 * @param clctn the candidate tokens
 * @return the last matching token, or {@code null} if none matches
 */
@Override
public Token<StramDelegationTokenIdentifier> selectToken(Text text,
        Collection<Token<? extends TokenIdentifier>> clctn) {
    if (text == null) {
        return null;
    }

    Token<StramDelegationTokenIdentifier> selected = null;
    for (Token<? extends TokenIdentifier> candidate : clctn) {
        boolean kindMatches = StramDelegationTokenIdentifier.IDENTIFIER_KIND.equals(candidate.getKind());
        if (!kindMatches || !text.equals(candidate.getService())) {
            continue;
        }
        selected = (Token<StramDelegationTokenIdentifier>) candidate;
    }
    return selected;
}

From source file:com.facebook.presto.accumulo.tools.RewriteIndex.java

License:Apache License

/**
 * Scans the data table of {@code table} and re-creates its index entries.
 *
 * <p>Entries are read with a batch scanner restricted by a {@code TimestampFilter} with range
 * {@code 0..start}. Consecutive entries with the same row are collected into one
 * {@code Mutation}; when the row changes (and once more after the scan) the mutation is passed
 * to {@code indexer.index}, unless {@code dryRun} is set. Row and index-entry counts are logged
 * every 500000 rows and at the end.
 *
 * <p>Accumulo and table-not-found exceptions are logged, not rethrown. The index writer and the
 * scanner are closed in all cases.
 *
 * @param connector connector used to create the writer and scanner
 * @param table     the table whose index is rewritten
 * @param start     upper bound passed to the timestamp filter
 */
private void addIndexEntries(Connector connector, AccumuloTable table, long start) {
    LOG.info(format("Scanning data table %s to add index entries", table.getFullTableName()));
    BatchScanner scanner = null;
    BatchWriter indexWriter = null;
    try {
        // Create index writer and metrics writer, but we are never going to flush the metrics writer
        indexWriter = connector.createBatchWriter(table.getIndexTableName(), bwc);
        Indexer indexer = new Indexer(connector, table, indexWriter,
                table.getMetricsStorageInstance(connector).newWriter(table));
        LOG.info("Created indexer against " + table.getIndexTableName());

        scanner = connector.createBatchScanner(table.getFullTableName(), auths, 10);
        LOG.info(format("Created batch scanner against %s with auths %s", table.getFullTableName(), auths));

        IteratorSetting timestampFilter = new IteratorSetting(21, "timestamp", TimestampFilter.class);
        TimestampFilter.setRange(timestampFilter, 0L, start);
        scanner.addScanIterator(timestampFilter);

        scanner.setRanges(connector.tableOperations().splitRangeByTablets(table.getFullTableName(), new Range(),
                Integer.MAX_VALUE));

        long numRows = 0L;
        long numIndexEntries = 0L;
        Text prevRow = null;
        // Reused buffers: the Key getters below fill these instead of allocating per entry.
        Text row = new Text();
        Text cf = new Text();
        Text cq = new Text();
        Mutation mutation = null;
        for (Entry<Key, Value> entry : scanner) {
            entry.getKey().getRow(row);
            entry.getKey().getColumnFamily(cf);
            entry.getKey().getColumnQualifier(cq);

            // if the rows do not match, index the mutation
            if (prevRow != null && !prevRow.equals(row)) {
                if (!dryRun) {
                    indexer.index(mutation);
                }
                ++numRows;
                mutation = null;

                if (numRows % 500000 == 0) {
                    if (dryRun) {
                        LOG.info(
                                format("In progress, would have re-indexed %s rows containing %s index entries",
                                        numRows, numIndexEntries));
                    } else {
                        LOG.info(format("In progress, re-indexed %s rows containing %s index entries", numRows,
                                numIndexEntries));
                    }
                }
            }

            if (mutation == null) {
                mutation = new Mutation(row);
            }

            mutation.put(cf, cq, entry.getKey().getColumnVisibilityParsed(), entry.getKey().getTimestamp(),
                    entry.getValue());

            // Decode once per entry rather than once per column inside the predicate.
            String family = new String(cf.copyBytes(), UTF_8);
            String qualifier = new String(cq.copyBytes(), UTF_8);
            boolean indexedColumn = table.getColumns().stream()
                    .anyMatch(column -> column.isIndexed() && column.getFamily().isPresent()
                            && column.getQualifier().isPresent()
                            && column.getFamily().get().equals(family)
                            && column.getQualifier().get().equals(qualifier));
            if (indexedColumn) {
                ++numIndexEntries;
            }

            // Copy the row: the 'row' buffer is overwritten on the next iteration.
            if (prevRow == null) {
                prevRow = new Text(row);
            } else {
                prevRow.set(row);
            }
        }

        // Index the final mutation
        if (mutation != null) {
            if (!dryRun) {
                indexer.index(mutation);
            }
            ++numRows;
        }

        if (dryRun) {
            LOG.info(format(
                    "Finished dry run of rewriting index entries. Would have re-indexed %s rows containing %s index entries",
                    numRows, numIndexEntries));
        } else {
            LOG.info(format("Finished adding index entries. Re-indexed %s rows containing %s index entries",
                    numRows, numIndexEntries));
        }
    } catch (AccumuloException | AccumuloSecurityException e) {
        LOG.error("Accumulo exception", e);
    } catch (TableNotFoundException e) {
        LOG.error("Table not found, must have been deleted during process", e);
    } finally {
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (MutationsRejectedException e) {
                LOG.error("Server rejected mutations", e);
            }
        }

        if (scanner != null) {
            scanner.close();
        }
    }
}

From source file:com.facebook.presto.accumulo.tools.RewriteMetricsTask.java

License:Apache License

/**
 * Scans the index table of {@code table} and rewrites its metrics into the
 * {@code <index table>_metrics} table.
 *
 * <p>Entries are read through an isolated scanner restricted by a {@code TimestampFilter} with
 * range {@code 0..start}. Counts are accumulated in {@code rowMap} via {@code incrementMetric}
 * (and {@code incrementTimestampMetric} for timestamp columns). {@code writeMetrics} is called
 * whenever the row changes and once more after the scan.
 *
 * <p>Table-not-found and rejected-mutation exceptions are logged, not rethrown. The writer and
 * scanner are closed in all cases.
 *
 * @param connector connector used to create the writer and scanner
 * @param table     the table whose metrics are rewritten
 * @param start     upper bound passed to the timestamp filter, also forwarded to
 *                  {@code writeMetrics}
 */
private void rewriteMetrics(Connector connector, AccumuloTable table, long start) {
    LOG.info("Rewriting metrics for table " + table.getFullTableName());

    TypedValueCombiner.Encoder<Long> encoder = new LongCombiner.StringEncoder();
    BatchWriter writer = null;
    Scanner scanner = null;
    try {
        writer = connector.createBatchWriter(table.getIndexTableName() + "_metrics", bwc);
        LOG.info("Created batch writer against " + table.getIndexTableName() + "_metrics");

        scanner = new IsolatedScanner(connector.createScanner(table.getIndexTableName(), auths));
        LOG.info(format("Created isolated scanner against %s with auths %s", table.getIndexTableName(), auths));

        // Both family and qualifier must be present: the mapping step below unwraps both.
        Set<Pair<String, String>> timestampColumns = table.isTruncateTimestamps() ? table.getColumns().stream()
                .filter(x -> x.getType().equals(TimestampType.TIMESTAMP) && x.getFamily().isPresent()
                        && x.getQualifier().isPresent())
                .map(x -> Pair.of(x.getFamily().get(), x.getQualifier().get())).collect(Collectors.toSet())
                : ImmutableSet.of();

        LOG.info("Timestamp columns are " + timestampColumns);

        IteratorSetting timestampFilter = new IteratorSetting(21, "timestamp", TimestampFilter.class);
        TimestampFilter.setRange(timestampFilter, 0L, start);
        scanner.addScanIterator(timestampFilter);

        Map<Text, Map<Text, Map<ColumnVisibility, AtomicLong>>> rowMap = new HashMap<>();
        long numMutations = 0L;
        // True until the underscore warning has been logged once.
        boolean shouldWarn = true;
        Text prevRow = null;
        for (Entry<Key, Value> entry : scanner) {
            Text row = entry.getKey().getRow();
            Text cf = entry.getKey().getColumnFamily();

            // Row changed: flush what has been accumulated so far.
            if (prevRow != null && !prevRow.equals(row)) {
                writeMetrics(start, encoder, writer, rowMap);
                ++numMutations;

                if (numMutations % 500000 == 0) {
                    if (dryRun) {
                        LOG.info(format("In progress, would have written %s metric mutations", numMutations));
                    } else {
                        LOG.info("In progress, metric mutations written: " + numMutations);
                    }
                }
            }

            ColumnVisibility visibility = entry.getKey().getColumnVisibilityParsed();
            incrementMetric(rowMap, row, cf, visibility);
            // The column family is split on "_" into a (family, qualifier) pair; anything that
            // does not split into exactly two parts cannot be matched against timestampColumns.
            String[] famQual = cf.toString().split("_");

            if (famQual.length == 2) {
                if (timestampColumns.contains(Pair.of(famQual[0], famQual[1]))) {
                    incrementTimestampMetric(rowMap, cf, visibility, row);
                }
            } else if (shouldWarn) {
                LOG.warn(
                        "Unable to re-write timestamp metric when either of a family/qualifier column mapping contains an underscore");
                shouldWarn = false;
            }

            if (prevRow == null) {
                prevRow = new Text(row);
            } else {
                prevRow.set(row);
            }
        }

        // Write final metric
        // NOTE(review): this runs and is counted even when the scan returned no entries.
        writeMetrics(start, encoder, writer, rowMap);
        ++numMutations;

        if (dryRun) {
            LOG.info(format("Would have written %s mutations", numMutations));
        } else {
            LOG.info("Finished rewriting metrics. Mutations written: " + numMutations);
        }
    } catch (TableNotFoundException e) {
        LOG.error("Table not found, must have been deleted during process", e);
    } catch (MutationsRejectedException e) {
        LOG.error("Server rejected mutations", e);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (MutationsRejectedException e) {
                LOG.error("Server rejected mutations", e);
            }
        }

        if (scanner != null) {
            scanner.close();
        }
    }
}