Example usage for org.apache.lucene.index Term equals

List of usage examples for org.apache.lucene.index Term equals

Introduction

On this page you can find example usages of the equals method of org.apache.lucene.index.Term.

Prototype

@Override
    public boolean equals(Object obj) 

Source Link

Usage

From source file:ch.ymc.lucehbase.LucandraTermEnum.java

License:Apache License

/**
 * Fills {@code termBuffer} (and {@code termDocFreqBuffer}) with the terms starting at
 * {@code skipTo}, either from {@code termCache} or by scanning the HBase table.
 *
 * <p>The method returns early, without touching HBase, when the request can be answered
 * from {@code termCache}, or when an earlier chunk came back smaller than
 * {@code maxChunkSize} (in which case {@code termBuffer} is set to an empty array).
 * Otherwise it scans the rows between the computed start and end keys, stores every
 * non-empty row in a fresh buffer, merges that buffer into {@code termCache} and registers
 * each loaded term with {@code indexReader}.
 *
 * @param skipTo the term to start from; its field must not be empty, because the end key
 *               is derived from the last character of the field name
 * @throws IOException if obtaining the scanner from the table fails
 */
private void loadTerms(Term skipTo) throws IOException {
    // chose starting term
    String startTerm = indexName + HBaseUtils.delimeter + HBaseUtils.createColumnName(skipTo);

    // this is where we stop: the field name with its last character incremented by one
    String field = skipTo.field();
    int lastIndex = field.length() - 1;
    String endTerm = indexName + HBaseUtils.delimeter + field.substring(0, lastIndex)
            + (char) (field.charAt(lastIndex) + 1);

    if ((!skipTo.equals(initTerm) || termPosition == 0) && termCache != null) {
        termDocFreqBuffer = termCache.subMap(skipTo, termCache.lastKey());
    } else {
        termDocFreqBuffer = null;
    }

    if (termDocFreqBuffer != null) {

        termBuffer = termDocFreqBuffer.keySet().toArray(new Term[] {});
        termPosition = 0;

        logger.debug("Found " + startTerm + " in cache");
        return;
    } else if (chunkCount > 1 && actualInitSize < maxChunkSize) {
        termBuffer = new Term[] {};
        termPosition = 0;
        return; // done!
    }

    chunkCount++;

    // After the first chunk, continue from the last term loaded previously.
    // NOTE(review): the original computed a row limit (maxInitSize / maxChunkSize) here but
    // never applied it to the scan; the unused local was removed. Confirm whether the scan
    // was meant to be limited.
    if (initTerm != null) {
        startTerm = indexName + HBaseUtils.delimeter + HBaseUtils.createColumnName(initTerm);
    }

    long start = System.currentTimeMillis();

    termDocFreqBuffer = new TreeMap<Term, NavigableMap<byte[], byte[]>>();

    // Get all columns
    Scan scan = new Scan(startTerm.getBytes(), endTerm.getBytes());
    scan.addFamily(HBaseUtils.termVecColumnFamily);
    ResultScanner scanner = table.getScanner(scan);

    actualInitSize = 0;
    try {
        for (Result result : scanner) {
            ++actualInitSize;
            NavigableMap<byte[], byte[]> columns = result.getFamilyMap(HBaseUtils.termVecColumnFamily);
            byte[] row = result.getRow();
            String rowString = new String(row);

            // term keys look like wikipedia/body/wiki
            String termStr = rowString
                    .substring(rowString.indexOf(HBaseUtils.delimeter) + HBaseUtils.delimeter.length());
            Term term;
            try {
                term = HBaseUtils.parseTerm(termStr.getBytes("UTF-8"));
            } catch (UnsupportedEncodingException e) {
                throw new RuntimeException(e);
            }

            logger.debug(termStr + " has " + columns.size());

            //check for tombstone keys
            if (columns.size() > 0) {
                termDocFreqBuffer.put(term, columns);
            }
        }
    } finally {
        // Always release the scanner, even if a row fails to parse.
        scanner.close();
    }

    // term to start with next time
    if (!termDocFreqBuffer.isEmpty()) {
        initTerm = termDocFreqBuffer.lastKey();
    }

    logger.debug("Found " + actualInitSize + " keys in range:" + startTerm + " to " + endTerm + " in "
            + (System.currentTimeMillis() - start) + "ms");

    // add a final key (excluded in submap below)
    termDocFreqBuffer.put(finalTerm, null);

    // put in cache; the merge is done once instead of once per key
    // (the buffer always holds at least finalTerm, so the original loop always ran)
    if (termCache == null) {
        termCache = termDocFreqBuffer;
    } else {
        termCache.putAll(termDocFreqBuffer);
    }

    for (Term termKey : termDocFreqBuffer.keySet()) {
        indexReader.addTermEnumCache(termKey, this);
    }

    // cache the initial term too
    indexReader.addTermEnumCache(skipTo, this);
    termBuffer = termDocFreqBuffer.keySet().toArray(new Term[] {});
    termPosition = 0;
    long end = System.currentTimeMillis();

    logger.debug("loadTerms: " + startTerm + "(" + termBuffer.length + ") took " + (end - start) + "ms");
}

From source file:lucandra.LucandraTermEnum.java

License:Apache License

/**
 * Fills {@code termBuffer} (and {@code termDocFreqBuffer}) with the terms starting at
 * {@code skipTo}, either from {@code termCache} or through a Cassandra range-slice query.
 *
 * <p>The method returns early, without querying Cassandra, when the request can be answered
 * from {@code termCache}, or when an earlier chunk came back smaller than
 * {@code maxChunkSize}. Otherwise it fetches up to {@code maxInitSize} keys on the first
 * pass and up to {@code maxChunkSize} keys on later passes, keeps the entries that have
 * columns and belong to this index and to the field of {@code skipTo}, merges them into
 * {@code termCache} and registers each loaded term with {@code indexReader}.
 *
 * <p>Any Thrift/Cassandra exception raised by the query is rethrown wrapped in a
 * {@link RuntimeException}.
 *
 * @param skipTo the term to start from
 */
private void loadTerms(Term skipTo) {

    if (initTerm == null) {
        initTerm = skipTo;
    }

    // chose starting term
    String startTerm = CassandraUtils
            .hashKey(indexName + CassandraUtils.delimeter + CassandraUtils.createColumnName(skipTo));

    // ending term. the initial query we don't care since
    // we only pull 2 terms, also we don't
    String endTerm = "";

    //The boundary condition for this search. currently the field.
    String boundryTerm = CassandraUtils.hashKey(indexName + CassandraUtils.delimeter
            + CassandraUtils.createColumnName(skipTo.field(), CassandraUtils.finalToken));

    if ((!skipTo.equals(chunkBoundryTerm) || termPosition == 0) && termCache != null) {
        termDocFreqBuffer = termCache.subMap(skipTo, termCache.lastKey());
    } else {
        termDocFreqBuffer = null;
    }

    if (termDocFreqBuffer != null) {

        termBuffer = termDocFreqBuffer.keySet().toArray(new Term[] {});
        termPosition = 0;

        logger.debug("Found " + startTerm + " in cache");
        return;
    } else if (chunkCount > 1 && actualInitSize < maxChunkSize) {

        //include last term
        if (skipTo.equals(chunkBoundryTerm) && termCache.containsKey(skipTo)) {
            termBuffer = new Term[] { skipTo };
            termDocFreqBuffer = termCache.subMap(skipTo, termCache.lastKey());
        } else {
            termBuffer = new Term[] {};
        }

        termPosition = 0;
        return; // done!
    }

    chunkCount++;

    // The first time we grab just a few keys
    int count = maxInitSize;

    // otherwise we grab all the rest of the keys
    if (chunkBoundryTerm != null) {
        count = maxChunkSize;
        startTerm = CassandraUtils.hashKey(
                indexName + CassandraUtils.delimeter + CassandraUtils.createColumnName(chunkBoundryTerm));

        //After first pass use the boundary term, since we know on pass 2 we are using the OPP
        endTerm = boundryTerm;
    }

    long start = System.currentTimeMillis();

    termDocFreqBuffer = new TreeMap<Term, List<ColumnOrSuperColumn>>();

    ColumnParent columnParent = new ColumnParent(CassandraUtils.termVecColumnFamily);
    SlicePredicate slicePredicate = new SlicePredicate();

    // Get all columns
    SliceRange sliceRange = new SliceRange(new byte[] {}, new byte[] {}, true, Integer.MAX_VALUE);
    slicePredicate.setSlice_range(sliceRange);

    List<KeySlice> columns;
    try {
        columns = client.get_range_slice(CassandraUtils.keySpace, columnParent, slicePredicate, startTerm,
                endTerm, count, ConsistencyLevel.ONE);
    } catch (InvalidRequestException e) {
        throw new RuntimeException(e);
    } catch (TException e) {
        throw new RuntimeException(e);
    } catch (UnavailableException e) {
        throw new RuntimeException(e);
    } catch (TimedOutException e) {
        throw new RuntimeException(e);
    }

    // term to start with next time
    actualInitSize = columns.size();
    logger.debug("Found " + columns.size() + " keys in range:" + startTerm + " to " + endTerm + " in "
            + (System.currentTimeMillis() - start) + "ms");

    if (actualInitSize > 0) {
        for (KeySlice entry : columns) {

            // term keys look like wikipedia/body/wiki
            String termStr = entry.getKey().substring(
                    entry.getKey().indexOf(CassandraUtils.delimeter) + CassandraUtils.delimeter.length());
            Term term = CassandraUtils.parseTerm(termStr);

            logger.debug(termStr + " has " + entry.getColumns().size());

            //check for tombstone keys or incorrect keys (from RP)
            if (entry.getColumns().size() > 0 && term.field().equals(skipTo.field()) &&
            //from this index
                    entry.getKey().equals(CassandraUtils.hashKey(indexName + CassandraUtils.delimeter
                            + term.field() + CassandraUtils.delimeter + term.text()))) {
                termDocFreqBuffer.put(term, entry.getColumns());
            }
        }

        if (!termDocFreqBuffer.isEmpty()) {
            chunkBoundryTerm = termDocFreqBuffer.lastKey();
        }
    }

    // add a final key (excluded in submap below)
    termDocFreqBuffer.put(finalTerm, null);

    // put in cache; the merge is done once instead of once per key
    // (the buffer always holds at least finalTerm, so the original loop always ran)
    if (termCache == null) {
        termCache = termDocFreqBuffer;
    } else {
        termCache.putAll(termDocFreqBuffer);
    }

    for (Term termKey : termDocFreqBuffer.keySet()) {
        indexReader.addTermEnumCache(termKey, this);
    }

    // cache the initial term too
    indexReader.addTermEnumCache(skipTo, this);

    termBuffer = termDocFreqBuffer.keySet().toArray(new Term[] {});

    termPosition = 0;

    long end = System.currentTimeMillis();

    logger.debug("loadTerms: " + startTerm + "(" + termBuffer.length + ") took " + (end - start) + "ms");

}

From source file:org.apache.solr.search.stats.StatsUtil.java

License:Apache License

/**
 * Parses a comma-separated term statistics string into a {@code TermStats}.
 *
 * <p>When the string has exactly three fields, the first is decoded as the term via
 * {@code termFromString} and the next two are the document frequency and the total term
 * frequency. In every other case the first two fields are parsed as those two numbers and
 * {@code t} supplies the term. If decoding the embedded term fails, {@code t} is used as a
 * fallback.
 *
 * @param data the encoded statistics; may be {@code null} or blank
 * @param t    the term to use when {@code data} carries no decodable term; may be {@code null}
 * @return the parsed statistics, or {@code null} (after logging a warning) if {@code data}
 *         is empty, has fewer than two fields, no term is available, or parsing fails
 */
private static TermStats termStatsFromString(String data, Term t) {
    if (data == null || data.trim().length() == 0) {
        LOG.warn("Invalid empty term stats string");
        return null;
    }
    String[] vals = data.split(",");
    if (vals.length < 2) {
        LOG.warn("Invalid term stats string, num fields " + vals.length + " < 2, '" + data + "'");
        return null;
    }
    Term termToUse;
    int idx = 0;
    if (vals.length == 3) {
        idx++;
        // with term
        Term term = termFromString(vals[0]);
        if (term != null) {
            termToUse = term;
            if (t != null) {
                assert term.equals(t);
            }
        } else { // failed term decoding
            termToUse = t;
        }
    } else {
        termToUse = t;
    }
    if (termToUse == null) {
        LOG.warn("Missing term in termStats '" + data + "'");
        return null;
    }
    try {
        long docFreq = Long.parseLong(vals[idx++]);
        long totalTermFreq = Long.parseLong(vals[idx]);
        return new TermStats(termToUse.toString(), docFreq, totalTermFreq);
    } catch (Exception e) {
        // Keep the best-effort contract (return null) but log the cause instead of dropping it.
        LOG.warn("Invalid termStats string '" + data + "'", e);
        return null;
    }
}

From source file:org.pageseeder.flint.lucene.query.Queries.java

License:Apache License

/**
 * Replaces the term of a term query when it matches a given term.
 *
 * <p>The supplied query is never modified; a new query is only created when the
 * substitution actually applies.
 *
 * @param query       the term query to inspect.
 * @param original    the term that should be replaced.
 * @param replacement the term to use instead of the original.
 *
 * @return a new term query on {@code replacement} if the query's term equals
 *         {@code original}; otherwise the query that was passed in.
 */
@Beta
public static TermQuery substitute(TermQuery query, Term original, Term replacement) {
    boolean matches = query.getTerm().equals(original);
    return matches ? new TermQuery(replacement) : query;
}

From source file:org.pageseeder.flint.lucene.query.Queries.java

License:Apache License

/**
 * Replaces every occurrence of a term in a phrase query with another term.
 *
 * <p>In a phrase query the replacement term must be on the same field as the original term.
 * This method performs no field check itself; any such failure would come from
 * {@code PhraseQuery.add}.
 *
 * <p>The supplied query is never modified; a new query, carrying over the slop and boost of
 * the given one, is only created when at least one term matches.
 *
 * @param query       the phrase query to inspect.
 * @param original    the term that should be replaced.
 * @param replacement the term to use instead of the original.
 *
 * @return a new phrase query where the term has been substituted;
 *         or the same query if no substitution was needed.
 *
 * @throws IllegalArgumentException if the replacement term is not on the same field as the original term.
 */
@Beta
public static PhraseQuery substitute(PhraseQuery query, Term original, Term replacement)
        throws IllegalArgumentException {
    // Look for at least one occurrence of the original term
    boolean found = false;
    for (Term candidate : query.getTerms()) {
        if (candidate.equals(original)) {
            found = true;
            break;
        }
    }
    // Nothing to replace: hand back the query untouched
    if (!found) {
        return query;
    }
    // Rebuild the phrase, swapping matching terms
    PhraseQuery substituted = new PhraseQuery();
    for (Term candidate : query.getTerms()) {
        if (candidate.equals(original)) {
            substituted.add(replacement);
        } else {
            substituted.add(candidate);
        }
    }
    substituted.setSlop(query.getSlop());
    substituted.setBoost(query.getBoost());
    return substituted;
}

From source file:org.weborganic.flint.util.Queries.java

License:artistic-license-2.0

/**
 * Swaps the term of a term query for another one when it matches.
 *
 * <p>A new query object is created only if required; the given query is left unchanged.
 *
 * @param query       the query in which the substitution should take place.
 * @param original    the term to look for.
 * @param replacement the term that takes the place of the original.
 *
 * @return a new term query built on {@code replacement} when the query's term equals
 *         {@code original}; the given query otherwise.
 */
@Beta
public static TermQuery substitute(TermQuery query, Term original, Term replacement) {
    Term current = query.getTerm();
    // Guard clause: nothing to substitute
    if (!current.equals(original)) {
        return query;
    }
    return new TermQuery(replacement);
}

From source file:org.weborganic.flint.util.Queries.java

License:artistic-license-2.0

/**
 * Swaps one term of a phrase query for another wherever it occurs.
 *
 * <p>In a phrase query the replacement term must be on the same field as the original term.
 * No field check is made here; any such failure would come from {@code PhraseQuery.add}.
 *
 * <p>A new query object is created only if required; the given query is left unchanged.
 * The new query receives the slop and boost of the given one.
 *
 * @param query       the query in which the substitution should take place.
 * @param original    the term to look for.
 * @param replacement the term that takes the place of the original.
 *
 * @return a new phrase query where the term has been substituted;
 *         or the same query if no substitution was needed.
 *
 * @throws IllegalArgumentException if the replacement term is not on the same field as the original term.
 */
@Beta
public static PhraseQuery substitute(PhraseQuery query, Term original, Term replacement)
        throws IllegalArgumentException {
    // Count the occurrences so we know whether a new query is needed at all
    int occurrences = 0;
    for (Term term : query.getTerms()) {
        if (term.equals(original)) {
            occurrences++;
        }
    }
    if (occurrences == 0) {
        // No substitution: return the query as is
        return query;
    }
    PhraseQuery result = new PhraseQuery();
    for (Term term : query.getTerms()) {
        Term toAdd = term.equals(original) ? replacement : term;
        result.add(toAdd);
    }
    result.setSlop(query.getSlop());
    result.setBoost(query.getBoost());
    return result;
}