List of usage examples for org.apache.lucene.index.Term#equals(Object)
@Override
public boolean equals(Object obj)
From source file:ch.ymc.lucehbase.LucandraTermEnum.java
License:Apache License
/**
 * Loads a chunk of terms starting at (or after) {@code skipTo} into
 * {@code termBuffer}/{@code termDocFreqBuffer}, serving from the shared
 * {@code termCache} when possible and otherwise scanning the HBase
 * term-vector column family.
 *
 * @param skipTo the term to position this enumeration at
 * @throws IOException if the HBase scan fails
 */
private void loadTerms(Term skipTo) throws IOException {
    // Start row key: "<index>/<column-name-of-skipTo>".
    String startTerm = indexName + HBaseUtils.delimeter + HBaseUtils.createColumnName(skipTo);

    // Exclusive stop key: the field name with its last character bumped by one,
    // i.e. the first key past this field's term range.
    String endTerm = indexName + HBaseUtils.delimeter + skipTo.field().substring(0, skipTo.field().length() - 1)
            + new Character((char) (skipTo.field().toCharArray()[skipTo.field().length() - 1] + 1));

    // Serve from the cache unless we are re-reading the boundary term mid-enumeration.
    if ((!skipTo.equals(initTerm) || termPosition == 0) && termCache != null) {
        termDocFreqBuffer = termCache.subMap(skipTo, termCache.lastKey());
    } else {
        termDocFreqBuffer = null;
    }

    if (termDocFreqBuffer != null) {
        // Cache hit: expose the cached sub-map and reset the cursor.
        termBuffer = termDocFreqBuffer.keySet().toArray(new Term[] {});
        termPosition = 0;
        logger.debug("Found " + startTerm + " in cache");
        return;
    } else if (chunkCount > 1 && actualInitSize < maxChunkSize) {
        // A previous chunk came back short, so the range is exhausted.
        termBuffer = new Term[] {};
        termPosition = 0;
        return; // done!
    }

    chunkCount++;

    // First pass grabs just a few keys; later passes grab full chunks starting
    // from the last term seen (initTerm).
    // NOTE(review): count is assigned but never used below — the HBase Scan is
    // not limited by it; confirm whether a row limit was intended here.
    int count = maxInitSize;
    if (initTerm != null) {
        count = maxChunkSize;
        startTerm = indexName + HBaseUtils.delimeter + HBaseUtils.createColumnName(initTerm);
    }

    long start = System.currentTimeMillis();

    termDocFreqBuffer = new TreeMap<Term, NavigableMap<byte[], byte[]>>();

    // Scan [startTerm, endTerm) over the term-vector column family.
    Scan scan = new Scan(startTerm.getBytes(), endTerm.getBytes());
    scan.addFamily(HBaseUtils.termVecColumnFamily);
    ResultScanner scanner = table.getScanner(scan);

    actualInitSize = 0;
    for (Result result : scanner) {
        ++actualInitSize;

        NavigableMap<byte[], byte[]> columns = result.getFamilyMap(HBaseUtils.termVecColumnFamily);
        byte[] row = result.getRow();
        String rowString = new String(row);

        // Row keys look like "<index>/<field>/<text>"; strip the index prefix.
        String termStr = rowString
                .substring(rowString.indexOf(HBaseUtils.delimeter) + HBaseUtils.delimeter.length());
        Term term;
        try {
            term = HBaseUtils.parseTerm(termStr.getBytes("UTF-8"));
        } catch (UnsupportedEncodingException e) {
            // UTF-8 is guaranteed by the JVM; treat as fatal if it ever happens.
            throw new RuntimeException(e);
        }

        logger.debug(termStr + " has " + columns.size());

        // Skip tombstone rows (no live columns).
        if (columns.size() > 0) {
            termDocFreqBuffer.put(term, columns);
        }
    }

    // Remember the last term fetched: it is the start point for the next chunk.
    if (!termDocFreqBuffer.isEmpty()) {
        initTerm = termDocFreqBuffer.lastKey();
    }

    logger.debug("Found " + actualInitSize + " keys in range:" + startTerm + " to " + endTerm + " in "
            + (System.currentTimeMillis() - start) + "ms");

    // Add a sentinel key; sub-map lookups above use it as an exclusive upper bound.
    termDocFreqBuffer.put(finalTerm, null);

    // Publish the chunk into the shared cache and register this enum for every
    // term it now covers.
    for (Term termKey : termDocFreqBuffer.keySet()) {
        if (termCache == null) {
            termCache = termDocFreqBuffer;
        } else {
            termCache.putAll(termDocFreqBuffer);
        }
        indexReader.addTermEnumCache(termKey, this);
    }

    // Cache the initial term too.
    indexReader.addTermEnumCache(skipTo, this);

    termBuffer = termDocFreqBuffer.keySet().toArray(new Term[] {});
    termPosition = 0;

    long end = System.currentTimeMillis();
    logger.debug("loadTerms: " + startTerm + "(" + termBuffer.length + ") took " + (end - start) + "ms");
}
From source file:lucandra.LucandraTermEnum.java
License:Apache License
/**
 * Loads a chunk of terms starting at (or after) {@code skipTo} into
 * {@code termBuffer}/{@code termDocFreqBuffer}, serving from the shared
 * {@code termCache} when possible and otherwise issuing a Cassandra
 * {@code get_range_slice} over the term-vector column family.
 *
 * @param skipTo the term to position this enumeration at
 */
private void loadTerms(Term skipTo) {
    if (initTerm == null)
        initTerm = skipTo;

    // Start key: hashed "<index>/<column-name-of-skipTo>".
    String startTerm = CassandraUtils
            .hashKey(indexName + CassandraUtils.delimeter + CassandraUtils.createColumnName(skipTo));

    // End key: empty for the initial query (we only pull a few terms on pass 1).
    String endTerm = "";

    // The boundary of this search: the end of the current field's term range.
    String boundryTerm = CassandraUtils.hashKey(indexName + CassandraUtils.delimeter
            + CassandraUtils.createColumnName(skipTo.field(), CassandraUtils.finalToken));

    // Serve from the cache unless we are re-reading the chunk boundary term
    // mid-enumeration.
    if ((!skipTo.equals(chunkBoundryTerm) || termPosition == 0) && termCache != null) {
        termDocFreqBuffer = termCache.subMap(skipTo, termCache.lastKey());
    } else {
        termDocFreqBuffer = null;
    }

    if (termDocFreqBuffer != null) {
        // Cache hit: expose the cached sub-map and reset the cursor.
        termBuffer = termDocFreqBuffer.keySet().toArray(new Term[] {});
        termPosition = 0;
        logger.debug("Found " + startTerm + " in cache");
        return;
    } else if (chunkCount > 1 && actualInitSize < maxChunkSize) {
        // Range exhausted; include the boundary term itself if it is cached.
        if (skipTo.equals(chunkBoundryTerm) && termCache.containsKey(skipTo)) {
            termBuffer = new Term[] { skipTo };
            termDocFreqBuffer = termCache.subMap(skipTo, termCache.lastKey());
        } else {
            termBuffer = new Term[] {};
        }
        termPosition = 0;
        return; // done!
    }

    chunkCount++;

    // The first time we grab just a few keys; afterwards we grab full chunks
    // starting from the last chunk's boundary term.
    int count = maxInitSize;
    if (chunkBoundryTerm != null) {
        count = maxChunkSize;
        startTerm = CassandraUtils.hashKey(
                indexName + CassandraUtils.delimeter + CassandraUtils.createColumnName(chunkBoundryTerm));

        // After the first pass use the boundary term, since we know on pass 2 we
        // are using the OPP (order-preserving partitioner).
        endTerm = boundryTerm;
    }

    long start = System.currentTimeMillis();

    termDocFreqBuffer = new TreeMap<Term, List<ColumnOrSuperColumn>>();

    ColumnParent columnParent = new ColumnParent(CassandraUtils.termVecColumnFamily);
    SlicePredicate slicePredicate = new SlicePredicate();

    // Request all columns of each row (reversed slice, unbounded count).
    SliceRange sliceRange = new SliceRange(new byte[] {}, new byte[] {}, true, Integer.MAX_VALUE);
    slicePredicate.setSlice_range(sliceRange);

    List<KeySlice> columns;
    try {
        columns = client.get_range_slice(CassandraUtils.keySpace, columnParent, slicePredicate, startTerm,
                endTerm, count, ConsistencyLevel.ONE);
    } catch (InvalidRequestException e) {
        throw new RuntimeException(e);
    } catch (TException e) {
        throw new RuntimeException(e);
    } catch (UnavailableException e) {
        throw new RuntimeException(e);
    } catch (TimedOutException e) {
        throw new RuntimeException(e);
    }

    actualInitSize = columns.size();
    logger.debug("Found " + columns.size() + " keys in range:" + startTerm + " to " + endTerm + " in "
            + (System.currentTimeMillis() - start) + "ms");

    if (actualInitSize > 0) {
        for (KeySlice entry : columns) {
            // Row keys look like "<index>/<field>/<text>"; strip the index prefix.
            String termStr = entry.getKey().substring(
                    entry.getKey().indexOf(CassandraUtils.delimeter) + CassandraUtils.delimeter.length());
            Term term = CassandraUtils.parseTerm(termStr);

            logger.debug(termStr + " has " + entry.getColumns().size());

            // Skip tombstone rows and keys from other fields/indexes (the random
            // partitioner can return rows outside the logical range).
            if (entry.getColumns().size() > 0 && term.field().equals(skipTo.field()) && // from this index
                    entry.getKey().equals(CassandraUtils.hashKey(indexName + CassandraUtils.delimeter
                            + term.field() + CassandraUtils.delimeter + term.text())))
                termDocFreqBuffer.put(term, entry.getColumns());
        }

        // Remember the last term fetched: it is the start point for the next chunk.
        if (!termDocFreqBuffer.isEmpty()) {
            chunkBoundryTerm = termDocFreqBuffer.lastKey();
        }
    }

    // Add a sentinel key; sub-map lookups above use it as an exclusive upper bound.
    termDocFreqBuffer.put(finalTerm, null);

    // Publish the chunk into the shared cache and register this enum for every
    // term it now covers.
    for (Term termKey : termDocFreqBuffer.keySet()) {
        if (termCache == null) {
            termCache = termDocFreqBuffer;
        } else {
            termCache.putAll(termDocFreqBuffer);
        }
        indexReader.addTermEnumCache(termKey, this);
    }

    // Cache the initial term too.
    indexReader.addTermEnumCache(skipTo, this);

    termBuffer = termDocFreqBuffer.keySet().toArray(new Term[] {});
    termPosition = 0;

    long end = System.currentTimeMillis();
    logger.debug("loadTerms: " + startTerm + "(" + termBuffer.length + ") took " + (end - start) + "ms");
}
From source file:org.apache.solr.search.stats.StatsUtil.java
License:Apache License
private static TermStats termStatsFromString(String data, Term t) { if (data == null || data.trim().length() == 0) { LOG.warn("Invalid empty term stats string"); return null; }//from w w w.ja v a2 s .co m String[] vals = data.split(","); if (vals.length < 2) { LOG.warn("Invalid term stats string, num fields " + vals.length + " < 2, '" + data + "'"); return null; } Term termToUse; int idx = 0; if (vals.length == 3) { idx++; // with term Term term = termFromString(vals[0]); if (term != null) { termToUse = term; if (t != null) { assert term.equals(t); } } else { // failed term decoding termToUse = t; } } else { termToUse = t; } if (termToUse == null) { LOG.warn("Missing term in termStats '" + data + "'"); return null; } try { long docFreq = Long.parseLong(vals[idx++]); long totalTermFreq = Long.parseLong(vals[idx]); return new TermStats(termToUse.toString(), docFreq, totalTermFreq); } catch (Exception e) { LOG.warn("Invalid termStats string '" + data + "'"); return null; } }
From source file:org.pageseeder.flint.lucene.query.Queries.java
License:Apache License
/** * Substitutes one term in the term query for another. * * <p>This method only creates new query object if required; it does not modify the given query. * * @param query the query where the substitution should occur. * @param original the original term to replace. * @param replacement the term it should be replaced with. * * @return A new term query where the term has been substituted; * or the same query if no substitution was needed. *///www . ja v a 2 s .c o m @Beta public static TermQuery substitute(TermQuery query, Term original, Term replacement) { Term t = query.getTerm(); if (t.equals(original)) return new TermQuery(replacement); else return query; }
From source file:org.pageseeder.flint.lucene.query.Queries.java
License:Apache License
/** * Substitutes one term in the phrase query for another. * * <p>In a phrase query the replacement term must be on the same field as the original term. * * <p>This method only creates new query object if required; it does not modify the given query. * * @param query the query where the substitution should occur. * @param original the original term to replace. * @param replacement the term it should be replaced with. * * @return A new term query where the term has been substituted; * or the same query if no substitution was needed. * * @throws IllegalArgumentException if the replacement term is not on the same field as the original term. *///from w ww . j av a2s . com @Beta public static PhraseQuery substitute(PhraseQuery query, Term original, Term replacement) throws IllegalArgumentException { boolean doSubstitute = false; // Check if we need to substitute for (Term t : query.getTerms()) { if (t.equals(original)) { doSubstitute = true; } } // Substitute if required if (doSubstitute) { PhraseQuery q = new PhraseQuery(); for (Term t : query.getTerms()) { q.add(t.equals(original) ? replacement : t); } q.setSlop(query.getSlop()); q.setBoost(query.getBoost()); return q; // No substitution return the query } else return query; }
From source file:org.weborganic.flint.util.Queries.java
License:artistic-license-2.0
/** * Substitutes one term in the term query for another. * * <p>This method only creates new query object if required; it does not modify the given query. * * @param query the query where the substitution should occur. * @param original the original term to replace. * @param replacement the term it should be replaced with. * * @return A new term query where the term has been substituted; * or the same query if no substitution was needed. *///from www. jav a2s . c om @Beta public static TermQuery substitute(TermQuery query, Term original, Term replacement) { Term t = query.getTerm(); if (t.equals(original)) { return new TermQuery(replacement); } else { return query; } }
From source file:org.weborganic.flint.util.Queries.java
License:artistic-license-2.0
/** * Substitutes one term in the phrase query for another. * * <p>In a phrase query the replacement term must be on the same field as the original term. * * <p>This method only creates new query object if required; it does not modify the given query. * * @param query the query where the substitution should occur. * @param original the original term to replace. * @param replacement the term it should be replaced with. * * @return A new term query where the term has been substituted; * or the same query if no substitution was needed. * * @throws IllegalArgumentException if the replacement term is not on the same field as the original term. *///from www . j ava 2s .c om @Beta public static PhraseQuery substitute(PhraseQuery query, Term original, Term replacement) throws IllegalArgumentException { boolean doSubstitute = false; // Check if we need to substitute for (Term t : query.getTerms()) { if (t.equals(original)) doSubstitute = true; } // Substitute if required if (doSubstitute) { PhraseQuery q = new PhraseQuery(); for (Term t : query.getTerms()) { q.add(t.equals(original) ? replacement : t); } q.setSlop(query.getSlop()); q.setBoost(query.getBoost()); return q; // No substitution return the query } else return query; }