Example usage for org.apache.lucene.util Counter newCounter

List of usage examples for org.apache.lucene.util Counter newCounter

Introduction

On this page you can find example usages of the org.apache.lucene.util.Counter.newCounter method.

Prototype

public static Counter newCounter(boolean threadSafe) 

Source Link

Document

Returns a new counter.

Usage

From source file:com.yahoo.bard.webservice.data.dimension.impl.TimeLimitingCollectorManager.java

License:Apache License

/**
 * Creates a new time-limited collector for one search.
 *
 * The wrapped collector comes from {@code TopScoreDocCollector.create(perPage, lastEntry)};
 * the counter is requested with {@code threadSafe == false}.
 *
 * @return a new {@link AccessibleTimeLimitingCollector} configured with {@code searchTimeoutMs}
 * @throws IOException declared by the method signature
 */
@Override
public AccessibleTimeLimitingCollector newCollector() throws IOException {
    final TopScoreDocCollector topDocs = TopScoreDocCollector.create(perPage, lastEntry);
    final Counter counter = Counter.newCounter(false);
    return new AccessibleTimeLimitingCollector(topDocs, counter, searchTimeoutMs);
}

From source file:de.unihildesheim.iw.lucene.analyzer.EnglishAnalyzerTest.java

License:Open Source License

/**
 * Tokenizes {@code "foo bar baz bam"} with an {@link EnglishAnalyzer} created
 * through its no-argument constructor and expects all four terms to come back.
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream_noStopwords() throws Exception {
    final String input = "foo bar baz bam";
    final Analyzer analyzer = new EnglishAnalyzer();
    final BytesRefArray collected = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream tokens = analyzer.tokenStream(null, input)) {
        tokens.reset();
        while (tokens.incrementToken()) {
            final CharTermAttribute attr = tokens.getAttribute(CharTermAttribute.class);
            final BytesRef token = new BytesRef(attr);
            if (token.length <= 0) {
                continue; // empty terms are not collected
            }
            collected.append(token);
        }
    }

    Assert.assertEquals("Not all terms returned.", 4L, collected.size());

    // every collected term must be one of the four query words
    final BytesRefIterator it = collected.iterator();
    for (BytesRef seen = it.next(); seen != null; seen = it.next()) {
        final String text = seen.utf8ToString();
        final boolean known = "foo".equals(text) || "bar".equals(text)
                || "baz".equals(text) || "bam".equals(text);
        Assert.assertTrue("Unknown term found.", known);
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.EnglishAnalyzerTest.java

License:Open Source License

/**
 * Tokenizes {@code "foo bar baz bam"} with an {@link EnglishAnalyzer} that was
 * given a set containing "foo" and "bar", and expects only "baz" and "bam" to
 * be returned.
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream() throws Exception {
    final CharArraySet stopwords = new CharArraySet(Arrays.asList("foo", "bar"), true);
    final String input = "foo bar baz bam";
    final Analyzer analyzer = new EnglishAnalyzer(stopwords);
    final BytesRefArray collected = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream tokens = analyzer.tokenStream(null, input)) {
        tokens.reset();
        while (tokens.incrementToken()) {
            final CharTermAttribute attr = tokens.getAttribute(CharTermAttribute.class);
            final BytesRef token = new BytesRef(attr);
            if (token.length <= 0) {
                continue; // empty terms are not collected
            }
            collected.append(token);
        }
    }

    Assert.assertEquals("Not all terms returned.", 2L, collected.size());

    // only the two words that are not in the set may show up
    final BytesRefIterator it = collected.iterator();
    for (BytesRef seen = it.next(); seen != null; seen = it.next()) {
        final String text = seen.utf8ToString();
        final boolean known = "baz".equals(text) || "bam".equals(text);
        Assert.assertTrue("Unknown term found.", known);
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.FrenchAnalyzerTest.java

License:Open Source License

/**
 * Tokenizes {@code "foo bar baz bam"} with a {@link FrenchAnalyzer} created
 * through its no-argument constructor and expects all four terms to come back.
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream_noStopwords() throws Exception {
    final String input = "foo bar baz bam";
    final Analyzer analyzer = new FrenchAnalyzer();
    final BytesRefArray collected = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream tokens = analyzer.tokenStream(null, input)) {
        tokens.reset();
        while (tokens.incrementToken()) {
            final CharTermAttribute attr = tokens.getAttribute(CharTermAttribute.class);
            final BytesRef token = new BytesRef(attr);
            if (token.length <= 0) {
                continue; // empty terms are not collected
            }
            collected.append(token);
        }
    }

    Assert.assertEquals("Not all terms returned.", 4L, collected.size());

    // every collected term must be one of the four query words
    final BytesRefIterator it = collected.iterator();
    for (BytesRef seen = it.next(); seen != null; seen = it.next()) {
        final String text = seen.utf8ToString();
        final boolean known = "foo".equals(text) || "bar".equals(text)
                || "baz".equals(text) || "bam".equals(text);
        Assert.assertTrue("Unknown term found.", known);
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.FrenchAnalyzerTest.java

License:Open Source License

/**
 * Tokenizes {@code "foo bar baz bam"} with a {@link FrenchAnalyzer} that was
 * given a set containing "foo" and "bar", and expects only "baz" and "bam" to
 * be returned.
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream() throws Exception {
    final CharArraySet stopwords = new CharArraySet(Arrays.asList("foo", "bar"), true);
    final String input = "foo bar baz bam";
    final Analyzer analyzer = new FrenchAnalyzer(stopwords);
    final BytesRefArray collected = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream tokens = analyzer.tokenStream(null, input)) {
        tokens.reset();
        while (tokens.incrementToken()) {
            final CharTermAttribute attr = tokens.getAttribute(CharTermAttribute.class);
            final BytesRef token = new BytesRef(attr);
            if (token.length <= 0) {
                continue; // empty terms are not collected
            }
            collected.append(token);
        }
    }

    Assert.assertEquals("Not all terms returned.", 2L, collected.size());

    // only the two words that are not in the set may show up
    final BytesRefIterator it = collected.iterator();
    for (BytesRef seen = it.next(); seen != null; seen = it.next()) {
        final String text = seen.utf8ToString();
        final boolean known = "baz".equals(text) || "bam".equals(text);
        Assert.assertTrue("Unknown term found.", known);
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.FrenchAnalyzerTest.java

License:Open Source License

/**
 * Tokenizes a query made of "foo bar baz bam" plus, for every entry of
 * {@code FrenchAnalyzer.DEFAULT_ELISIONS}, that entry followed by {@code 'bim}.
 * The analyzer is given a set containing "foo" and "bar". The test expects
 * two terms plus one per elision entry, each being "baz", "bam" or "bim".
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream_elisions() throws Exception {
    final CharArraySet stopwords = new CharArraySet(Arrays.asList("foo", "bar"), true);
    final StringBuilder input = new StringBuilder("foo bar baz bam ");
    // one "<elision>'bim" word per default elision
    for (final String elision : FrenchAnalyzer.DEFAULT_ELISIONS) {
        input.append(elision);
        input.append("\'bim ");
    }
    final Analyzer analyzer = new FrenchAnalyzer(stopwords);
    final BytesRefArray collected = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream tokens = analyzer.tokenStream(null, input.toString())) {
        tokens.reset();
        while (tokens.incrementToken()) {
            final CharTermAttribute attr = tokens.getAttribute(CharTermAttribute.class);
            final BytesRef token = new BytesRef(attr);
            if (token.length <= 0) {
                continue; // empty terms are not collected
            }
            collected.append(token);
        }
    }

    Assert.assertEquals("Not all terms returned.", 2L + FrenchAnalyzer.DEFAULT_ELISIONS.length, collected.size());

    final BytesRefIterator it = collected.iterator();
    for (BytesRef seen = it.next(); seen != null; seen = it.next()) {
        final String text = seen.utf8ToString();
        // "bim" is what is expected to remain once the elision prefix is stripped
        final boolean known = "baz".equals(text) || "bam".equals(text) || "bim".equals(text);
        Assert.assertTrue("Unknown term found.", known);
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.GermanAnalyzerTest.java

License:Open Source License

/**
 * Tokenizes {@code "foo bar baz bam"} with a {@link GermanAnalyzer} created
 * through its no-argument constructor and expects all four terms to come back.
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream_noStopwords() throws Exception {
    final String input = "foo bar baz bam";
    final Analyzer analyzer = new GermanAnalyzer();
    final BytesRefArray collected = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream tokens = analyzer.tokenStream(null, input)) {
        tokens.reset();
        while (tokens.incrementToken()) {
            final CharTermAttribute attr = tokens.getAttribute(CharTermAttribute.class);
            final BytesRef token = new BytesRef(attr);
            if (token.length <= 0) {
                continue; // empty terms are not collected
            }
            collected.append(token);
        }
    }

    Assert.assertEquals("Not all terms returned.", 4L, collected.size());

    // every collected term must be one of the four query words
    final BytesRefIterator it = collected.iterator();
    for (BytesRef seen = it.next(); seen != null; seen = it.next()) {
        final String text = seen.utf8ToString();
        final boolean known = "foo".equals(text) || "bar".equals(text)
                || "baz".equals(text) || "bam".equals(text);
        Assert.assertTrue("Unknown term found.", known);
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.GermanAnalyzerTest.java

License:Open Source License

/**
 * Tokenizes {@code "foo bar baz bam"} with a {@link GermanAnalyzer} that was
 * given a set containing "foo" and "bar", and expects only "baz" and "bam" to
 * be returned.
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream() throws Exception {
    final CharArraySet stopwords = new CharArraySet(Arrays.asList("foo", "bar"), true);
    final String input = "foo bar baz bam";
    final Analyzer analyzer = new GermanAnalyzer(stopwords);
    final BytesRefArray collected = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream tokens = analyzer.tokenStream(null, input)) {
        tokens.reset();
        while (tokens.incrementToken()) {
            final CharTermAttribute attr = tokens.getAttribute(CharTermAttribute.class);
            final BytesRef token = new BytesRef(attr);
            if (token.length <= 0) {
                continue; // empty terms are not collected
            }
            collected.append(token);
        }
    }

    Assert.assertEquals("Not all terms returned.", 2L, collected.size());

    // only the two words that are not in the set may show up
    final BytesRefIterator it = collected.iterator();
    for (BytesRef seen = it.next(); seen != null; seen = it.next()) {
        final String text = seen.utf8ToString();
        final boolean known = "baz".equals(text) || "bam".equals(text);
        Assert.assertTrue("Unknown term found.", known);
    }
}

From source file:de.unihildesheim.iw.lucene.query.QueryUtils.java

License:Open Source License

/**
 * Tokenizes a query string using Lucenes analyzer. This also removes
 * stopwords from the query string. The {@link IndexDataProvider} instance is
 * used to skip terms no found in the collection.
 *
 * @param query Query string to tokenize
 * @param qAnalyzer Analyzer to use/*from  ww  w  . j  av a2 s  . c  o  m*/
 * @param dataProv IndexDataProvider
 * @return List of tokens from original query with stop-words removed
 */
@SuppressWarnings("ObjectAllocationInLoop")
public static BytesRefArray tokenizeQuery(@NotNull final String query, @NotNull final Analyzer qAnalyzer,
        @Nullable final IndexDataProvider dataProv) {
    BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream stream = qAnalyzer.tokenStream(null, query)) {
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(stream.getAttribute(CharTermAttribute.class));
            if (term.length > 0) {
                result.append(term);
            }
        }
    } catch (final IOException e) {
        // not thrown b/c we're using a string reader
    }
    if (dataProv != null) {
        result = removeUnknownTerms(dataProv, result);
    }
    return result;
}

From source file:de.unihildesheim.iw.lucene.query.QueryUtils.java

License:Open Source License

/**
 * Remove terms from the given collection, if they are not found in the
 * collection./*from   w w w. j a  v  a 2 s .c om*/
 *
 * @param dataProv IndexDataProvider
 * @param terms Collection of terms to check against the collection
 * @return Passed in terms with non-collection terms removed
 */
@SuppressFBWarnings("LO_APPENDED_STRING_IN_FORMAT_STRING")
private static BytesRefArray removeUnknownTerms(@NotNull final IndexDataProvider dataProv,
        @NotNull final BytesRefArray terms) {
    final StringBuilder sb = new StringBuilder("Skipped terms (stopword or not in collection): [");
    final FixedBitSet bits = new FixedBitSet(terms.size());
    final BytesRefBuilder spare = new BytesRefBuilder();
    BytesRef term;

    if (terms.size() == 0) {
        return terms;
    } else {
        for (int i = terms.size() - 1; i >= 0; i--) {
            term = terms.get(spare, i);
            if (dataProv.getTermFrequency(term) <= 0L) {
                sb.append(term.utf8ToString()).append(' ');
                bits.set(i);
            }
        }

        if (bits.cardinality() > 0) {
            LOG.warn(sb.toString().trim() + "].");
            final BytesRefArray cleanTerms = new BytesRefArray(Counter.newCounter(false));
            for (int i = terms.size() - 1; i >= 0; i--) {
                if (!bits.get(i)) {
                    term = terms.get(spare, i);
                    cleanTerms.append(term); // copies bytes
                }
            }
            return cleanTerms;
        }
        return terms;
    }
}