Example usage for org.apache.lucene.util.BytesRefIterator.next()

List of usage examples for org.apache.lucene.util BytesRefIterator next

Introduction

On this page you can find example usages of org.apache.lucene.util.BytesRefIterator.next().

Prototype

BytesRef next() throws IOException;

Source Link

Document

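Increments the iteration to the next BytesRef in the iterator. Returns that BytesRef, or null once the end of the iterator has been reached (the examples below loop until null is returned).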

Usage

From source file:de.unihildesheim.iw.lucene.analyzer.EnglishAnalyzerTest.java

License:Open Source License

/**
 * Runs the query {@code "foo bar baz bam"} through an {@code EnglishAnalyzer} built with its
 * no-argument constructor and expects all four terms to be emitted unchanged.
 *
 * @throws Exception Any exception thrown indicates an error
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream_noStopwords() throws Exception {
    final String query = "foo bar baz bam";
    final Analyzer analyzer = new EnglishAnalyzer();
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream stream = analyzer.tokenStream(null, query)) {
        // Fetch the term attribute once instead of on every iteration.
        final CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttr);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // TokenStream's documented consumer workflow calls end() after the last token
        // and before close().
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 4L, result.size());

    final BytesRefIterator bri = result.iterator();
    BytesRef term;
    while ((term = bri.next()) != null) {
        // Decode once; the original decoded the bytes again for every comparison.
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.",
                "foo".equals(text) || "bar".equals(text) || "baz".equals(text) || "bam".equals(text));
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.EnglishAnalyzerTest.java

License:Open Source License

/**
 * Runs the query {@code "foo bar baz bam"} through an {@code EnglishAnalyzer} that was handed a
 * {@code CharArraySet} containing {@code foo} and {@code bar}, and expects only {@code baz} and
 * {@code bam} to be emitted.
 *
 * @throws Exception Any exception thrown indicates an error
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream() throws Exception {
    final CharArraySet csa = new CharArraySet(Arrays.asList("foo", "bar"), true);
    final String query = "foo bar baz bam";
    final Analyzer analyzer = new EnglishAnalyzer(csa);
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream stream = analyzer.tokenStream(null, query)) {
        // Fetch the term attribute once instead of on every iteration.
        final CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttr);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // TokenStream's documented consumer workflow calls end() after the last token
        // and before close().
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 2L, result.size());

    final BytesRefIterator bri = result.iterator();
    BytesRef term;
    while ((term = bri.next()) != null) {
        // Decode once; the original decoded the bytes again for every comparison.
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.", "baz".equals(text) || "bam".equals(text));
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.FrenchAnalyzerTest.java

License:Open Source License

/**
 * Runs the query {@code "foo bar baz bam"} through a {@code FrenchAnalyzer} built with its
 * no-argument constructor and expects all four terms to be emitted unchanged.
 *
 * @throws Exception Any exception thrown indicates an error
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream_noStopwords() throws Exception {
    final String query = "foo bar baz bam";
    final Analyzer analyzer = new FrenchAnalyzer();
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream stream = analyzer.tokenStream(null, query)) {
        // Fetch the term attribute once instead of on every iteration.
        final CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttr);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // TokenStream's documented consumer workflow calls end() after the last token
        // and before close().
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 4L, result.size());

    final BytesRefIterator bri = result.iterator();
    BytesRef term;
    while ((term = bri.next()) != null) {
        // Decode once; the original decoded the bytes again for every comparison.
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.",
                "foo".equals(text) || "bar".equals(text) || "baz".equals(text) || "bam".equals(text));
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.FrenchAnalyzerTest.java

License:Open Source License

/**
 * Runs the query {@code "foo bar baz bam"} through a {@code FrenchAnalyzer} that was handed a
 * {@code CharArraySet} containing {@code foo} and {@code bar}, and expects only {@code baz} and
 * {@code bam} to be emitted.
 *
 * @throws Exception Any exception thrown indicates an error
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream() throws Exception {
    final CharArraySet csa = new CharArraySet(Arrays.asList("foo", "bar"), true);
    final String query = "foo bar baz bam";
    final Analyzer analyzer = new FrenchAnalyzer(csa);
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream stream = analyzer.tokenStream(null, query)) {
        // Fetch the term attribute once instead of on every iteration.
        final CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttr);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // TokenStream's documented consumer workflow calls end() after the last token
        // and before close().
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 2L, result.size());

    final BytesRefIterator bri = result.iterator();
    BytesRef term;
    while ((term = bri.next()) != null) {
        // Decode once; the original decoded the bytes again for every comparison.
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.", "baz".equals(text) || "bam".equals(text));
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.FrenchAnalyzerTest.java

License:Open Source License

/**
 * Extends the query {@code "foo bar baz bam "} with one {@code <elision>'bim} token per entry of
 * {@code FrenchAnalyzer.DEFAULT_ELISIONS} and runs it through a {@code FrenchAnalyzer} that was
 * handed a {@code CharArraySet} containing {@code foo} and {@code bar}. Expects
 * {@code 2 + DEFAULT_ELISIONS.length} terms, each being {@code baz}, {@code bam} or {@code bim}.
 *
 * @throws Exception Any exception thrown indicates an error
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream_elisions() throws Exception {
    final CharArraySet csa = new CharArraySet(Arrays.asList("foo", "bar"), true);
    final StringBuilder query = new StringBuilder("foo bar baz bam ");
    // add all elisions to the query
    for (final String s : FrenchAnalyzer.DEFAULT_ELISIONS) {
        query.append(s).append("\'bim ");
    }
    final Analyzer analyzer = new FrenchAnalyzer(csa);
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream stream = analyzer.tokenStream(null, query.toString())) {
        // Fetch the term attribute once instead of on every iteration.
        final CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttr);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // TokenStream's documented consumer workflow calls end() after the last token
        // and before close().
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 2L + FrenchAnalyzer.DEFAULT_ELISIONS.length, result.size());

    final BytesRefIterator bri = result.iterator();
    BytesRef term;
    while ((term = bri.next()) != null) {
        // Decode once; the original decoded the bytes again for every comparison.
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.",
                "baz".equals(text) || "bam".equals(text) ||
                // elisions should be removed from this
                        "bim".equals(text));
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.GermanAnalyzerTest.java

License:Open Source License

/**
 * Runs the query {@code "foo bar baz bam"} through a {@code GermanAnalyzer} built with its
 * no-argument constructor and expects all four terms to be emitted unchanged.
 *
 * @throws Exception Any exception thrown indicates an error
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream_noStopwords() throws Exception {
    final String query = "foo bar baz bam";
    final Analyzer analyzer = new GermanAnalyzer();
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream stream = analyzer.tokenStream(null, query)) {
        // Fetch the term attribute once instead of on every iteration.
        final CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttr);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // TokenStream's documented consumer workflow calls end() after the last token
        // and before close().
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 4L, result.size());

    final BytesRefIterator bri = result.iterator();
    BytesRef term;
    while ((term = bri.next()) != null) {
        // Decode once; the original decoded the bytes again for every comparison.
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.",
                "foo".equals(text) || "bar".equals(text) || "baz".equals(text) || "bam".equals(text));
    }
}

From source file:de.unihildesheim.iw.lucene.analyzer.GermanAnalyzerTest.java

License:Open Source License

/**
 * Runs the query {@code "foo bar baz bam"} through a {@code GermanAnalyzer} that was handed a
 * {@code CharArraySet} containing {@code foo} and {@code bar}, and expects only {@code baz} and
 * {@code bam} to be emitted.
 *
 * @throws Exception Any exception thrown indicates an error
 */
@SuppressWarnings({ "resource", "ObjectAllocationInLoop", "ImplicitNumericConversion" })
@Test
public void testTokenStream() throws Exception {
    final CharArraySet csa = new CharArraySet(Arrays.asList("foo", "bar"), true);
    final String query = "foo bar baz bam";
    final Analyzer analyzer = new GermanAnalyzer(csa);
    final BytesRefArray result = new BytesRefArray(Counter.newCounter(false));

    try (TokenStream stream = analyzer.tokenStream(null, query)) {
        // Fetch the term attribute once instead of on every iteration.
        final CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            final BytesRef term = new BytesRef(termAttr);
            if (term.length > 0) {
                result.append(term);
            }
        }
        // TokenStream's documented consumer workflow calls end() after the last token
        // and before close().
        stream.end();
    }

    Assert.assertEquals("Not all terms returned.", 2L, result.size());

    final BytesRefIterator bri = result.iterator();
    BytesRef term;
    while ((term = bri.next()) != null) {
        // Decode once; the original decoded the bytes again for every comparison.
        final String text = term.utf8ToString();
        Assert.assertTrue("Unknown term found.", "baz".equals(text) || "bam".equals(text));
    }
}

From source file:de.unihildesheim.iw.lucene.query.QueryUtilsTest.java

License:Open Source License

/**
 * Tokenizes the query {@code "foo bar baz"} with {@code null} passed as the third argument of
 * {@code QueryUtils.tokenizeQuery} and checks that exactly three terms come back, each being
 * {@code foo}, {@code bar} or {@code baz}.
 *
 * @throws Exception Any exception thrown indicates an error
 */
@SuppressWarnings("ImplicitNumericConversion")
@Test
public void testTokenizeQuery_noMetrics() throws Exception {
    final BytesRefArray tokens = QueryUtils.tokenizeQuery("foo bar baz", ANALYZER, null);
    Assert.assertEquals("Extracted terms count mismatch.", 3L, tokens.size());

    final BytesRefIterator tokenIt = tokens.iterator();
    for (BytesRef token = tokenIt.next(); token != null; token = tokenIt.next()) {
        final String text = token.utf8ToString();
        final boolean known = "foo".equals(text) || "bar".equals(text) || "baz".equals(text);
        if (!known) {
            Assert.fail("Unknown term found.");
        }
    }
}

From source file:de.unihildesheim.iw.lucene.query.QueryUtilsTest.java

License:Open Source License

/**
 * Test tokenizing with skipping terms not present in index.
 *
 * @throws Exception/* ww w. j  a v  a 2s.  co m*/
 */
@SuppressWarnings("ImplicitNumericConversion")
@Test
public void testTokenizeQuery() throws Exception {
    try (TestMemIndex idx = new TestMemIndex()) {
        final IndexDataProvider idp = idx.getIdp();

        final BytesRefArray bra = QueryUtils.tokenizeQuery("foo bar field baz value", ANALYZER, idp);

        Assert.assertEquals("Extracted terms count mismatch.", 2L, bra.size());

        final BytesRefIterator braIt = bra.iterator();
        BytesRef term;
        while ((term = braIt.next()) != null) {
            final String termStr = term.utf8ToString();
            switch (termStr) {
            case "foo":
            case "bar":
            case "baz":
                Assert.fail("Non-index term found.");
                break;
            case "value":
            case "field":
                // pass
                break;
            default:
                Assert.fail("Unknown term found.");
                break;
            }
        }
    }
}

From source file:de.unihildesheim.iw.lucene.util.BytesRefUtilsTest.java

License:Open Source License

/**
 * Adds three terms to a {@code BytesRefHash}, converts it with
 * {@code BytesRefUtils.hashToArray} and checks that the resulting array has the same number of
 * entries and that every entry can be looked up in the original hash.
 *
 * @throws Exception Any exception thrown indicates an error
 */
@SuppressWarnings("ImplicitNumericConversion")
@Test
public void testHashToArray() throws Exception {
    final Collection<String> data = new HashSet<>(3);
    data.add("foo");
    data.add("bar");
    data.add("baz");

    final BytesRefHash brh = new BytesRefHash();
    data.stream().map(BytesRef::new).forEach(brh::add);

    final BytesRefArray bra = BytesRefUtils.hashToArray(brh);

    Assert.assertEquals("Not all terms found.", data.size(), bra.size());

    final BytesRefIterator bri = bra.iterator();
    BytesRef br;
    while ((br = bri.next()) != null) {
        // Compare by value. assertNotSame would compare the identity of two autoboxed
        // Integers, which only gives the intended answer thanks to the Integer cache.
        Assert.assertTrue("BytesRef not found.", brh.find(br) != -1);
    }
}