Example usage for org.apache.lucene.search.suggest.InputIterator#next()

List of usage examples for org.apache.lucene.search.suggest.InputIterator#next()

Introduction

On this page you can find example usages of org.apache.lucene.search.suggest.InputIterator#next().

Prototype

BytesRef next() throws IOException;

Source Link

Document

Increments the iteration to the next BytesRef in the iterator.

Usage

From source file:org.elasticsearch.search.suggest.completion.CompletionPostingsFormatTest.java

License:Apache License

/**
 * Checks that the lookup produced by {@code buildAnalyzingLookup} returns the same suggestions as a
 * reference {@link XAnalyzingSuggester} that is built directly from the same titles and weights.
 *
 * Separator preservation, position-increment preservation and payload usage are picked at random
 * for each run. For every title, a random-length prefix of its first analyzed term is looked up in
 * both suggesters, and the result sizes, keys, values and payloads are compared.
 */
@Test
public void testDuellCompletions() throws IOException, NoSuchFieldException, SecurityException,
        IllegalArgumentException, IllegalAccessException {
    final boolean preserveSeparators = getRandom().nextBoolean();
    final boolean preservePositionIncrements = getRandom().nextBoolean();
    final boolean usePayloads = getRandom().nextBoolean();
    final int options = preserveSeparators ? AnalyzingSuggester.PRESERVE_SEP : 0;

    XAnalyzingSuggester reference = new XAnalyzingSuggester(new StandardAnalyzer(TEST_VERSION_CURRENT),
            new StandardAnalyzer(TEST_VERSION_CURRENT), options, 256, -1, preservePositionIncrements, null,
            false, 1, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP,
            XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);

    // Collect the "title" field of `num` documents together with a random weight for each.
    LineFileDocs docs = new LineFileDocs(getRandom());
    int num = atLeast(150);
    final String[] titles = new String[num];
    final long[] weights = new long[num];
    try {
        for (int i = 0; i < titles.length; i++) {
            Document nextDoc = docs.nextDoc();
            IndexableField field = nextDoc.getField("title");
            titles[i] = field.stringValue();
            weights[i] = between(0, 100);
        }
    } finally {
        // Release the document source even if reading a document or its title fails.
        docs.close();
    }

    // Iterator over (title, weight) pairs without payloads; weight() reports the weight of the
    // entry most recently returned by next().
    final InputIterator primaryIter = new InputIterator() {
        int index = 0;
        long currentWeight = -1;

        @Override
        public Comparator<BytesRef> getComparator() {
            return null;
        }

        @Override
        public BytesRef next() throws IOException {
            if (index < titles.length) {
                currentWeight = weights[index];
                return new BytesRef(titles[index++]);
            }
            return null;
        }

        @Override
        public long weight() {
            return currentWeight;
        }

        @Override
        public BytesRef payload() {
            return null;
        }

        @Override
        public boolean hasPayloads() {
            return false;
        }

    };
    InputIterator iter;
    if (usePayloads) {
        // Same entries as primaryIter, but each entry carries its weight (as a decimal string) as payload.
        iter = new InputIterator() {
            @Override
            public long weight() {
                return primaryIter.weight();
            }

            @Override
            public Comparator<BytesRef> getComparator() {
                return primaryIter.getComparator();
            }

            @Override
            public BytesRef next() throws IOException {
                return primaryIter.next();
            }

            @Override
            public BytesRef payload() {
                return new BytesRef(Long.toString(weight()));
            }

            @Override
            public boolean hasPayloads() {
                return true;
            }
        };
    } else {
        iter = primaryIter;
    }
    reference.build(iter);
    PostingsFormatProvider provider = new PreBuiltPostingsFormatProvider(new Elasticsearch090PostingsFormat());

    NamedAnalyzer namedAnalyzer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT));
    final CompletionFieldMapper mapper = new CompletionFieldMapper(new Names("foo"), namedAnalyzer,
            namedAnalyzer, provider, null, usePayloads, preserveSeparators, preservePositionIncrements,
            Integer.MAX_VALUE, AbstractFieldMapper.MultiFields.empty(), null);
    Lookup buildAnalyzingLookup = buildAnalyzingLookup(mapper, titles, titles, weights);

    // Both suggesters must agree on the private maxAnalyzedPathsForOneInput field (read reflectively).
    Field field = buildAnalyzingLookup.getClass().getDeclaredField("maxAnalyzedPathsForOneInput");
    field.setAccessible(true);
    Field refField = reference.getClass().getDeclaredField("maxAnalyzedPathsForOneInput");
    refField.setAccessible(true);
    assertThat(refField.get(reference), equalTo(field.get(buildAnalyzingLookup)));

    for (int i = 0; i < titles.length; i++) {
        int res = between(1, 10);
        // Capture only the first token the analyzer emits for this title.
        final StringBuilder builder = new StringBuilder();
        SuggestUtils.analyze(namedAnalyzer.tokenStream("foo", titles[i]), new SuggestUtils.TokenConsumer() {
            @Override
            public void nextToken() throws IOException {
                if (builder.length() == 0) {
                    builder.append(this.charTermAttr.toString());
                }
            }
        });
        String firstTerm = builder.toString();
        // Non-empty random-length prefix of the first term, or "" when the title produced no term.
        String prefix = firstTerm.isEmpty() ? "" : firstTerm.substring(0, between(1, firstTerm.length()));
        List<LookupResult> refLookup = reference.lookup(prefix, false, res);
        List<LookupResult> lookup = buildAnalyzingLookup.lookup(prefix, false, res);
        assertThat(refLookup.toString(), lookup.size(), equalTo(refLookup.size()));
        for (int j = 0; j < refLookup.size(); j++) {
            assertThat(lookup.get(j).key, equalTo(refLookup.get(j).key));
            assertThat("prefix: " + prefix + " " + j + " -- missmatch cost: " + lookup.get(j).key + " - "
                    + lookup.get(j).value + " | " + refLookup.get(j).key + " - " + refLookup.get(j).value,
                    lookup.get(j).value, equalTo(refLookup.get(j).value));
            assertThat(lookup.get(j).payload, equalTo(refLookup.get(j).payload));
            if (usePayloads) {
                // The payload was built from the weight, so it must match the returned value.
                assertThat(lookup.get(j).payload.utf8ToString(), equalTo(Long.toString(lookup.get(j).value)));
            }
        }
    }
}

From source file:org.elasticsearch.search.suggest.completion.CompletionPostingsFormatTests.java

License:Apache License

/**
 * Checks that the lookup produced by {@code buildAnalyzingLookup} returns the same suggestions as a
 * reference {@link XAnalyzingSuggester} that is built directly from the same titles and weights.
 *
 * Separator preservation, position-increment preservation and payload usage are picked at random
 * for each run. For every title, a random-length prefix of its first analyzed term is looked up in
 * both suggesters, and the result sizes, keys, values and payloads are compared.
 */
@Test
public void testDuellCompletions() throws IOException, NoSuchFieldException, SecurityException,
        IllegalArgumentException, IllegalAccessException {
    final boolean preserveSeparators = getRandom().nextBoolean();
    final boolean preservePositionIncrements = getRandom().nextBoolean();
    final boolean usePayloads = getRandom().nextBoolean();
    final int options = preserveSeparators ? AnalyzingSuggester.PRESERVE_SEP : 0;

    XAnalyzingSuggester reference = new XAnalyzingSuggester(new StandardAnalyzer(), null,
            new StandardAnalyzer(), options, 256, -1, preservePositionIncrements, null, false, 1,
            XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE,
            XAnalyzingSuggester.HOLE_CHARACTER);

    // Collect the "title" field of `num` documents together with a random weight for each.
    LineFileDocs docs = new LineFileDocs(getRandom());
    int num = scaledRandomIntBetween(150, 300);
    final String[] titles = new String[num];
    final long[] weights = new long[num];
    try {
        for (int i = 0; i < titles.length; i++) {
            Document nextDoc = docs.nextDoc();
            IndexableField field = nextDoc.getField("title");
            titles[i] = field.stringValue();
            weights[i] = between(0, 100);
        }
    } finally {
        // Release the document source even if reading a document or its title fails.
        docs.close();
    }

    // Iterator over (title, weight) pairs without payloads or contexts; weight() reports the
    // weight of the entry most recently returned by next().
    final InputIterator primaryIter = new InputIterator() {
        int index = 0;
        long currentWeight = -1;

        @Override
        public BytesRef next() throws IOException {
            if (index < titles.length) {
                currentWeight = weights[index];
                return new BytesRef(titles[index++]);
            }
            return null;
        }

        @Override
        public long weight() {
            return currentWeight;
        }

        @Override
        public BytesRef payload() {
            return null;
        }

        @Override
        public boolean hasPayloads() {
            return false;
        }

        @Override
        public Set<BytesRef> contexts() {
            return null;
        }

        @Override
        public boolean hasContexts() {
            return false;
        }

    };
    InputIterator iter;
    if (usePayloads) {
        // Same entries as primaryIter, but each entry carries its weight (as a decimal string) as payload.
        iter = new InputIterator() {
            @Override
            public long weight() {
                return primaryIter.weight();
            }

            @Override
            public BytesRef next() throws IOException {
                return primaryIter.next();
            }

            @Override
            public BytesRef payload() {
                return new BytesRef(Long.toString(weight()));
            }

            @Override
            public boolean hasPayloads() {
                return true;
            }

            @Override
            public Set<BytesRef> contexts() {
                return null;
            }

            @Override
            public boolean hasContexts() {
                return false;
            }
        };
    } else {
        iter = primaryIter;
    }
    reference.build(iter);

    AnalyzingCompletionLookupProvider currentProvider = new AnalyzingCompletionLookupProvider(
            preserveSeparators, false, preservePositionIncrements, usePayloads);
    CompletionFieldMapper.CompletionFieldType fieldType = FIELD_TYPE.clone();
    fieldType.setProvider(currentProvider);
    final CompletionFieldMapper mapper = new CompletionFieldMapper("foo", fieldType, Integer.MAX_VALUE,
            indexSettings, FieldMapper.MultiFields.empty(), null);
    Lookup buildAnalyzingLookup = buildAnalyzingLookup(mapper, titles, titles, weights);
    // The path limit can only be compared when the built lookup is itself an XAnalyzingSuggester.
    if (buildAnalyzingLookup instanceof XAnalyzingSuggester) {
        assertEquals(reference.getMaxAnalyzedPathsForOneInput(),
                ((XAnalyzingSuggester) buildAnalyzingLookup).getMaxAnalyzedPathsForOneInput());
    }

    for (int i = 0; i < titles.length; i++) {
        int res = between(1, 10);
        // Capture only the first token the analyzer emits for this title.
        final StringBuilder builder = new StringBuilder();
        SuggestUtils.analyze(analyzer.tokenStream("foo", titles[i]), new SuggestUtils.TokenConsumer() {
            @Override
            public void nextToken() throws IOException {
                if (builder.length() == 0) {
                    builder.append(this.charTermAttr.toString());
                }
            }
        });
        String firstTerm = builder.toString();
        // Non-empty random-length prefix of the first term, or "" when the title produced no term.
        String prefix = firstTerm.isEmpty() ? "" : firstTerm.substring(0, between(1, firstTerm.length()));
        List<LookupResult> refLookup = reference.lookup(prefix, false, res);
        List<LookupResult> lookup = buildAnalyzingLookup.lookup(prefix, false, res);
        assertThat(refLookup.toString(), lookup.size(), equalTo(refLookup.size()));
        for (int j = 0; j < refLookup.size(); j++) {
            assertThat(lookup.get(j).key, equalTo(refLookup.get(j).key));
            assertThat("prefix: " + prefix + " " + j + " -- missmatch cost: " + lookup.get(j).key + " - "
                    + lookup.get(j).value + " | " + refLookup.get(j).key + " - " + refLookup.get(j).value,
                    lookup.get(j).value, equalTo(refLookup.get(j).value));
            assertThat(lookup.get(j).payload, equalTo(refLookup.get(j).payload));
            if (usePayloads) {
                // The payload was built from the weight, so it must match the returned value.
                assertThat(lookup.get(j).payload.utf8ToString(), equalTo(Long.toString(lookup.get(j).value)));
            }
        }
    }
}

From source file:org.elasticsearch.search.suggest.completion.old.CompletionPostingsFormatTest.java

License:Apache License

/**
 * Checks that the lookup produced by {@code buildAnalyzingLookup} for an
 * {@link OldCompletionFieldMapper} returns the same suggestions as a reference
 * {@link XAnalyzingSuggester} that is built directly from the same titles and weights.
 *
 * Separator preservation, position-increment preservation and payload usage are picked at random
 * for each run. For every title, a random-length prefix of its first analyzed term is looked up in
 * both suggesters, and the result sizes, keys, values and payloads are compared.
 */
@Test
public void testDuellCompletions() throws IOException, NoSuchFieldException, SecurityException,
        IllegalArgumentException, IllegalAccessException {
    final boolean preserveSeparators = getRandom().nextBoolean();
    final boolean preservePositionIncrements = getRandom().nextBoolean();
    final boolean usePayloads = getRandom().nextBoolean();
    final int options = preserveSeparators ? AnalyzingSuggester.PRESERVE_SEP : 0;

    XAnalyzingSuggester reference = new XAnalyzingSuggester(new StandardAnalyzer(), null,
            new StandardAnalyzer(), options, 256, -1, preservePositionIncrements, null, false, 1,
            XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE,
            XAnalyzingSuggester.HOLE_CHARACTER);

    // Collect the "title" field of `num` documents together with a random weight for each.
    LineFileDocs docs = new LineFileDocs(getRandom());
    int num = scaledRandomIntBetween(150, 300);
    final String[] titles = new String[num];
    final long[] weights = new long[num];
    try {
        for (int i = 0; i < titles.length; i++) {
            Document nextDoc = docs.nextDoc();
            IndexableField field = nextDoc.getField("title");
            titles[i] = field.stringValue();
            weights[i] = between(0, 100);
        }
    } finally {
        // Release the document source even if reading a document or its title fails.
        docs.close();
    }

    // Iterator over (title, weight) pairs without payloads or contexts; weight() reports the
    // weight of the entry most recently returned by next().
    final InputIterator primaryIter = new InputIterator() {
        int index = 0;
        long currentWeight = -1;

        @Override
        public BytesRef next() throws IOException {
            if (index < titles.length) {
                currentWeight = weights[index];
                return new BytesRef(titles[index++]);
            }
            return null;
        }

        @Override
        public long weight() {
            return currentWeight;
        }

        @Override
        public BytesRef payload() {
            return null;
        }

        @Override
        public boolean hasPayloads() {
            return false;
        }

        @Override
        public Set<BytesRef> contexts() {
            return null;
        }

        @Override
        public boolean hasContexts() {
            return false;
        }

    };
    InputIterator iter;
    if (usePayloads) {
        // Same entries as primaryIter, but each entry carries its weight (as a decimal string) as payload.
        iter = new InputIterator() {
            @Override
            public long weight() {
                return primaryIter.weight();
            }

            @Override
            public BytesRef next() throws IOException {
                return primaryIter.next();
            }

            @Override
            public BytesRef payload() {
                return new BytesRef(Long.toString(weight()));
            }

            @Override
            public boolean hasPayloads() {
                return true;
            }

            @Override
            public Set<BytesRef> contexts() {
                return null;
            }

            @Override
            public boolean hasContexts() {
                return false;
            }
        };
    } else {
        iter = primaryIter;
    }
    reference.build(iter);

    AnalyzingCompletionLookupProvider currentProvider = new AnalyzingCompletionLookupProvider(
            preserveSeparators, false, preservePositionIncrements, usePayloads);
    OldCompletionFieldMapper.CompletionFieldType fieldType = FIELD_TYPE.clone();
    fieldType.setProvider(currentProvider);
    final OldCompletionFieldMapper mapper = new OldCompletionFieldMapper("foo", fieldType, Integer.MAX_VALUE,
            indexSettings, FieldMapper.MultiFields.empty(), null);
    Lookup buildAnalyzingLookup = buildAnalyzingLookup(mapper, titles, titles, weights);

    // Both suggesters must agree on the private maxAnalyzedPathsForOneInput field (read reflectively).
    Field field = buildAnalyzingLookup.getClass().getDeclaredField("maxAnalyzedPathsForOneInput");
    field.setAccessible(true);
    Field refField = reference.getClass().getDeclaredField("maxAnalyzedPathsForOneInput");
    refField.setAccessible(true);
    assertThat(refField.get(reference), equalTo(field.get(buildAnalyzingLookup)));

    for (int i = 0; i < titles.length; i++) {
        int res = between(1, 10);
        // Capture only the first token the analyzer emits for this title.
        final StringBuilder builder = new StringBuilder();
        SuggestUtils.analyze(analyzer.tokenStream("foo", titles[i]), new SuggestUtils.TokenConsumer() {
            @Override
            public void nextToken() throws IOException {
                if (builder.length() == 0) {
                    builder.append(this.charTermAttr.toString());
                }
            }
        });
        String firstTerm = builder.toString();
        // Non-empty random-length prefix of the first term, or "" when the title produced no term.
        String prefix = firstTerm.isEmpty() ? "" : firstTerm.substring(0, between(1, firstTerm.length()));
        List<LookupResult> refLookup = reference.lookup(prefix, false, res);
        List<LookupResult> lookup = buildAnalyzingLookup.lookup(prefix, false, res);
        assertThat(refLookup.toString(), lookup.size(), equalTo(refLookup.size()));
        for (int j = 0; j < refLookup.size(); j++) {
            assertThat(lookup.get(j).key, equalTo(refLookup.get(j).key));
            assertThat("prefix: " + prefix + " " + j + " -- missmatch cost: " + lookup.get(j).key + " - "
                    + lookup.get(j).value + " | " + refLookup.get(j).key + " - " + refLookup.get(j).value,
                    lookup.get(j).value, equalTo(refLookup.get(j).value));
            assertThat(lookup.get(j).payload, equalTo(refLookup.get(j).payload));
            if (usePayloads) {
                // The payload was built from the weight, so it must match the returned value.
                assertThat(lookup.get(j).payload.utf8ToString(), equalTo(Long.toString(lookup.get(j).value)));
            }
        }
    }
}

From source file:org.lukhnos.lucenestudy.Suggester.java

License:MIT License

/**
 * Rebuild a suggestion index from the document index.
 *
 * This method iterates through the entire document index and makes sure that only unique titles
 * are indexed. Any existing suggestion index under {@code indexRoot} is deleted first.
 *
 * @param indexRoot The parent directory inside which both the document index and the suggestion
 *                  index live.
 * @throws IOException if any of the file-system or index operations fail.
 */
public static void rebuild(String indexRoot) throws IOException {
    Path indexRootPath = Paths.get(indexRoot);
    Path suggestionPath = getSuggestionIndexPath(indexRootPath);

    // Delete the suggestion index if it exists.
    if (Files.exists(suggestionPath)) {
        Util.deletePath(suggestionPath);
    }

    Analyzer analyzer = Indexer.getAnalyzer();

    // Open the (fresh) suggestion index and the document index. try-with-resources closes all of
    // them in reverse order, also when an exception is thrown part-way through. The suggester may
    // close its directory as well; a second close() is harmless under the Closeable contract.
    try (Directory suggestionDir = FSDirectory.open(suggestionPath);
            AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(suggestionDir, analyzer);
            Directory indexDir = FSDirectory.open(Indexer.getMainIndexPath(indexRootPath));
            IndexReader reader = DirectoryReader.open(indexDir)) {

        // Get a document iterator over the title field.
        DocumentDictionary docDict = new DocumentDictionary(reader, Indexer.TITLE_FIELD_NAME, null);
        InputIterator iterator = docDict.getEntryIterator();

        Set<BytesRef> titleSet = new HashSet<>();
        BytesRef next;
        while ((next = iterator.next()) != null) {
            // The iterator is allowed to reuse the returned BytesRef, so keep a private copy
            // before storing it in the set.
            BytesRef title = BytesRef.deepCopyOf(next);
            // add() returns false for a title that was already seen; index each title only once.
            if (titleSet.add(title)) {
                suggester.add(title, null, 0, null);
            }
        }

        suggester.commit();
    }
}