Example usage for org.apache.lucene.util BytesRef utf8ToString

List of usage examples for org.apache.lucene.util BytesRef utf8ToString

Introduction

On this page you can find example usages of org.apache.lucene.util.BytesRef.utf8ToString().

Prototype

public String utf8ToString() 

Source Link

Document

Interprets the stored bytes as UTF-8 bytes, returning the resulting string.

Usage

From source file:org.elasticsearch.index.query.TypeFilterParser.java

License:Apache License

/**
 * Parses a {@code [type]} filter whose body is a single {@code value} field holding the type name
 * as a string.
 *
 * <p>If the mapper service has a document mapper for the type, that mapper's type filter is used;
 * otherwise a term filter on {@link TypeFieldMapper#NAME} is built from the raw type bytes. The
 * resulting filter is passed through {@code parseContext.cacheFilter} before being returned.
 *
 * @param parseContext the context supplying the parser, index and mapper service
 * @return the filter returned by {@code parseContext.cacheFilter}
 * @throws QueryParsingException if the next tokens are not a {@code value} field name followed by
 *         a string value
 */
@Override
public Filter parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
    final XContentParser parser = parseContext.parser();

    // Expected token sequence: FIELD_NAME("value"), then VALUE_STRING.
    if (parser.nextToken() != XContentParser.Token.FIELD_NAME) {
        throw new QueryParsingException(parseContext.index(),
                "[type] filter should have a value field, and the type name");
    }
    if (!parser.currentName().equals("value")) {
        throw new QueryParsingException(parseContext.index(),
                "[type] filter should have a value field, and the type name");
    }
    if (parser.nextToken() != XContentParser.Token.VALUE_STRING) {
        throw new QueryParsingException(parseContext.index(),
                "[type] filter should have a value field, and the type name");
    }
    final BytesRef typeName = parser.bytes();
    // move to the next token
    parser.nextToken();

    // LUCENE 4 UPGRADE: should the document mapper lookup accept a BytesRef as well?
    final DocumentMapper mapper = parseContext.mapperService().documentMapper(typeName.utf8ToString());
    final Filter typeFilter = mapper == null
            ? new TermFilter(new Term(TypeFieldMapper.NAME, typeName))
            : mapper.typeFilter();
    return parseContext.cacheFilter(typeFilter, null);
}

From source file:org.elasticsearch.index.query.TypeQueryParser.java

License:Apache License

/**
 * Parses a {@code [type]} query whose body is a single {@code value} field holding the type name
 * as a string.
 *
 * <p>If the mapper service has a document mapper for the type, that mapper's type filter is
 * returned; otherwise a term query on {@link TypeFieldMapper#NAME} is built from the raw type bytes.
 *
 * @param parseContext the context supplying the parser and mapper service
 * @return the query matching documents of the requested type
 * @throws QueryParsingException if the next tokens are not a {@code value} field name followed by
 *         a string value
 */
@Override
public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
    final XContentParser parser = parseContext.parser();

    // Expected token sequence: FIELD_NAME("value"), then VALUE_STRING.
    if (parser.nextToken() != XContentParser.Token.FIELD_NAME) {
        throw new QueryParsingException(parseContext,
                "[type] filter should have a value field, and the type name");
    }
    if (!parser.currentName().equals("value")) {
        throw new QueryParsingException(parseContext,
                "[type] filter should have a value field, and the type name");
    }
    if (parser.nextToken() != XContentParser.Token.VALUE_STRING) {
        throw new QueryParsingException(parseContext,
                "[type] filter should have a value field, and the type name");
    }
    final BytesRef typeName = parser.utf8Bytes();
    // move to the next token
    parser.nextToken();

    // LUCENE 4 UPGRADE: should the document mapper lookup accept a BytesRef as well?
    final DocumentMapper mapper = parseContext.mapperService().documentMapper(typeName.utf8ToString());
    if (mapper != null) {
        return mapper.typeFilter();
    }
    return new TermQuery(new Term(TypeFieldMapper.NAME, typeName));
}

From source file:org.elasticsearch.index.search.SimpleQueryStringQueryParser.java

License:Apache License

/**
 * Builds a prefix query for {@code text} across every field in {@code weights}.
 *
 * <p>Fields without a mapping contribute the query from {@code newUnmappedFieldQuery}. For mapped
 * fields, the text is either analyzed (when {@code settings.analyzeWildcard()} is set) or
 * normalized and turned into the field type's prefix query; each per-field query is wrapped with
 * the field's boost. A {@link RuntimeException} raised while building a field's query is handed to
 * {@code rethrowUnlessLenient}, whose result is returned immediately.
 *
 * @param text the prefix text
 * @return the single per-field query when exactly one was produced, otherwise a
 *         {@link DisjunctionMaxQuery} over all of them with a tie breaker of {@code 1.0f}
 */
@Override
public Query newPrefixQuery(String text) {
    final List<Query> perFieldQueries = new ArrayList<>();
    for (Map.Entry<String, Float> fieldAndBoost : weights.entrySet()) {
        final String field = fieldAndBoost.getKey();
        final MappedFieldType fieldType = context.fieldMapper(field);
        if (fieldType == null) {
            perFieldQueries.add(newUnmappedFieldQuery(field));
            continue;
        }
        try {
            if (!settings.analyzeWildcard()) {
                final BytesRef normalized = getAnalyzer(fieldType).normalize(field, text);
                final Query prefix = fieldType.prefixQuery(normalized.utf8ToString(), null, context);
                perFieldQueries.add(wrapWithBoost(prefix, fieldAndBoost.getValue()));
            } else {
                final Query analyzed = newPossiblyAnalyzedQuery(field, text, getAnalyzer(fieldType));
                // a null analyzed query contributes nothing for this field
                if (analyzed != null) {
                    perFieldQueries.add(wrapWithBoost(analyzed, fieldAndBoost.getValue()));
                }
            }
        } catch (RuntimeException e) {
            return rethrowUnlessLenient(e);
        }
    }
    return perFieldQueries.size() == 1
            ? perFieldQueries.get(0)
            : new DisjunctionMaxQuery(perFieldQueries, 1.0f);
}

From source file:org.elasticsearch.join.fetch.ParentJoinFieldSubFetchPhase.java

License:Apache License

/**
 * Reads the sorted doc value of {@code field} for document {@code docId} and decodes it as UTF-8.
 *
 * @return the decoded value, or {@code null} when the reader has no sorted doc values for the
 *         field or {@code advanceExact(docId)} reports no value for the document
 */
private String getSortedDocValue(String field, LeafReader reader, int docId) {
    try {
        final SortedDocValues values = reader.getSortedDocValues(field);
        final boolean hasValue = values != null && values.advanceExact(docId);
        if (!hasValue) {
            return null;
        }
        return values.lookupOrd(values.ordValue()).utf8ToString();
    } catch (IOException e) {
        // IOException is converted through ExceptionsHelper before being rethrown
        throw ExceptionsHelper.convertToElastic(e);
    }
}

From source file:org.elasticsearch.messy.tests.SimpleSortTests.java

License:Apache License

/**
 * Indexes between 200 and 300 documents with unique random unicode terms in {@code dense_bytes}
 * (and, rarely, the same term in {@code sparse_bytes}), then checks that ascending sorts on each
 * field return ids and sort values in the iteration order of a {@link TreeMap} keyed by
 * {@link BytesRef}.
 */
public void testRandomSorting() throws IOException, InterruptedException, ExecutionException {
    final Random rnd = getRandom();
    assertAcked(prepareCreate("test").addMapping("type",
            XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties")
                    .startObject("sparse_bytes").field("type", "string").field("index", "not_analyzed")
                    .endObject().startObject("dense_bytes").field("type", "string")
                    .field("index", "not_analyzed").endObject().endObject().endObject().endObject()));
    ensureGreen();

    // Expected orderings: term -> doc id, sorted by BytesRef's natural order.
    final TreeMap<BytesRef, String> sparseBytes = new TreeMap<>();
    final TreeMap<BytesRef, String> denseBytes = new TreeMap<>();
    final int numDocs = randomIntBetween(200, 300);
    final IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
    for (int doc = 0; doc < numDocs; doc++) {
        final String docId = Integer.toString(doc);
        // draw terms until one is found that no earlier document used
        BytesRef term;
        do {
            term = new BytesRef(TestUtil.randomRealisticUnicodeString(rnd));
        } while (denseBytes.containsKey(term));
        denseBytes.put(term, docId);
        final XContentBuilder source = jsonBuilder().startObject().field("dense_bytes", term.utf8ToString());
        if (rarely()) {
            source.field("sparse_bytes", term.utf8ToString());
            sparseBytes.put(term, docId);
        }
        source.endObject();
        builders[doc] = client().prepareIndex("test", "type", docId).setSource(source);
    }
    indexRandom(true, builders);

    // Sort on the field every document has.
    {
        final int size = between(1, denseBytes.size());
        final SearchResponse response = client().prepareSearch("test").setQuery(matchAllQuery()).setSize(size)
                .addSort("dense_bytes", SortOrder.ASC).execute().actionGet();
        assertNoFailures(response);
        assertThat(response.getHits().getTotalHits(), equalTo((long) numDocs));
        assertThat(response.getHits().hits().length, equalTo(size));
        final Iterator<Entry<BytesRef, String>> expected = denseBytes.entrySet().iterator();
        for (int pos = 0; pos < size; pos++) {
            assertThat(expected.hasNext(), equalTo(true));
            final Entry<BytesRef, String> want = expected.next();
            assertThat("pos: " + pos, response.getHits().getAt(pos).id(), equalTo(want.getValue()));
            assertThat(response.getHits().getAt(pos).sortValues()[0].toString(),
                    equalTo(want.getKey().utf8ToString()));
        }
    }

    // Sort on the sparse field, restricted to documents that have it.
    if (!sparseBytes.isEmpty()) {
        final int size = between(1, sparseBytes.size());
        final SearchResponse response = client().prepareSearch().setQuery(matchAllQuery())
                .setPostFilter(QueryBuilders.existsQuery("sparse_bytes")).setSize(size)
                .addSort("sparse_bytes", SortOrder.ASC).execute().actionGet();
        assertNoFailures(response);
        assertThat(response.getHits().getTotalHits(), equalTo((long) sparseBytes.size()));
        assertThat(response.getHits().hits().length, equalTo(size));
        final Iterator<Entry<BytesRef, String>> expected = sparseBytes.entrySet().iterator();
        for (int pos = 0; pos < size; pos++) {
            assertThat(expected.hasNext(), equalTo(true));
            final Entry<BytesRef, String> want = expected.next();
            assertThat(response.getHits().getAt(pos).id(), equalTo(want.getValue()));
            assertThat(response.getHits().getAt(pos).sortValues()[0].toString(),
                    equalTo(want.getKey().utf8ToString()));
        }
    }
}

From source file:org.elasticsearch.search.aggregations.bucket.composite.InternalComposite.java

License:Apache License

/**
 * Formats {@code obj} with the given {@link DocValueFormat}.
 *
 * <p>Only objects whose class is exactly {@link BytesRef}, {@link Long} or {@link Double} are
 * formatted; anything else is returned unchanged. With {@link DocValueFormat#RAW}, numbers are
 * returned as is and a {@link BytesRef} is decoded to its UTF-8 string.
 */
static Object formatObject(Object obj, DocValueFormat format) {
    final boolean raw = format == DocValueFormat.RAW;
    final Class<?> type = obj.getClass();

    if (type == BytesRef.class) {
        final BytesRef bytes = (BytesRef) obj;
        if (raw) {
            return bytes.utf8ToString();
        }
        return format.format(bytes);
    }
    if (type == Long.class) {
        final Long number = (Long) obj;
        if (raw) {
            return number;
        }
        return format.format(number);
    }
    if (type == Double.class) {
        final Double number = (Double) obj;
        if (raw) {
            return number;
        }
        return format.format(number);
    }
    return obj;
}

From source file:org.elasticsearch.search.facet.terms.strings.HashedScriptAggregator.java

License:Apache License

/**
 * Decides whether {@code value} should be counted.
 *
 * <p>Excluded values are rejected. When {@code convert} is set, the value is decoded into
 * {@code spare} as UTF-16 (a side effect later code relies on) and, if a matcher is configured,
 * accepted only when the matcher matches the decoded text.
 *
 * @return {@code false} if the value is excluded or fails the matcher, {@code true} otherwise
 */
private boolean accept(BytesRef value) {
    if (excluded != null && excluded.contains(value)) {
        return false;
    }
    if (!convert) {
        return true;
    }
    // only convert if we need to and only once per doc...
    UnicodeUtil.UTF8toUTF16(value, spare);
    if (matcher == null) {
        return true;
    }
    assert convert : "regexp: [convert == false] but should be true";
    assert value.utf8ToString().equals(spare.toString()) : "not converted";
    return matcher.reset(spare).matches();
}

From source file:org.elasticsearch.search.facet.terms.strings.HashedScriptAggregator.java

License:Apache License

/**
 * Forwards an accepted value to the superclass, optionally transformed by the script.
 *
 * <p>When a script is configured it is run with the decoded term bound to {@code "term"}:
 * a {@code null} or {@code Boolean.FALSE} result drops the value, {@code Boolean.TRUE} keeps the
 * original value, and any other result replaces the value with the result's string form (and the
 * hash of that replacement).
 */
@Override
protected void onValue(int docId, BytesRef value, int hashCode, BytesValues values) {
    if (!accept(value)) {
        return;
    }
    if (script != null) {
        assert convert : "script: [convert == false] but should be true";
        assert value.utf8ToString().equals(spare.toString()) : "not converted";
        script.setNextDocId(docId);
        // LUCENE 4 UPGRADE: needs optimization -- maybe a CharSequence does the job here?
        // the string is only created when a script actually needs it
        script.setNextVar("term", spare.toString());
        final Object result = script.run();
        if (result == null) {
            return;
        }
        if (!(result instanceof Boolean)) {
            // the script produced a replacement term: emit it with its own hash
            scriptSpare.copyChars(result.toString());
            super.onValue(docId, scriptSpare, scriptSpare.hashCode(), values);
            return;
        }
        if (!((Boolean) result)) {
            return;
        }
    }
    assert convert || (matcher == null && script == null);
    super.onValue(docId, value, hashCode, values);
}

From source file:org.elasticsearch.search.facet.terms.strings.TermsStringOrdinalsFacetExecutor.java

License:Apache License

/**
 * Merges the per-reader aggregators into a single terms facet.
 *
 * <p>Aggregators that have at least one position are placed in a priority queue and merged term
 * by term (see {@link #mergeNextTerm}). Surviving entries are collected either in an
 * {@link EntryPriorityQueue} (when {@code shardSize} is below {@link EntryPriorityQueue#LIMIT})
 * or in a {@link BoundedTreeSet} bounded by {@code shardSize}.
 *
 * @param facetName the name given to the resulting facet
 * @return the facet holding the collected term entries
 */
@Override
public InternalFacet buildFacet(String facetName) {
    final CharsRef spare = new CharsRef();
    AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size());
    for (ReaderAggregator aggregator : aggregators) {
        if (aggregator.nextPosition()) {
            queue.add(aggregator);
        }
    }

    if (shardSize < EntryPriorityQueue.LIMIT) {
        // small sizes: collect through the bounded priority queue
        EntryPriorityQueue ordered = new EntryPriorityQueue(shardSize, comparatorType.comparator());
        while (queue.size() > 0) {
            InternalStringTermsFacet.TermEntry entry = mergeNextTerm(queue, spare);
            if (entry != null) {
                ordered.insertWithOverflow(entry);
            }
        }
        // pop() is filled from the back so the list ends up in reverse pop order
        InternalStringTermsFacet.TermEntry[] list = new InternalStringTermsFacet.TermEntry[ordered.size()];
        for (int i = ordered.size() - 1; i >= 0; i--) {
            list[i] = (InternalStringTermsFacet.TermEntry) ordered.pop();
        }

        return new InternalStringTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing,
                total);
    }

    BoundedTreeSet<InternalStringTermsFacet.TermEntry> ordered = new BoundedTreeSet<InternalStringTermsFacet.TermEntry>(
            comparatorType.comparator(), shardSize);
    while (queue.size() > 0) {
        InternalStringTermsFacet.TermEntry entry = mergeNextTerm(queue, spare);
        if (entry != null) {
            ordered.add(entry);
        }
    }

    return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, missing, total);
}

/**
 * Consumes the term at the top of {@code queue} from every aggregator currently positioned on it,
 * summing its counts, and applies the min-count, exclusion and regex checks.
 *
 * @param queue non-empty queue of aggregators; advanced past the consumed term
 * @param spare scratch buffer used to decode the term for regex matching
 * @return the entry for the consumed term, or {@code null} if its count is not greater than
 *         {@code minCount}, it is excluded, or it does not match {@code matcher}
 */
private InternalStringTermsFacet.TermEntry mergeNextTerm(AggregatorPriorityQueue queue, CharsRef spare) {
    ReaderAggregator agg = queue.top();
    // copy the current term: the aggregator is advanced below and the copy is what gets stored in the entry
    BytesRef value = agg.copyCurrent();
    int count = 0;
    do {
        count += agg.counts.get(agg.position);
        if (agg.nextPosition()) {
            agg = queue.updateTop();
        } else {
            // we are done with this reader
            queue.pop();
            agg = queue.top();
        }
    } while (agg != null && value.equals(agg.current));

    if (count <= minCount) {
        return null;
    }
    if (excluded != null && excluded.contains(value)) {
        return null;
    }
    if (matcher != null) {
        UnicodeUtil.UTF8toUTF16(value, spare);
        assert spare.toString().equals(value.utf8ToString());
        if (!matcher.reset(spare).matches()) {
            return null;
        }
    }
    return new InternalStringTermsFacet.TermEntry(value, count);
}

From source file:org.elasticsearch.search.fetch.parent.ParentFieldSubFetchPhase.java

License:Apache License

/**
 * Looks up the parent id of document {@code docId} from the sorted doc values of the parent field.
 *
 * @param fieldMapper mapper whose {@code name()} identifies the doc-values field to read
 * @param reader      segment reader to read the doc values from
 * @param docId       segment-local document id
 * @return the parent id decoded as UTF-8, or {@code null} when the reader has no sorted doc values
 *         for the field or the value read for the document is empty
 */
public static String getParentId(ParentFieldMapper fieldMapper, LeafReader reader, int docId) {
    try {
        SortedDocValues docValues = reader.getSortedDocValues(fieldMapper.name());
        if (docValues == null) {
            // no sorted doc values for this field in this reader: the hit has no parent
            return null;
        }
        BytesRef parentId = docValues.get(docId);
        // an empty value is treated as "no parent" rather than returned as ""
        // (previously only guarded by an assert, which is a no-op when assertions are disabled)
        return parentId.length > 0 ? parentId.utf8ToString() : null;
    } catch (IOException e) {
        // IOException is converted through ExceptionsHelper before being rethrown
        throw ExceptionsHelper.convertToElastic(e);
    }
}