Example usage for org.apache.lucene.util BytesRef utf8ToString

List of usage examples for org.apache.lucene.util BytesRef utf8ToString

Introduction

On this page you can find usage examples for org.apache.lucene.util BytesRef utf8ToString.

Prototype

public String utf8ToString() 

Document

Interprets the stored bytes as UTF-8 bytes, returning the resulting string.
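
A minimal, self-contained sketch of the round trip from String to BytesRef and back (not taken from the usage examples below; the class name is illustrative):

import org.apache.lucene.util.BytesRef;

public class BytesRefUtf8Demo {
    public static void main(String[] args) {
        // new BytesRef(CharSequence) stores the UTF-8 encoding of the text
        BytesRef ref = new BytesRef("hello");
        // utf8ToString() decodes those bytes back into a java.lang.String
        String decoded = ref.utf8ToString();
        System.out.println(decoded); // prints "hello"
    }
}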

Usage

From source file:org.apache.solr.schema.TrieField.java

License:Apache License

@Override
public String readableToIndexed(String val) {
    // TODO: Numeric should never be handled as String, that may break in future lucene versions! Change to use BytesRef for term texts!
    final BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
    readableToIndexed(val, bytes);
    return bytes.utf8ToString();
}

From source file:org.apache.solr.schema.TrieField.java

License:Apache License

@Override
public String storedToIndexed(IndexableField f) {
    final BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
    final Number val = f.numericValue();
    if (val != null) {
        switch (type) {
        case INTEGER:
            NumericUtils.intToPrefixCodedBytes(val.intValue(), 0, bytes);
            break;
        case FLOAT:
            NumericUtils.intToPrefixCodedBytes(NumericUtils.floatToSortableInt(val.floatValue()), 0, bytes);
            break;
        case LONG: //fallthrough!
        case DATE:
            NumericUtils.longToPrefixCodedBytes(val.longValue(), 0, bytes);
            break;
        case DOUBLE:
            NumericUtils.longToPrefixCodedBytes(NumericUtils.doubleToSortableLong(val.doubleValue()), 0, bytes);
            break;
        default:
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                    "Unknown type for trie field: " + f.name());
        }
    } else {
        // the following code is "deprecated" and only to support pre-3.2 indexes using the old BinaryField encoding:
        final BytesRef bytesRef = f.binaryValue();
        if (bytesRef == null)
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                    "Invalid field contents: " + f.name());
        switch (type) {
        case INTEGER:
            NumericUtils.intToPrefixCodedBytes(toInt(bytesRef.bytes, bytesRef.offset), 0, bytes);
            break;
        case FLOAT: {
            // WARNING: Code Duplication! Keep in sync with o.a.l.util.NumericUtils!
            // copied from NumericUtils to not convert to/from float two times
            // code in next 2 lines is identical to: int v = NumericUtils.floatToSortableInt(Float.intBitsToFloat(toInt(arr)));
            int v = toInt(bytesRef.bytes, bytesRef.offset);
            if (v < 0)
                v ^= 0x7fffffff;
            NumericUtils.intToPrefixCodedBytes(v, 0, bytes);
            break;
        }
        case LONG: //fallthrough!
        case DATE:
            NumericUtils.longToPrefixCodedBytes(toLong(bytesRef.bytes, bytesRef.offset), 0, bytes);
            break;
        case DOUBLE: {
            // WARNING: Code Duplication! Keep in sync with o.a.l.util.NumericUtils!
            // copied from NumericUtils to not convert to/from double two times
            // code in next 2 lines is identical to: long v = NumericUtils.doubleToSortableLong(Double.longBitsToDouble(toLong(arr)));
            long v = toLong(bytesRef.bytes, bytesRef.offset);
            if (v < 0)
                v ^= 0x7fffffffffffffffL;
            NumericUtils.longToPrefixCodedBytes(v, 0, bytes);
            break;
        }
        default:
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                    "Unknown type for trie field: " + f.name());
        }
    }
    return bytes.utf8ToString();
}

From source file:org.apache.solr.search.CitationLRUCache.java

License:Apache License

private void unInvertedTheDamnThing(AtomicReader reader, Map<String, List<String>> fields, Bits liveDocs,
        KVSetter setter) throws IOException {

    if (liveDocs == null) {
        liveDocs = reader.getLiveDocs();
    }

    int docBase = reader.getContext().docBase;
    //System.out.println("***REBUILDING***");
    //System.out.println("Generating mapping from: " + reader.toString() + " docBase=" + docBase);

    // load multiple values->idlucene mapping
    for (String idField : fields.get("intFieldsMV")) {
        DocTermOrds unInvertedIndex = new DocTermOrds(reader, liveDocs, idField);
        TermsEnum termsEnum = unInvertedIndex.getOrdTermsEnum(reader);
        if (termsEnum == null) {
            continue;
        }
        DocsEnum docs = null;
        for (;;) {
            BytesRef term = termsEnum.next();
            if (term == null)
                break;

            Integer t = FieldCache.DEFAULT_INT_PARSER.parseInt(term);

            docs = termsEnum.docs(liveDocs, docs, 0); // we don't need docFreq
            int i = 0;
            for (;;) {
                int d = docs.nextDoc();
                if (d == DocIdSetIterator.NO_MORE_DOCS) {
                    break;
                }

                setter.set(docBase, d, treatIdentifiersAsText ? Integer.toString(t) : t);

                i += 1;
                //if (i > 1) {
                //   log.warn("The term {} is used by more than one document {} ; your cache has problems", t, d+docBase);
                //}
            }
        }
    }

    /*
     * Read every term
     *    - for each term get all live documents
     *       - and do something with the pair: (docid, term)
     */
    for (String idField : fields.get("textFieldsMV")) {
        DocTermOrds unInvertedIndex = new DocTermOrds(reader, liveDocs, idField);
        TermsEnum termsEnum = unInvertedIndex.getOrdTermsEnum(reader);
        if (termsEnum == null) {
            continue;
        }
        DocsEnum docs = null;
        for (;;) {
            BytesRef term = termsEnum.next();
            if (term == null)
                break;
            String t = term.utf8ToString();

            docs = termsEnum.docs(liveDocs, docs, 0); // we don't need docFreq
            for (;;) {
                int d = docs.nextDoc();
                if (d == DocIdSetIterator.NO_MORE_DOCS) {
                    break;
                }

                setter.set(docBase, d, t);

                //if (i > 1) {
                //   log.warn("The term {} is used by more than one document {} ; your cache has problems", t, d+docBase);
                //}
            }
        }
    }

    // load single valued ids 
    for (String idField : fields.get("textFields")) {
        BinaryDocValues idMapping = getCacheReuseExisting(reader, idField);

        Integer i = 0;
        BytesRef ret = new BytesRef();
        while (i < reader.maxDoc()) {
            if (liveDocs != null && !(i < liveDocs.length() && liveDocs.get(i))) {
                //System.out.println("skipping: " + i);
                i++;
                continue;
            }
            idMapping.get(i, ret);
            if (ret.length > 0) {
                setter.set(docBase, i, ret.utf8ToString()); // in this case, docbase will always be 0
            }
            i++;
        }
        if (purgeCache)
            FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
    }
    for (String idField : fields.get("intFields")) {
        Ints idMapping = FieldCache.DEFAULT.getInts(reader, idField, false);
        Integer i = 0;
        while (i < reader.maxDoc()) {
            if (liveDocs != null && !(i < liveDocs.length() && liveDocs.get(i))) {
                //System.out.println("skipping: " + i);
                i++;
                continue;
            }
            setter.set(docBase, i,
                    treatIdentifiersAsText ? Integer.toString(idMapping.get(i)) : idMapping.get(i));
            i++;
        }
    }

}

From source file:org.apache.solr.search.QueryParsing.java

License:Apache License

static void writeFieldVal(BytesRef val, FieldType ft, Appendable out, int flags) throws IOException {
    if (ft != null) {
        try {
            CharsRef readable = new CharsRef();
            ft.indexedToReadable(val, readable);
            out.append(readable);
        } catch (Exception e) {
            out.append("EXCEPTION(val=");
            out.append(val.utf8ToString());
            out.append(")");
        }
    } else {
        out.append(val.utf8ToString());
    }
}

From source file:org.apache.solr.search.SolrDocumentFetcher.java

License:Apache License

/**
 * This will fetch and add the docValues fields to a given SolrDocument/SolrInputDocument
 *
 * @param doc
 *          A SolrDocument or SolrInputDocument instance where docValues will be added
 * @param docid
 *          The lucene docid of the document to be populated
 * @param fields
 *          The list of docValues fields to be decorated
 */
public void decorateDocValueFields(@SuppressWarnings("rawtypes") SolrDocumentBase doc, int docid,
        Set<String> fields) throws IOException {
    final List<LeafReaderContext> leafContexts = searcher.getLeafContexts();
    final int subIndex = ReaderUtil.subIndex(docid, leafContexts);
    final int localId = docid - leafContexts.get(subIndex).docBase;
    final LeafReader leafReader = leafContexts.get(subIndex).reader();
    for (String fieldName : fields) {
        final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName);
        if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) {
            log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField);
            continue;
        }
        FieldInfo fi = searcher.getFieldInfos().fieldInfo(fieldName);
        if (fi == null) {
            continue; // Searcher doesn't have info about this field, hence ignore it.
        }
        final DocValuesType dvType = fi.getDocValuesType();
        switch (dvType) {
        case NUMERIC:
            final NumericDocValues ndv = leafReader.getNumericDocValues(fieldName);
            if (ndv == null) {
                continue;
            }
            Long val;
            if (ndv.advanceExact(localId)) {
                val = ndv.longValue();
            } else {
                continue;
            }
            Object newVal = val;
            if (schemaField.getType().isPointField()) {
                // TODO: Maybe merge PointField with TrieFields here
                NumberType type = schemaField.getType().getNumberType();
                switch (type) {
                case INTEGER:
                    newVal = val.intValue();
                    break;
                case LONG:
                    newVal = val.longValue();
                    break;
                case FLOAT:
                    newVal = Float.intBitsToFloat(val.intValue());
                    break;
                case DOUBLE:
                    newVal = Double.longBitsToDouble(val);
                    break;
                case DATE:
                    newVal = new Date(val);
                    break;
                default:
                    throw new AssertionError("Unexpected PointType: " + type);
                }
            } else {
                if (schemaField.getType() instanceof TrieIntField) {
                    newVal = val.intValue();
                } else if (schemaField.getType() instanceof TrieFloatField) {
                    newVal = Float.intBitsToFloat(val.intValue());
                } else if (schemaField.getType() instanceof TrieDoubleField) {
                    newVal = Double.longBitsToDouble(val);
                } else if (schemaField.getType() instanceof TrieDateField) {
                    newVal = new Date(val);
                } else if (schemaField.getType() instanceof EnumField) {
                    newVal = ((EnumField) schemaField.getType()).intValueToStringValue(val.intValue());
                }
            }
            doc.addField(fieldName, newVal);
            break;
        case BINARY:
            BinaryDocValues bdv = leafReader.getBinaryDocValues(fieldName);
            if (bdv == null) {
                continue;
            }
            BytesRef value;
            if (bdv.advanceExact(localId)) {
                value = BytesRef.deepCopyOf(bdv.binaryValue());
            } else {
                continue;
            }
            doc.addField(fieldName, value);
            break;
        case SORTED:
            SortedDocValues sdv = leafReader.getSortedDocValues(fieldName);
            if (sdv == null) {
                continue;
            }
            if (sdv.advanceExact(localId)) {
                final BytesRef bRef = sdv.binaryValue();
                // Special handling for Boolean fields since they're stored as 'T' and 'F'.
                if (schemaField.getType() instanceof BoolField) {
                    doc.addField(fieldName, schemaField.getType().toObject(schemaField, bRef));
                } else {
                    doc.addField(fieldName, bRef.utf8ToString());
                }
            }
            break;
        case SORTED_NUMERIC:
            final SortedNumericDocValues numericDv = leafReader.getSortedNumericDocValues(fieldName);
            NumberType type = schemaField.getType().getNumberType();
            if (numericDv != null) {
                if (numericDv.advance(localId) == localId) {
                    final List<Object> outValues = new ArrayList<Object>(numericDv.docValueCount());
                    for (int i = 0; i < numericDv.docValueCount(); i++) {
                        long number = numericDv.nextValue();
                        switch (type) {
                        case INTEGER:
                            outValues.add((int) number);
                            break;
                        case LONG:
                            outValues.add(number);
                            break;
                        case FLOAT:
                            outValues.add(NumericUtils.sortableIntToFloat((int) number));
                            break;
                        case DOUBLE:
                            outValues.add(NumericUtils.sortableLongToDouble(number));
                            break;
                        case DATE:
                            outValues.add(new Date(number));
                            break;
                        default:
                            throw new AssertionError("Unexpected PointType: " + type);
                        }
                    }
                    assert outValues.size() > 0;
                    doc.addField(fieldName, outValues);
                }
            }
        case SORTED_SET:
            final SortedSetDocValues values = leafReader.getSortedSetDocValues(fieldName);
            if (values != null && values.getValueCount() > 0) {
                if (values.advance(localId) == localId) {
                    final List<Object> outValues = new LinkedList<>();
                    for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values
                            .nextOrd()) {
                        value = values.lookupOrd(ord);
                        outValues.add(schemaField.getType().toObject(schemaField, value));
                    }
                    assert outValues.size() > 0;
                    doc.addField(fieldName, outValues);
                }
            }
        case NONE:
            break;
        }
    }
}

From source file:org.apache.solr.uninverting.TestDocTermOrds.java

License:Apache License

private void verify(LeafReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef)
        throws Exception {

    final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "field", prefixRef, Integer.MAX_VALUE,
            TestUtil.nextInt(random(), 2, 10));

    final NumericDocValues docIDToID = FieldCache.DEFAULT.getNumerics(r, "id", FieldCache.LEGACY_INT_PARSER);
    /*
      for(int docID=0;docID<subR.maxDoc();docID++) {
      System.out.println("  docID=" + docID + " id=" + docIDToID[docID]);
      }
    */

    if (VERBOSE) {
        System.out.println("TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.utf8ToString()));
        System.out.println("TEST: all TERMS:");
        TermsEnum allTE = MultiFields.getTerms(r, "field").iterator();
        int ord = 0;
        while (allTE.next() != null) {
            System.out.println("  ord=" + (ord++) + " term=" + allTE.term().utf8ToString());
        }
    }

    //final TermsEnum te = subR.fields().terms("field").iterator();
    final TermsEnum te = dto.getOrdTermsEnum(r);
    if (dto.numTerms() == 0) {
        if (prefixRef == null) {
            assertNull(MultiFields.getTerms(r, "field"));
        } else {
            Terms terms = MultiFields.getTerms(r, "field");
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator();
                TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef);
                if (result != TermsEnum.SeekStatus.END) {
                    assertFalse(
                            "term=" + termsEnum.term().utf8ToString() + " matches prefix="
                                    + prefixRef.utf8ToString(),
                            StringHelper.startsWith(termsEnum.term(), prefixRef));
                } else {
                    // ok
                }
            } else {
                // ok
            }
        }
        return;
    }

    if (VERBOSE) {
        System.out.println("TEST: TERMS:");
        te.seekExact(0);
        while (true) {
            System.out.println("  ord=" + te.ord() + " term=" + te.term().utf8ToString());
            if (te.next() == null) {
                break;
            }
        }
    }

    SortedSetDocValues iter = dto.iterator(r);
    for (int docID = 0; docID < r.maxDoc(); docID++) {
        assertEquals(docID, docIDToID.nextDoc());
        if (docID > iter.docID()) {
            iter.nextDoc();
        }
        if (docID < iter.docID()) {
            int[] answers = idToOrds[(int) docIDToID.longValue()];
            assertEquals(0, answers.length);
            continue;
        }

        if (VERBOSE) {
            System.out.println(
                    "TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.longValue() + ")");
        }
        final int[] answers = idToOrds[(int) docIDToID.longValue()];
        int upto = 0;
        long ord;
        while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
            te.seekExact(ord);
            final BytesRef expected = termsArray[answers[upto++]];
            if (VERBOSE) {
                System.out.println("  exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
            }
            assertEquals("expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString() + " ord="
                    + ord, expected, te.term());
        }
        assertEquals(answers.length, upto);
    }
}

From source file:org.apache.solr.uninverting.TestDocTermOrds.java

License:Apache License

public void testActuallySingleValued() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwconfig = newIndexWriterConfig(null);
    iwconfig.setMergePolicy(newLogMergePolicy());
    IndexWriter iw = new IndexWriter(dir, iwconfig);

    Document doc = new Document();
    doc.add(new StringField("foo", "bar", Field.Store.NO));
    iw.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("foo", "baz", Field.Store.NO));
    iw.addDocument(doc);

    doc = new Document();
    iw.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("foo", "baz", Field.Store.NO));
    doc.add(new StringField("foo", "baz", Field.Store.NO));
    iw.addDocument(doc);

    iw.forceMerge(1);
    iw.close();

    DirectoryReader ir = DirectoryReader.open(dir);
    LeafReader ar = getOnlyLeafReader(ir);

    SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", null);
    assertNotNull(DocValues.unwrapSingleton(v)); // actually a single-valued field
    assertEquals(2, v.getValueCount());

    assertEquals(0, v.nextDoc());
    assertEquals(0, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    assertEquals(1, v.nextDoc());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    assertEquals(3, v.nextDoc());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    BytesRef value = v.lookupOrd(0);
    assertEquals("bar", value.utf8ToString());

    value = v.lookupOrd(1);
    assertEquals("baz", value.utf8ToString());

    ir.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestFieldCache.java

License:Apache License

public void testDocValuesIntegration() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(null);
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
    doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
    doc.add(new NumericDocValuesField("numeric", 42));
    doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
    doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
    iw.addDocument(doc);
    DirectoryReader ir = iw.getReader();
    iw.close();
    LeafReader ar = getOnlyLeafReader(ir);

    // Binary type: can be retrieved via getTerms()
    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getNumerics(ar, "binary", FieldCache.INT_POINT_PARSER);
    });

    BinaryDocValues binary = FieldCache.DEFAULT.getTerms(ar, "binary");
    assertEquals(0, binary.nextDoc());
    final BytesRef term = binary.binaryValue();
    assertEquals("binary value", term.utf8ToString());

    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getTermsIndex(ar, "binary");
    });

    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getDocTermOrds(ar, "binary", null);
    });

    expectThrows(IllegalStateException.class, () -> {
        new DocTermOrds(ar, null, "binary");
    });

    Bits bits = FieldCache.DEFAULT.getDocsWithField(ar, "binary", null);
    assertTrue(bits.get(0));

    // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getNumerics(ar, "sorted", FieldCache.INT_POINT_PARSER);
    });

    expectThrows(IllegalStateException.class, () -> {
        new DocTermOrds(ar, null, "sorted");
    });

    binary = FieldCache.DEFAULT.getTerms(ar, "sorted");
    assertEquals(0, binary.nextDoc());

    BytesRef scratch = binary.binaryValue();
    assertEquals("sorted value", scratch.utf8ToString());

    SortedDocValues sorted = FieldCache.DEFAULT.getTermsIndex(ar, "sorted");
    assertEquals(0, sorted.nextDoc());
    assertEquals(0, sorted.ordValue());
    assertEquals(1, sorted.getValueCount());
    scratch = sorted.binaryValue();
    assertEquals("sorted value", scratch.utf8ToString());

    SortedSetDocValues sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sorted", null);
    assertEquals(0, sortedSet.nextDoc());
    assertEquals(0, sortedSet.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
    assertEquals(1, sortedSet.getValueCount());

    bits = FieldCache.DEFAULT.getDocsWithField(ar, "sorted", null);
    assertTrue(bits.get(0));

    // Numeric type: can be retrieved via getInts() and so on
    NumericDocValues numeric = FieldCache.DEFAULT.getNumerics(ar, "numeric", FieldCache.INT_POINT_PARSER);
    assertEquals(0, numeric.nextDoc());
    assertEquals(42, numeric.longValue());

    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getTerms(ar, "numeric");
    });

    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getTermsIndex(ar, "numeric");
    });

    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getDocTermOrds(ar, "numeric", null);
    });

    expectThrows(IllegalStateException.class, () -> {
        new DocTermOrds(ar, null, "numeric");
    });

    bits = FieldCache.DEFAULT.getDocsWithField(ar, "numeric", null);
    assertTrue(bits.get(0));

    // SortedSet type: can be retrieved via getDocTermOrds() 
    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.INT_POINT_PARSER);
    });

    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getTerms(ar, "sortedset");
    });

    expectThrows(IllegalStateException.class, () -> {
        FieldCache.DEFAULT.getTermsIndex(ar, "sortedset");
    });

    expectThrows(IllegalStateException.class, () -> {
        new DocTermOrds(ar, null, "sortedset");
    });

    sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset", null);
    assertEquals(0, sortedSet.nextDoc());
    assertEquals(0, sortedSet.nextOrd());
    assertEquals(1, sortedSet.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
    assertEquals(2, sortedSet.getValueCount());

    bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset", null);
    assertTrue(bits.get(0));

    ir.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestFieldCacheSortRandom.java

License:Apache License

private void testRandomStringSort(SortField.Type type) throws Exception {
    Random random = new Random(random().nextLong());

    final int NUM_DOCS = atLeast(100);
    final Directory dir = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    final boolean allowDups = random.nextBoolean();
    final Set<String> seen = new HashSet<>();
    final int maxLength = TestUtil.nextInt(random, 5, 100);
    if (VERBOSE) {
        System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
    }

    int numDocs = 0;
    final List<BytesRef> docValues = new ArrayList<>();
    // TODO: deletions
    while (numDocs < NUM_DOCS) {
        final Document doc = new Document();

        // 10% of the time, the document is missing the value:
        final BytesRef br;
        if (random().nextInt(10) != 7) {
            final String s;
            if (random.nextBoolean()) {
                s = TestUtil.randomSimpleString(random, maxLength);
            } else {
                s = TestUtil.randomUnicodeString(random, maxLength);
            }

            if (!allowDups) {
                if (seen.contains(s)) {
                    continue;
                }
                seen.add(s);
            }

            if (VERBOSE) {
                System.out.println("  " + numDocs + ": s=" + s);
            }

            doc.add(new StringField("stringdv", s, Field.Store.NO));
            docValues.add(new BytesRef(s));

        } else {
            br = null;
            if (VERBOSE) {
                System.out.println("  " + numDocs + ": <missing>");
            }
            docValues.add(null);
        }

        doc.add(new IntPoint("id", numDocs));
        doc.add(new StoredField("id", numDocs));
        writer.addDocument(doc);
        numDocs++;

        if (random.nextInt(40) == 17) {
            // force flush
            writer.getReader().close();
        }
    }

    Map<String, UninvertingReader.Type> mapping = new HashMap<>();
    mapping.put("stringdv", Type.SORTED);
    mapping.put("id", Type.INTEGER_POINT);
    final IndexReader r = UninvertingReader.wrap(writer.getReader(), mapping);
    writer.close();
    if (VERBOSE) {
        System.out.println("  reader=" + r);
    }

    final IndexSearcher s = newSearcher(r, false);
    final int ITERS = atLeast(100);
    for (int iter = 0; iter < ITERS; iter++) {
        final boolean reverse = random.nextBoolean();

        final TopFieldDocs hits;
        final SortField sf;
        final boolean sortMissingLast;
        final boolean missingIsNull;
        sf = new SortField("stringdv", type, reverse);
        sortMissingLast = random().nextBoolean();
        missingIsNull = true;

        if (sortMissingLast) {
            sf.setMissingValue(SortField.STRING_LAST);
        }

        final Sort sort;
        if (random.nextBoolean()) {
            sort = new Sort(sf);
        } else {
            sort = new Sort(sf, SortField.FIELD_DOC);
        }
        final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
        final RandomQuery f = new RandomQuery(random.nextLong(), random.nextFloat(), docValues);
        int queryType = random.nextInt(2);
        if (queryType == 0) {
            hits = s.search(new ConstantScoreQuery(f), hitCount, sort, random.nextBoolean(),
                    random.nextBoolean());
        } else {
            hits = s.search(f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
        }

        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount
                    + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
        }

        // Compute expected results:
        Collections.sort(f.matchValues, new Comparator<BytesRef>() {
            @Override
            public int compare(BytesRef a, BytesRef b) {
                if (a == null) {
                    if (b == null) {
                        return 0;
                    }
                    if (sortMissingLast) {
                        return 1;
                    } else {
                        return -1;
                    }
                } else if (b == null) {
                    if (sortMissingLast) {
                        return -1;
                    } else {
                        return 1;
                    }
                } else {
                    return a.compareTo(b);
                }
            }
        });

        if (reverse) {
            Collections.reverse(f.matchValues);
        }
        final List<BytesRef> expected = f.matchValues;
        if (VERBOSE) {
            System.out.println("  expected:");
            for (int idx = 0; idx < expected.size(); idx++) {
                BytesRef br = expected.get(idx);
                if (br == null && missingIsNull == false) {
                    br = new BytesRef();
                }
                System.out.println("    " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
                if (idx == hitCount - 1) {
                    break;
                }
            }
        }

        if (VERBOSE) {
            System.out.println("  actual:");
            for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
                final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
                BytesRef br = (BytesRef) fd.fields[0];

                System.out.println("    " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString())
                        + " id=" + s.doc(fd.doc).get("id"));
            }
        }
        for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
            final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
            BytesRef br = expected.get(hitIDX);
            if (br == null && missingIsNull == false) {
                br = new BytesRef();
            }

            // Normally, the old codecs (that don't support
            // docsWithField via doc values) will always return
            // an empty BytesRef for the missing case; however,
            // if all docs in a given segment were missing, in
            // that case it will return null!  So we must map
            // null here, too:
            BytesRef br2 = (BytesRef) fd.fields[0];
            if (br2 == null && missingIsNull == false) {
                br2 = new BytesRef();
            }

            assertEquals(br, br2);
        }
    }

    r.close();
    dir.close();
}

From source file:org.apache.solr.util.NumberUtils.java

License:Apache License

public static String SortableStr2int(BytesRef val) {
    // TODO: operate directly on BytesRef
    return SortableStr2int(val.utf8ToString());
}