Example usage for the org.apache.lucene.document.SortedSetDocValuesField constructor SortedSetDocValuesField(String, BytesRef)

List of usage examples for the org.apache.lucene.document.SortedSetDocValuesField constructor

Introduction

On this page you can find example usages of the org.apache.lucene.document.SortedSetDocValuesField constructor SortedSetDocValuesField(String, BytesRef).

Prototype

public SortedSetDocValuesField(String name, BytesRef bytes) 

Source Link

Document

Create a new sorted DocValues field.

Usage

From source file:IndexTaxis.java

License:Apache License

/**
 * Adds the Lucene fields for one named column value to {@code doc}.
 *
 * <p>The set of fields added depends on {@code fieldName}:
 * <ul>
 *   <li>keyword-like columns get an unstored {@link StringField} plus a
 *       {@link SortedSetDocValuesField};</li>
 *   <li>{@code vendor_name} gets an unstored {@link TextField};</li>
 *   <li>date-time and passenger-count columns are parsed as {@code long} / {@code int} and get a
 *       point field plus a {@link SortedNumericDocValuesField};</li>
 *   <li>the remaining numeric columns are parsed as {@code double}; a value that fails to parse
 *       prints a warning to standard output and adds nothing.</li>
 * </ul>
 *
 * @param doc       document that receives the fields
 * @param fieldName column name; selects which field types are created
 * @param rawValue  unparsed column value
 * @throws NumberFormatException if a date-time or passenger-count value is not a valid integer
 * @throws AssertionError        if {@code fieldName} is not one of the handled columns
 */
static void addOneField(Document doc, String fieldName, String rawValue) {
    switch (fieldName) {
    case "vendor_id":
    case "cab_color":
    case "payment_type":
    case "trip_type":
    case "rate_code":
    case "store_and_fwd_flag": {
        doc.add(new StringField(fieldName, rawValue, Field.Store.NO));
        doc.add(new SortedSetDocValuesField(fieldName, new BytesRef(rawValue)));
        break;
    }

    case "vendor_name": {
        doc.add(new TextField(fieldName, rawValue, Field.Store.NO));
        break;
    }

    case "pick_up_date_time":
    case "drop_off_date_time": {
        long timestamp = Long.parseLong(rawValue);
        doc.add(new LongPoint(fieldName, timestamp));
        doc.add(new SortedNumericDocValuesField(fieldName, timestamp));
        break;
    }

    case "passenger_count": {
        int passengers = Integer.parseInt(rawValue);
        doc.add(new IntPoint(fieldName, passengers));
        doc.add(new SortedNumericDocValuesField(fieldName, passengers));
        break;
    }

    case "trip_distance":
    case "pick_up_lat":
    case "pick_up_lon":
    case "drop_off_lat":
    case "drop_off_lon":
    case "fare_amount":
    case "surcharge":
    case "mta_tax":
    case "extra":
    case "ehail_fee":
    case "improvement_surcharge":
    case "tip_amount":
    case "tolls_amount":
    case "total_amount": {
        final double parsed;
        try {
            parsed = Double.parseDouble(rawValue);
        } catch (NumberFormatException nfe) {
            // Best effort: report the bad value and leave this column out of the document.
            System.out.println(
                    "WARNING: failed to parse \"" + rawValue + "\" as double for field \"" + fieldName + "\"");
            return;
        }
        // Doc values hold longs, so the double is stored in its sortable-long encoding.
        long sortableBits = NumericUtils.doubleToSortableLong(parsed);
        doc.add(new DoublePoint(fieldName, parsed));
        doc.add(new SortedNumericDocValuesField(fieldName, sortableBits));
        break;
    }

    default:
        throw new AssertionError("failed to handle field \"" + fieldName + "\"");
    }
}

From source file:com.github.tteofili.looseen.TestWikipediaClassification.java

License:Apache License

/**
 * Streams a Wikipedia XML dump and adds one Lucene document per {@code page} element.
 *
 * <p>For every page, the title and text (when present) are added as stored text fields, and each
 * category extracted from the page text via {@code pattern} is added both as a stored
 * {@link StringField} and as a {@link SortedSetDocValuesField}. The writer is committed every
 * 100000 pages and once more after the last page. Progress and timing are printed to standard
 * output.
 *
 * @param dump        dump file to read; its name must end with {@code .xml}
 * @param indexWriter writer that receives the page documents
 * @throws RuntimeException if {@code dump} is not an {@code .xml} file
 * @throws Exception        if the dump cannot be read or parsed, or the index cannot be written
 */
private static void importWikipedia(File dump, IndexWriter indexWriter) throws Exception {
    long start = System.currentTimeMillis();
    int count = 0;
    System.out.format("Importing %s...%n", dump);

    if (!dump.getName().endsWith(".xml")) {
        throw new RuntimeException("can index only wikipedia XML files");
    }

    String title = null;
    String text = null;
    Set<String> cats = new HashSet<>();

    XMLInputFactory factory = XMLInputFactory.newInstance();
    // The dump is external input: never resolve external entities while parsing it.
    factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

    // XMLStreamReader.close() does not close the underlying input, so the stream is opened here
    // and released by try-with-resources. The system id matches what StreamSource(File) would use.
    try (java.io.InputStream in = java.nio.file.Files.newInputStream(dump.toPath())) {
        StreamSource source = new StreamSource(in, dump.toURI().toASCIIString());
        XMLStreamReader reader = factory.createXMLStreamReader(source);
        try {
            while (reader.hasNext()) {
                if (count == Integer.MAX_VALUE) {
                    break;
                }
                switch (reader.next()) {
                case XMLStreamConstants.START_ELEMENT:
                    if ("title".equals(reader.getLocalName())) {
                        title = reader.getElementText();
                    } else if (TEXT_FIELD.equals(reader.getLocalName())) {
                        text = reader.getElementText();
                        Matcher matcher = pattern.matcher(text);
                        int pos = 0;
                        while (matcher.find(pos)) {
                            String group = matcher.group(1);
                            // Strip "| " and "|*" sort-key markers, then split the remaining names.
                            String catName = group.replaceAll("\\|\\s", "").replaceAll("\\|\\*", "");
                            Collections.addAll(cats, catName.split("\\|"));
                            pos = matcher.end();
                        }
                    }
                    break;
                case XMLStreamConstants.END_ELEMENT:
                    if ("page".equals(reader.getLocalName())) {
                        Document page = new Document();
                        if (title != null) {
                            page.add(new TextField(TITLE_FIELD, title, Field.Store.YES));
                        }
                        if (text != null) {
                            page.add(new TextField(TEXT_FIELD, text, Field.Store.YES));
                        }
                        for (String cat : cats) {
                            page.add(new StringField(CATEGORY_FIELD, cat, Field.Store.YES));
                            page.add(new SortedSetDocValuesField(CATEGORY_FIELD, new BytesRef(cat)));
                        }
                        indexWriter.addDocument(page);

                        // Reset all per-page state so a page lacking a title or text element
                        // cannot inherit the values of the page before it.
                        cats.clear();
                        title = null;
                        text = null;

                        count++;
                        if (count % 100000 == 0) {
                            indexWriter.commit();
                            System.out.format("Committed %d pages%n", count);
                        }
                    }
                    break;
                default:
                    // Other event types carry nothing that is indexed.
                    break;
                }
            }
        } finally {
            reader.close();
        }
    }

    indexWriter.commit();

    long millis = System.currentTimeMillis() - start;
    // Avoid printing NaN/Infinity when the dump contained no pages.
    double millisPerPage = count == 0 ? 0.0 : (double) millis / count;
    System.out.format("Imported %d pages in %d seconds (%.2fms/page)%n", count, millis / 1000,
            millisPerPage);
}

From source file:com.qwazr.search.field.SortedSetDocValuesType.java

License:Apache License

/**
 * Passes one {@link SortedSetDocValuesField} for {@code value} to {@code consumer}.
 *
 * <p>A {@link BytesRef} is used as-is; any other object is converted through its
 * {@code toString()} representation.
 *
 * @param value    value to index
 * @param consumer receiver of the created field
 */
@Override
final public void fillValue(final Object value, final FieldConsumer consumer) {
    final BytesRef bytes;
    if (value instanceof BytesRef) {
        bytes = (BytesRef) value;
    } else {
        bytes = new BytesRef(value.toString());
    }
    consumer.accept(new SortedSetDocValuesField(fieldName, bytes));
}

From source file:com.stratio.cassandra.lucene.schema.mapping.KeywordMapper.java

License:Apache License

/** {@inheritDoc} */
@Override
public Field sortedField(String name, String value, boolean isCollection) {
    final BytesRef term = new BytesRef(value);
    // Non-collection columns use the single-valued doc values field;
    // collection columns use the sorted-set variant.
    if (!isCollection) {
        return new SortedDocValuesField(name, term);
    }
    return new SortedSetDocValuesField(name, term);
}

From source file:com.twentyn.patentSearch.DocumentIndexer.java

License:Open Source License

/**
 * Converts a parsed patent into a Lucene document.
 *
 * <p>File name, id, grant date and main classification are added as stored string fields, the
 * title as a stored text field, and the claims and description as unstored text fields. Each
 * further/searched classification is added as a {@link SortedSetDocValuesField}.
 *
 * @param path      file the patent was read from; only its name is indexed
 * @param patentDoc parsed patent contents
 * @return the populated Lucene document
 */
public Document patentDocToLuceneDoc(File path, PatentDocument patentDoc) {
    // With help from https://lucene.apache.org/core/5_2_0/demo/src-html/org/apache/lucene/demo/IndexFiles.html
    Document luceneDoc = new Document();

    // Stored identifying metadata.
    luceneDoc.add(new StringField("file_name", path.getName(), Field.Store.YES));
    luceneDoc.add(new StringField("id", patentDoc.getFileId(), Field.Store.YES));
    luceneDoc.add(new StringField("grant_date", patentDoc.getGrantDate(), Field.Store.YES));
    luceneDoc.add(new StringField("main_classification", patentDoc.getMainClassification(), Field.Store.YES));

    // Text content; only the title is stored.
    luceneDoc.add(new TextField("title", patentDoc.getTitle(), Field.Store.YES));
    String claims = StringUtils.join("\n", patentDoc.getClaimsText());
    luceneDoc.add(new TextField("claims", claims, Field.Store.NO));
    String description = StringUtils.join("\n", patentDoc.getTextContent());
    luceneDoc.add(new TextField("description", description, Field.Store.NO));

    // TODO: verify that these are searchable as expected.
    // NOTE(review): only doc values are added for the classification fields below (no
    // StringField/TextField) -- confirm that this is sufficient for the intended queries.
    for (String classification : patentDoc.getFurtherClassifications()) {
        luceneDoc.add(new SortedSetDocValuesField("further_classification", new BytesRef(classification)));
    }
    for (String classification : patentDoc.getSearchedClassifications()) {
        luceneDoc.add(new SortedSetDocValuesField("searched_classification", new BytesRef(classification)));
    }

    return luceneDoc;
}

From source file:io.crate.expression.reference.doc.IpColumnReferenceTest.java

License:Apache License

/**
 * Writes ten documents, with ids 10 through 19, each holding one IPv6 address in
 * {@code IP_COLUMN}. The last group of the address is the document id in hexadecimal.
 *
 * @param writer index writer that receives the documents
 * @throws IOException if adding a document fails
 */
private static void addIPv6Values(IndexWriter writer) throws IOException {
    final String addressPrefix = "7bd0:8082:2df8:487e:e0df:e7b5:9362:";
    for (int id = 10; id < 20; id++) {
        InetAddress address = InetAddresses.forString(addressPrefix + Integer.toHexString(id));
        BytesRef encoded = new BytesRef(InetAddressPoint.encode(address));

        Document doc = new Document();
        doc.add(new StringField("_id", Integer.toString(id), Field.Store.NO));
        doc.add(new SortedSetDocValuesField(IP_COLUMN, encoded));
        writer.addDocument(doc);
    }
}

From source file:io.crate.expression.reference.doc.IpColumnReferenceTest.java

License:Apache License

/**
 * Writes ten documents, with ids 0 through 9, each holding the address {@code 192.168.0.<id>}
 * in {@code IP_COLUMN}. Document 0 additionally gets two values in {@code IP_ARRAY_COLUMN}.
 *
 * @param writer index writer that receives the documents
 * @throws IOException if adding a document fails
 */
private static void addIPv4Values(IndexWriter writer) throws IOException {
    for (int id = 0; id < 10; id++) {
        Document doc = new Document();
        doc.add(new StringField("_id", Integer.toString(id), Field.Store.NO));

        InetAddress single = InetAddresses.forString("192.168.0." + id);
        doc.add(new SortedSetDocValuesField(IP_COLUMN, new BytesRef(InetAddressPoint.encode(single))));

        // Only the first document carries the multi-valued column.
        if (id == 0) {
            for (String element : new String[] { "192.168.0.1", "192.168.0.2" }) {
                InetAddress member = InetAddresses.forString(element);
                doc.add(new SortedSetDocValuesField(IP_ARRAY_COLUMN,
                        new BytesRef(InetAddressPoint.encode(member))));
            }
        }
        writer.addDocument(doc);
    }
}

From source file:io.crate.operation.reference.doc.IpColumnReferenceTest.java

License:Apache License

/**
 * Writes ten documents, each holding the address {@code 192.168.0.<i>} (i = 0..9) in the
 * {@code column} field. The first document additionally gets two values in {@code column_array}.
 *
 * @param writer index writer that receives the documents
 * @throws Exception if adding a document fails
 */
@Override
protected void insertValues(IndexWriter writer) throws Exception {
    for (int index = 0; index < 10; index++) {
        Document doc = new Document();

        InetAddress single = InetAddresses.forString("192.168.0." + index);
        doc.add(new SortedSetDocValuesField(column, new BytesRef(InetAddressPoint.encode(single))));

        // Only the first document carries the multi-valued column.
        if (index == 0) {
            for (String element : new String[] { "192.168.0.1", "192.168.0.2" }) {
                InetAddress member = InetAddresses.forString(element);
                doc.add(new SortedSetDocValuesField(column_array,
                        new BytesRef(InetAddressPoint.encode(member))));
            }
        }
        writer.addDocument(doc);
    }
}

From source file:lucene.security.index.SecureAtomicReaderTestBase.java

License:Apache License

/**
 * Builds the test document for index {@code i}.
 *
 * <p>Every document has stored {@code test} and {@code info} string fields, a numeric doc value
 * holding {@code i}, and binary, sorted and sorted-set doc values holding the decimal text of
 * {@code i}. The sorted-set field gets a second value with a leading {@code "0"}. Document 3
 * additionally carries a {@code shouldnotsee} field.
 *
 * @param i document index; also the source of the doc values
 * @return the fields of the document
 */
private Iterable<IndexableField> getDoc(int i) {
    // BytesRef(CharSequence) always encodes as UTF-8, whereas String.getBytes() without a
    // charset uses the platform default and could yield different bytes on some JVMs.
    final String number = Integer.toString(i);

    Document document = new Document();
    document.add(new StringField("test", "test", Store.YES));
    document.add(new StringField("info", "info", Store.YES));
    if (i == 3) {
        document.add(new StringField("shouldnotsee", "shouldnotsee", Store.YES));
    }
    document.add(new NumericDocValuesField("number", i));
    document.add(new BinaryDocValuesField("bin", new BytesRef(number)));
    document.add(new SortedDocValuesField("sorted", new BytesRef(number)));
    document.add(new SortedSetDocValuesField("sortedset", new BytesRef(number)));
    document.add(new SortedSetDocValuesField("sortedset", new BytesRef("0" + number)));
    return document;
}

From source file:org.apache.solr.schema.SortableBinaryField.java

License:Apache License

/**
 * Creates the index fields for a binary value.
 *
 * <p>Without doc values, the result is the single field produced by {@code createField}. With
 * doc values, that field is followed by a doc values field built from the remaining bytes of the
 * buffer returned by {@code toObject}: sorted-set for multi-valued schema fields, sorted
 * otherwise.
 *
 * @param field schema field being populated
 * @param value raw value to index
 * @param boost boost passed through to {@code createField}
 * @return the fields to add to the document
 */
@Override
public List<IndexableField> createFields(SchemaField field, Object value, float boost) {
    if (!field.hasDocValues()) {
        return Collections.singletonList(createField(field, value, boost));
    }

    List<IndexableField> result = new ArrayList<>();
    IndexableField stored = createField(field, value, boost);
    result.add(stored);

    // Wrap the buffer's backing array without copying, covering only its remaining bytes.
    ByteBuffer buffer = toObject(stored);
    byte[] backing = buffer.array();
    int offset = buffer.arrayOffset() + buffer.position();
    BytesRef bytes = new BytesRef(backing, offset, buffer.remaining());

    String fieldName = field.getName();
    if (field.multiValued()) {
        result.add(new SortedSetDocValuesField(fieldName, bytes));
    } else {
        result.add(new SortedDocValuesField(fieldName, bytes));
    }
    return result;
}