List of usage examples for the org.apache.lucene.document.SortedSetDocValuesField constructor
public SortedSetDocValuesField(String name, BytesRef bytes)
From source file:IndexTaxis.java
License:Apache License
static void addOneField(Document doc, String fieldName, String rawValue) { // nocommit/*from w w w . ja v a 2 s. c o m*/ /* if (fieldName.equals("pick_up_lat")) { double value = Double.parseDouble(rawValue); doc.add(new DoublePoint(fieldName, value)); doc.add(new SortedNumericDocValuesField(fieldName, NumericUtils.doubleToSortableLong(value))); } */ switch (fieldName) { case "vendor_id": case "cab_color": case "payment_type": case "trip_type": case "rate_code": case "store_and_fwd_flag": doc.add(new StringField(fieldName, rawValue, Field.Store.NO)); doc.add(new SortedSetDocValuesField(fieldName, new BytesRef(rawValue))); break; case "vendor_name": doc.add(new TextField(fieldName, rawValue, Field.Store.NO)); break; case "pick_up_date_time": case "drop_off_date_time": { long value = Long.parseLong(rawValue); doc.add(new LongPoint(fieldName, value)); doc.add(new SortedNumericDocValuesField(fieldName, value)); } break; case "passenger_count": { int value = Integer.parseInt(rawValue); doc.add(new IntPoint(fieldName, value)); doc.add(new SortedNumericDocValuesField(fieldName, value)); } break; case "trip_distance": case "pick_up_lat": case "pick_up_lon": case "drop_off_lat": case "drop_off_lon": case "fare_amount": case "surcharge": case "mta_tax": case "extra": case "ehail_fee": case "improvement_surcharge": case "tip_amount": case "tolls_amount": case "total_amount": { double value; try { value = Double.parseDouble(rawValue); } catch (NumberFormatException nfe) { System.out.println( "WARNING: failed to parse \"" + rawValue + "\" as double for field \"" + fieldName + "\""); return; } doc.add(new DoublePoint(fieldName, value)); doc.add(new SortedNumericDocValuesField(fieldName, NumericUtils.doubleToSortableLong(value))); } break; default: throw new AssertionError("failed to handle field \"" + fieldName + "\""); } }
From source file:com.github.tteofili.looseen.TestWikipediaClassification.java
License:Apache License
/**
 * Streams a Wikipedia XML dump and adds one Lucene document per {@code page} element to
 * {@code indexWriter}.
 *
 * <p>For each page the title and text are indexed as stored text fields. Every category matched
 * by {@code pattern} in the page text is added both as a stored {@code StringField} and as a
 * {@code SortedSetDocValuesField}. The writer is committed every 100000 pages and once more at
 * the end. Progress and timing are printed to standard output.
 *
 * @param dump        the dump file; its name must end in {@code .xml}
 * @param indexWriter writer that receives the page documents
 * @throws RuntimeException if {@code dump} is not an {@code .xml} file
 * @throws Exception        if reading the file, parsing the XML or writing to the index fails
 */
private static void importWikipedia(File dump, IndexWriter indexWriter) throws Exception {
    long start = System.currentTimeMillis();
    int count = 0;
    System.out.format("Importing %s...%n", dump);

    if (!dump.getName().endsWith(".xml")) {
        throw new RuntimeException("can index only wikipedia XML files");
    }

    XMLInputFactory factory = XMLInputFactory.newInstance();
    // The dump is external input: do not process DTDs or resolve external entities (XXE).
    factory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
    factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

    String title = null;
    String text = null;
    Set<String> cats = new HashSet<>();

    // XMLStreamReader.close() does not close the underlying input, so the stream is opened and
    // owned here to guarantee the file handle is released even when parsing fails.
    try (java.io.InputStream in = java.nio.file.Files.newInputStream(dump.toPath())) {
        StreamSource source = new StreamSource(in, dump.toURI().toASCIIString());
        XMLStreamReader reader = factory.createXMLStreamReader(source);
        try {
            while (reader.hasNext()) {
                if (count == Integer.MAX_VALUE) {
                    break;
                }
                switch (reader.next()) {
                case XMLStreamConstants.START_ELEMENT:
                    if ("title".equals(reader.getLocalName())) {
                        title = reader.getElementText();
                    } else if (TEXT_FIELD.equals(reader.getLocalName())) {
                        text = reader.getElementText();
                        Matcher matcher = pattern.matcher(text);
                        // find() continues after the previous match and also advances past
                        // zero-length matches, so this loop always terminates.
                        while (matcher.find()) {
                            String group = matcher.group(1);
                            String catName = group.replaceAll("\\|\\s", "").replaceAll("\\|\\*", "");
                            Collections.addAll(cats, catName.split("\\|"));
                        }
                    }
                    break;
                case XMLStreamConstants.END_ELEMENT:
                    if ("page".equals(reader.getLocalName())) {
                        Document page = new Document();
                        if (title != null) {
                            page.add(new TextField(TITLE_FIELD, title, Field.Store.YES));
                        }
                        if (text != null) {
                            page.add(new TextField(TEXT_FIELD, text, Field.Store.YES));
                        }
                        for (String cat : cats) {
                            page.add(new StringField(CATEGORY_FIELD, cat, Field.Store.YES));
                            page.add(new SortedSetDocValuesField(CATEGORY_FIELD, new BytesRef(cat)));
                        }
                        indexWriter.addDocument(page);

                        // Reset all per-page state so a page that lacks a title or text element
                        // is not indexed with the values of the previous page.
                        cats.clear();
                        title = null;
                        text = null;

                        count++;
                        if (count % 100000 == 0) {
                            indexWriter.commit();
                            System.out.format("Committed %d pages%n", count);
                        }
                    }
                    break;
                }
            }
        } finally {
            reader.close();
        }
    }

    indexWriter.commit();

    long millis = System.currentTimeMillis() - start;
    System.out.format("Imported %d pages in %d seconds (%.2fms/page)%n", count, millis / 1000,
            (double) millis / count);
}
From source file:com.qwazr.search.field.SortedSetDocValuesType.java
License:Apache License
@Override final public void fillValue(final Object value, final FieldConsumer consumer) { if (value instanceof BytesRef) consumer.accept(new SortedSetDocValuesField(fieldName, (BytesRef) value)); else//from w ww. j ava 2 s. com consumer.accept(new SortedSetDocValuesField(fieldName, new BytesRef(value.toString()))); }
From source file:com.stratio.cassandra.lucene.schema.mapping.KeywordMapper.java
License:Apache License
/** {@inheritDoc} */ @Override//from w w w. j a v a 2 s . com public Field sortedField(String name, String value, boolean isCollection) { BytesRef bytes = new BytesRef(value); if (isCollection) { return new SortedSetDocValuesField(name, bytes); } else { return new SortedDocValuesField(name, bytes); } }
From source file:com.twentyn.patentSearch.DocumentIndexer.java
License:Open Source License
public Document patentDocToLuceneDoc(File path, PatentDocument patentDoc) { // With help from https://lucene.apache.org/core/5_2_0/demo/src-html/org/apache/lucene/demo/IndexFiles.html Document doc = new Document(); doc.add(new StringField("file_name", path.getName(), Field.Store.YES)); doc.add(new StringField("id", patentDoc.getFileId(), Field.Store.YES)); doc.add(new StringField("grant_date", patentDoc.getGrantDate(), Field.Store.YES)); doc.add(new StringField("main_classification", patentDoc.getMainClassification(), Field.Store.YES)); doc.add(new TextField("title", patentDoc.getTitle(), Field.Store.YES)); doc.add(new TextField("claims", StringUtils.join("\n", patentDoc.getClaimsText()), Field.Store.NO)); doc.add(new TextField("description", StringUtils.join("\n", patentDoc.getTextContent()), Field.Store.NO)); // TODO: verify that these are searchable as expected. for (String cls : patentDoc.getFurtherClassifications()) { doc.add(new SortedSetDocValuesField("further_classification", new BytesRef(cls))); }// w w w .j a va 2s . co m for (String cls : patentDoc.getSearchedClassifications()) { doc.add(new SortedSetDocValuesField("searched_classification", new BytesRef(cls))); } return doc; }
From source file:io.crate.expression.reference.doc.IpColumnReferenceTest.java
License:Apache License
private static void addIPv6Values(IndexWriter writer) throws IOException { for (int i = 10; i < 20; i++) { Document doc = new Document(); doc.add(new StringField("_id", Integer.toString(i), Field.Store.NO)); InetAddress address = InetAddresses .forString("7bd0:8082:2df8:487e:e0df:e7b5:9362:" + Integer.toHexString(i)); doc.add(new SortedSetDocValuesField(IP_COLUMN, new BytesRef(InetAddressPoint.encode(address)))); writer.addDocument(doc);//from ww w .ja va2 s .co m } }
From source file:io.crate.expression.reference.doc.IpColumnReferenceTest.java
License:Apache License
/**
 * Writes ten documents with ids 0 to 9, each holding the address {@code 192.168.0.<id>} in
 * {@code IP_COLUMN}.
 *
 * <p>The document with id 0 additionally gets two values, {@code 192.168.0.1} and
 * {@code 192.168.0.2}, in {@code IP_ARRAY_COLUMN}.
 *
 * @param writer index writer that receives the documents
 * @throws IOException if adding a document fails
 */
private static void addIPv4Values(IndexWriter writer) throws IOException {
    for (int id = 0; id < 10; id++) {
        final Document doc = new Document();
        doc.add(new StringField("_id", Integer.toString(id), Field.Store.NO));

        final InetAddress single = InetAddresses.forString("192.168.0." + id);
        doc.add(new SortedSetDocValuesField(IP_COLUMN, new BytesRef(InetAddressPoint.encode(single))));

        if (id == 0) {
            // Only the first document carries the multi-valued column.
            for (String literal : new String[] {"192.168.0.1", "192.168.0.2"}) {
                final InetAddress element = InetAddresses.forString(literal);
                doc.add(new SortedSetDocValuesField(IP_ARRAY_COLUMN,
                        new BytesRef(InetAddressPoint.encode(element))));
            }
        }
        writer.addDocument(doc);
    }
}
From source file:io.crate.operation.reference.doc.IpColumnReferenceTest.java
License:Apache License
@Override protected void insertValues(IndexWriter writer) throws Exception { for (int i = 0; i < 10; i++) { Document doc = new Document(); InetAddress address = InetAddresses.forString("192.168.0." + i); doc.add(new SortedSetDocValuesField(column, new BytesRef(InetAddressPoint.encode(address)))); if (i == 0) { address = InetAddresses.forString("192.168.0.1"); doc.add(new SortedSetDocValuesField(column_array, new BytesRef(InetAddressPoint.encode(address)))); address = InetAddresses.forString("192.168.0.2"); doc.add(new SortedSetDocValuesField(column_array, new BytesRef(InetAddressPoint.encode(address)))); }//from w w w .ja v a 2 s . c om writer.addDocument(doc); } }
From source file:lucene.security.index.SecureAtomicReaderTestBase.java
License:Apache License
/**
 * Builds the test document for index {@code i}.
 *
 * <p>Every document has the stored string fields {@code test} and {@code info}, a numeric doc
 * value {@code number} equal to {@code i}, and binary, sorted and sorted-set doc values holding
 * the decimal text of {@code i}. The sorted-set field gets a second value, {@code "0" + i}.
 * Document 3 additionally has the stored string field {@code shouldnotsee}.
 *
 * @param i index of the document to build
 * @return the fields of the document
 */
private Iterable<IndexableField> getDoc(int i) {
    final String number = Integer.toString(i);

    Document document = new Document();
    document.add(new StringField("test", "test", Store.YES));
    document.add(new StringField("info", "info", Store.YES));
    if (i == 3) {
        document.add(new StringField("shouldnotsee", "shouldnotsee", Store.YES));
    }
    document.add(new NumericDocValuesField("number", i));
    // BytesRef(CharSequence) encodes as UTF-8, so the stored bytes no longer depend on the
    // platform default charset the way String.getBytes() without arguments does.
    document.add(new BinaryDocValuesField("bin", new BytesRef(number)));
    document.add(new SortedDocValuesField("sorted", new BytesRef(number)));
    document.add(new SortedSetDocValuesField("sortedset", new BytesRef(number)));
    document.add(new SortedSetDocValuesField("sortedset", new BytesRef("0" + number)));
    return document;
}
From source file:org.apache.solr.schema.SortableBinaryField.java
License:Apache License
@Override public List<IndexableField> createFields(SchemaField field, Object value, float boost) { if (field.hasDocValues()) { List<IndexableField> fields = new ArrayList<>(); IndexableField storedField = createField(field, value, boost); fields.add(storedField);//from w ww .jav a 2 s . c om ByteBuffer byteBuffer = toObject(storedField); BytesRef bytes = new BytesRef(byteBuffer.array(), byteBuffer.arrayOffset() + byteBuffer.position(), byteBuffer.remaining()); if (field.multiValued()) { fields.add(new SortedSetDocValuesField(field.getName(), bytes)); } else { fields.add(new SortedDocValuesField(field.getName(), bytes)); } return fields; } else { return Collections.singletonList(createField(field, value, boost)); } }