List of usage examples for the org.apache.lucene.document.SortedNumericDocValuesField constructor SortedNumericDocValuesField(String name, long value)
public SortedNumericDocValuesField(String name, long value)
From source file:IndexTaxis.java
License:Apache License
static void addOneField(Document doc, String fieldName, String rawValue) { // nocommit/*w w w . j a va 2 s .c o m*/ /* if (fieldName.equals("pick_up_lat")) { double value = Double.parseDouble(rawValue); doc.add(new DoublePoint(fieldName, value)); doc.add(new SortedNumericDocValuesField(fieldName, NumericUtils.doubleToSortableLong(value))); } */ switch (fieldName) { case "vendor_id": case "cab_color": case "payment_type": case "trip_type": case "rate_code": case "store_and_fwd_flag": doc.add(new StringField(fieldName, rawValue, Field.Store.NO)); doc.add(new SortedSetDocValuesField(fieldName, new BytesRef(rawValue))); break; case "vendor_name": doc.add(new TextField(fieldName, rawValue, Field.Store.NO)); break; case "pick_up_date_time": case "drop_off_date_time": { long value = Long.parseLong(rawValue); doc.add(new LongPoint(fieldName, value)); doc.add(new SortedNumericDocValuesField(fieldName, value)); } break; case "passenger_count": { int value = Integer.parseInt(rawValue); doc.add(new IntPoint(fieldName, value)); doc.add(new SortedNumericDocValuesField(fieldName, value)); } break; case "trip_distance": case "pick_up_lat": case "pick_up_lon": case "drop_off_lat": case "drop_off_lon": case "fare_amount": case "surcharge": case "mta_tax": case "extra": case "ehail_fee": case "improvement_surcharge": case "tip_amount": case "tolls_amount": case "total_amount": { double value; try { value = Double.parseDouble(rawValue); } catch (NumberFormatException nfe) { System.out.println( "WARNING: failed to parse \"" + rawValue + "\" as double for field \"" + fieldName + "\""); return; } doc.add(new DoublePoint(fieldName, value)); doc.add(new SortedNumericDocValuesField(fieldName, NumericUtils.doubleToSortableLong(value))); } break; default: throw new AssertionError("failed to handle field \"" + fieldName + "\""); } }
From source file:com.icdd.lucence.IndexFiles.java
License:Apache License
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new,empty document Document doc = new Document(); Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);/*from w w w .j a v a 2 s . c om*/ doc.add(new SortedNumericDocValuesField("modified", lastModified)); doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can // be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been // indexed) so // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:com.icdd.lucene.CreateIndex.java
License:Apache License
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { // filter non-xml files if (filter.accept(file.toFile())) { System.out.println("num: " + num); num++;// w w w .jav a 2 s . c om if (num < endset && num >= offset) { try (InputStream stream = Files.newInputStream(file)) { // make a new,empty document Document doc = new Document(); Field pathField = new StringField("path", file.toString(), Field.Store.YES); String filename = file.getFileName().toString(); int post = filename.indexOf('_'); if (post > 0) { filename = filename.substring(post + 1, filename.length() - 4); } doc.add(pathField); doc.add(new StringField("title", filename, Field.Store.YES)); doc.add(new SortedNumericDocValuesField("modified", lastModified)); doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old // document // can // be there): logger.info("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have // been // indexed) so // path, if present: logger.info("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } } } }
From source file:com.qwazr.search.field.SortedDoubleDocValuesType.java
License:Apache License
/**
 * Converts {@code value} to a double and passes it to {@code consumer} as a
 * {@code SortedNumericDocValuesField} holding the sortable-long encoding.
 *
 * @param value    a {@link Number}, or any object whose {@code toString()} parses as a double
 * @param consumer receives the created field
 * @throws NumberFormatException if a non-{@code Number} value does not parse as a double
 */
@Override
final public void fillValue(final Object value, final FieldConsumer consumer) {
    final double number = value instanceof Number
            ? ((Number) value).doubleValue()
            : Double.parseDouble(value.toString());
    consumer.accept(new SortedNumericDocValuesField(fieldName, NumericUtils.doubleToSortableLong(number)));
}
From source file:com.qwazr.search.field.SortedFloatDocValuesType.java
License:Apache License
@Override final public void fillValue(final Object value, final FieldConsumer consumer) { if (value instanceof Number) consumer.accept(new SortedNumericDocValuesField(fieldName, NumericUtils.floatToSortableInt(((Number) value).floatValue()))); else// ww w . ja v a 2s.com consumer.accept(new SortedNumericDocValuesField(fieldName, NumericUtils.floatToSortableInt(Float.parseFloat(value.toString())))); }
From source file:com.qwazr.search.field.SortedIntDocValuesType.java
License:Apache License
/**
 * Converts {@code value} to an int and passes it to {@code consumer} as a
 * {@code SortedNumericDocValuesField}.
 *
 * @param value    a {@link Number}, or any object whose {@code toString()} parses as an int
 * @param consumer receives the created field
 * @throws NumberFormatException if a non-{@code Number} value does not parse as an int
 */
@Override
final public void fillValue(final Object value, final FieldConsumer consumer) {
    final int number = value instanceof Number
            ? ((Number) value).intValue()
            : Integer.parseInt(value.toString());
    // The int widens to long for the doc-values field.
    consumer.accept(new SortedNumericDocValuesField(fieldName, number));
}
From source file:com.qwazr.search.field.SortedLongDocValuesType.java
License:Apache License
@Override final public void fillValue(final Object value, final FieldConsumer consumer) { if (value instanceof Number) consumer.accept(new SortedNumericDocValuesField(fieldName, ((Number) value).longValue())); else//w w w. j a v a2 s .c om consumer.accept(new SortedNumericDocValuesField(fieldName, Long.parseLong(value.toString()))); }
From source file:com.rapidminer.search.GlobalSearchUtilities.java
License:Open Source License
/**
 * Builds the search document {@link Field} used for sorting by a long value.
 *
 * @param value
 *            the long sort key. Higher values are sorted higher than lower values
 * @return a newly created field named {@code "sort"}, never {@code null}
 */
public Field createSortingField(final long value) {
	final Field sortingField = new SortedNumericDocValuesField("sort", value);
	return sortingField;
}
From source file:gov.nist.basekb.FreebaseIndexer.java
License:LGPL
public void indexRecord(IndexWriter writer, String subject, Map<String, List<String>> predValues) throws Exception { Document doc = new Document(); Field subjField = new StringField(FIELD_NAME_SUBJECT, normalizeUri(subject), Field.Store.YES); //printlnDbg("DBG: indexRecord: " + subject); doc.add(subjField);// w w w . j a v a 2 s.c o m FieldType IndexedField = new FieldType(TextField.TYPE_NOT_STORED); IndexedField.setStoreTermVectors(INDEX_TERMVECTORS); FieldType IndexedStoredField = new FieldType(TextField.TYPE_STORED); IndexedStoredField.setStoreTermVectors(INDEX_TERMVECTORS); long pagerank = 0; for (Map.Entry<String, List<String>> entry : predValues.entrySet()) { String predicate = normalizeUri(entry.getKey()); List<String> values = entry.getValue(); //printlnDbg("DBG: " + predicate + ": " + values); for (String value : values) { String valueType = getValueType(value); value = normalizeValue(value); if (isIndexedPredicate(predicate)) { if (valueType == VALUE_TYPE_URI) // treat URI elements as atomic strings (e.g., types): doc.add(new Field(predicate, value, StoredField.TYPE)); else if (valueType == VALUE_TYPE_INT) if (predicate.equals("pr_bin")) pagerank = Long.parseLong(value); else doc.add(new SortedNumericDocValuesField(predicate, Long.parseLong(value))); else { // all others, run through the analyzer: String lang = INDEX_LANGUAGE ? 
getValueLanguage(value) : null; if (INDEX_LANGUAGE && lang != null && (isSupportedLanguage(lang) || isSupportedLanguage(getLanguageRoot(lang)))) { // multi-lingual indexing: if we have a supported language, we store the field // and then add an index entry with the language-qualified predicate: doc.add(new Field(predicate, value, StoredField.TYPE)); if (INDEX_PREDICATES) doc.add(new Field(languageQualifiedPredicate(predicate, lang), value, IndexedStoredField)); if (INDEX_TEXT) doc.add(new Field(languageQualifiedPredicate(FIELD_NAME_TEXT, lang), value, IndexedField)); } else { // mono-lingual indexing or no language designation: if (INDEX_PREDICATES) doc.add(new Field(predicate, value, IndexedStoredField)); if (INDEX_TEXT) { doc.add(new Field(FIELD_NAME_TEXT, value, IndexedField)); // make sure we store the triple if it wasn't already: if (!INDEX_PREDICATES) doc.add(new Field(predicate, value, StoredField.TYPE)); } } } } else doc.add(new Field(predicate, value, StoredField.TYPE)); } } doc.add(new SortedNumericDocValuesField("pr_bin", pagerank)); Labeling lab = classify(doc, classifier); doc.add(new Field("best_class", lab.getBestLabel().toString(), StoredField.TYPE)); doc.add(new Field("all_classes", labelsToString(lab), StoredField.TYPE)); // we are creating the index from scratch, so we just add the document: writer.addDocument(doc); }
From source file:gov.nist.basekb.FreebaseTools.java
License:LGPL
public void indexRecord(IndexWriter writer, String subject, Map<String, List<String>> predValues) throws IOException { Document doc = new Document(); Field subjField = new StringField(FIELD_NAME_SUBJECT, normalizeUri(subject), Field.Store.YES); //printlnDbg("DBG: indexRecord: " + subject); doc.add(subjField);/* w w w . ja va 2 s .c o m*/ FieldType VectorField = new FieldType(StringField.TYPE_STORED); VectorField.setStoreTermVectors(true); FieldType TokVectorField = new FieldType(TextField.TYPE_STORED); TokVectorField.setStoreTermVectors(true); long pagerank = 0; for (Map.Entry<String, List<String>> entry : predValues.entrySet()) { String predicate = normalizeUri(entry.getKey()); List<String> values = entry.getValue(); //printlnDbg("DBG: " + predicate + ": " + values); for (String value : values) { String valueType = getValueType(value); value = normalizeValue(value); if (isIndexedPredicate(predicate)) { if (valueType == VALUE_TYPE_URI) // treat URI elements as atomic strings (e.g., types): doc.add(new Field(predicate, value, VectorField)); else if (valueType == VALUE_TYPE_INT) if (predicate.equals("pr_bin")) pagerank = Long.parseLong(value); else doc.add(new SortedNumericDocValuesField(predicate, Long.parseLong(value))); else { // all others, run through the analyzer: String lang = INDEX_LANGUAGE ? 
getValueLanguage(value) : null; if (INDEX_LANGUAGE && lang != null && (isSupportedLanguage(lang) || isSupportedLanguage(getLanguageRoot(lang)))) { // multi-lingual indexing: if we have a supported language, we store the field // and then add an index entry with the language-qualified predicate: doc.add(new Field(predicate, value, VectorField)); if (INDEX_PREDICATES) doc.add(new Field(languageQualifiedPredicate(predicate, lang), value, TokVectorField)); if (INDEX_TEXT) doc.add(new Field(languageQualifiedPredicate(FIELD_NAME_TEXT, lang), value, TokVectorField)); } else { // mono-lingual indexing or no language designation: if (INDEX_PREDICATES) doc.add(new Field(predicate, value, TokVectorField)); if (INDEX_TEXT) { doc.add(new Field(FIELD_NAME_TEXT, value, TokVectorField)); // make sure we store the triple if it wasn't already: if (!INDEX_PREDICATES) doc.add(new Field(predicate, value, VectorField)); } } } } else doc.add(new Field(predicate, value, VectorField)); } } doc.add(new SortedNumericDocValuesField("pr_bin", pagerank)); // we are creating the index from scratch, so we just add the document: writer.addDocument(doc); }