Example usage of the org.apache.lucene.document.NumericDocValuesField constructor

List of usage examples for the org.apache.lucene.document.NumericDocValuesField constructor

Introduction

On this page you can find example usages of the constructor of org.apache.lucene.document.NumericDocValuesField.

Prototype

public NumericDocValuesField(String name, Long value) 

Source Link

Document

Creates a new DocValues field with the specified 64-bit long value

Usage

From source file:DVBench.java

License:Apache License

/**
 * Builds a 400000-document index under /data/indices/dvbench in which every document carries the
 * same generated long in four fields ("dv", "inv", "boxed", "boxed2"), then calls the
 * {@code search} helper against each representation: the "dv" field on a plain reader, the "inv"
 * field through an UninvertingReader, and the two binary fields through a BinaryAsVLongReader.
 *
 * <p>Nothing is closed if an exception is thrown part-way; {@code writer}, {@code reader} and
 * {@code dir} are only closed on the normal path.
 *
 * @param bpv bits per value handed to {@code r.nextLong(bpv)}; 64 selects Long.MIN_VALUE and
 *            Long.MAX_VALUE as the extreme values, any other value selects 0 and (1L << bpv) - 1
 * @throws Exception declared broadly; the method itself catches nothing
 */
static void doBench(int bpv) throws Exception {
    File file = new File("/data/indices/dvbench");
    file.mkdirs();// make sure the index directory exists (the boolean result is ignored)
    Directory dir = FSDirectory.open(file);
    IndexWriterConfig config = new IndexWriterConfig(null);
    // CREATE: start from a fresh index on every run
    config.setOpenMode(OpenMode.CREATE);
    config.setMergeScheduler(new SerialMergeScheduler());
    config.setMergePolicy(new LogDocMergePolicy());
    config.setMaxBufferedDocs(25000);
    IndexWriter writer = new IndexWriter(dir, config);

    MyRandom r = new MyRandom();
    int numdocs = 400000;
    // One Document and one set of Field instances are created up front and reused for every
    // added document; only the field values are changed inside the loop.
    Document doc = new Document();
    Field dv = new NumericDocValuesField("dv", 0);
    Field inv = new LongField("inv", 0, Field.Store.NO);
    Field boxed = new BinaryDocValuesField("boxed", new BytesRef(8));
    Field boxed2 = new BinaryDocValuesField("boxed2", new BytesRef(8));

    doc.add(dv);
    doc.add(inv);
    doc.add(boxed);
    doc.add(boxed2);
    for (int i = 0; i < numdocs; i++) {
        // defeat blockpackedwriter
        // Every 8192 documents the minimum and then the maximum value for this bpv are written,
        // all other documents get a value from r.nextLong(bpv).
        final long value;
        if (i % 8192 == 0) {
            value = bpv == 64 ? Long.MIN_VALUE : 0;
        } else if (i % 8192 == 1) {
            value = bpv == 64 ? Long.MAX_VALUE : (1L << bpv) - 1;
        } else {
            value = r.nextLong(bpv);
        }
        dv.setLongValue(value);
        inv.setLongValue(value);
        // box(...) is defined elsewhere; presumably it encodes value into the given BytesRef
        // in place -- NOTE(review): confirm against the helper.
        box(value, boxed.binaryValue());
        box(value, boxed2.binaryValue());
        // Overrides the length with bpv rounded up to whole bytes, so "boxed2" always has the
        // same length for a given bpv.
        boxed2.binaryValue().length = (bpv + 7) / 8; // fixed length
        writer.addDocument(doc);
    }

    writer.close();

    // run dv search tests
    String description = "dv (bpv=" + bpv + ")";
    DirectoryReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null); // don't bench the cache

    // The results of all search(...) calls are summed into hash and only inspected at the very
    // end; presumably this keeps the search work from being optimized away -- TODO confirm.
    int hash = 0;
    // warmup
    hash += search(description, searcher, "dv", 300, true);
    hash += search(description, searcher, "dv", 300, false);

    // Uninverting
    Map<String, UninvertingReader.Type> mapping = Collections.singletonMap("inv", UninvertingReader.Type.LONG);
    DirectoryReader uninv = UninvertingReader.wrap(reader, mapping);
    IndexSearcher searcher2 = new IndexSearcher(uninv);
    searcher2.setQueryCache(null); // don't bench the cache

    description = "fc (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher2, "inv", 300, true);
    hash += search(description, searcher2, "inv", 300, false);

    // Boxed inside binary
    DirectoryReader boxedReader = new BinaryAsVLongReader(reader);
    IndexSearcher searcher3 = new IndexSearcher(boxedReader);
    searcher3.setQueryCache(null); // don't bench the cache
    description = "boxed (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher3, "boxed", 300, true);
    hash += search(description, searcher3, "boxed", 300, false);

    description = "boxed fixed-length (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher3, "boxed2", 300, true);
    hash += search(description, searcher3, "boxed2", 300, false);

    if (hash == 3) {
        // wont happen
        System.out.println("hash=" + hash);
    }
    // Only the base reader is closed here; uninv and boxedReader wrap it and are not closed
    // separately.
    reader.close();
    dir.close();
}

From source file:bzh.terrevirtuelle.navisu.gazetteer.impl.lucene.GeoNameResolver.java

License:Apache License

/**
 * Indexes one tab-separated gazetteer line as a single Lucene document.
 *
 * <p>Columns read (0-based): 0 id, 1 name, 3 alternate names, 4 latitude, 5 longitude,
 * 7 feature code, 8 country code, 10 admin1 code, 11 admin2 code, 14 population.
 * A latitude or longitude that cannot be parsed is replaced by {@code OUT_OF_BOUNDS};
 * a population that cannot be parsed is stored as 0.
 *
 * <p>An {@link IOException} raised while adding the document is not propagated: its stack
 * trace is printed and the method returns normally.
 *
 * @param indexWriter Lucene indexWriter to be loaded
 * @param line a line from the gazetteer file
 * @param reverseGeocodingEnabled when {@code true}, the fields that {@code strategy} creates for
 *            the (longitude, latitude) point are added to the document as well
 * @throws NumberFormatException if the id column is not a valid integer
 * @throws ArrayIndexOutOfBoundsException if the line splits into fewer than 15 tokens
 */
private void addDoc(IndexWriter indexWriter, final String line, final boolean reverseGeocodingEnabled) {
    String[] tokens = line.split("\t");

    int id = Integer.parseInt(tokens[0]);
    String name = tokens[1];
    String alternatenames = tokens[3];

    // Primitive doubles: every consumer below takes a double, so boxing bought nothing.
    double latitude;
    try {
        latitude = Double.parseDouble(tokens[4]);
    } catch (NumberFormatException e) {
        latitude = OUT_OF_BOUNDS;
    }
    double longitude;
    try {
        longitude = Double.parseDouble(tokens[5]);
    } catch (NumberFormatException e) {
        longitude = OUT_OF_BOUNDS;
    }

    int population;
    try {
        population = Integer.parseInt(tokens[14]);
    } catch (NumberFormatException e) {
        population = 0;// Treat as population does not exist
    }

    // Additional fields to rank more known locations higher
    // All available codes can be viewed on www.geonames.org
    String featureCode = tokens[7];// more granular category
    String countryCode = tokens[8];
    String admin1Code = tokens[10];// eg US State
    String admin2Code = tokens[11];// eg county

    Document doc = new Document();
    doc.add(new IntField(FIELD_NAME_ID, id, Field.Store.YES));
    doc.add(new TextField(FIELD_NAME_NAME, name, Field.Store.YES));
    doc.add(new DoubleField(FIELD_NAME_LONGITUDE, longitude, Field.Store.YES));
    doc.add(new DoubleField(FIELD_NAME_LATITUDE, latitude, Field.Store.YES));
    doc.add(new TextField(FIELD_NAME_ALTERNATE_NAMES, alternatenames, Field.Store.YES));
    doc.add(new TextField(FIELD_NAME_FEATURE_CODE, featureCode, Field.Store.YES));
    doc.add(new TextField(FIELD_NAME_COUNTRY_CODE, countryCode, Field.Store.YES));
    doc.add(new TextField(FIELD_NAME_ADMIN1_CODE, admin1Code, Field.Store.YES));
    doc.add(new TextField(FIELD_NAME_ADMIN2_CODE, admin2Code, Field.Store.YES));
    doc.add(new NumericDocValuesField(FIELD_NAME_POPULATION, population));//sort enabled field

    if (reverseGeocodingEnabled) {
        Point point = ctx.makePoint(longitude, latitude);
        for (IndexableField f : strategy.createIndexableFields(point)) {
            doc.add(f);
        }
    }

    try {
        indexWriter.addDocument(doc);
    } catch (IOException e) {
        // Best effort: a failed line is reported and skipped so the caller can carry on with
        // the remaining lines (the signature declares no checked exception).
        e.printStackTrace();
    }
}

From source file:cn.hbu.cs.esearch.core.EsearchSegmentReader.java

License:Apache License

/**
 * Adds {@code id} to {@code doc} as a numeric doc-values field named
 * {@code AbstractEsearchIndexable.DOCUMENT_ID_PAYLOAD_FIELD}.
 *
 * @param doc document that receives the field
 * @param id  value stored in the field
 */
public static void fillDocumentID(Document doc, long id) {
    doc.add(new NumericDocValuesField(AbstractEsearchIndexable.DOCUMENT_ID_PAYLOAD_FIELD, id));
}

From source file:collene.TestShakespeare.java

License:Apache License

/**
 * Indexes every non-hidden file in src/test/resources/shakespeare, one document per line
 * (fields "line", "play" and "content"), handing documents to the writer in batches of 200.
 * It then runs the query "trumpet" against "content", prints timings and hits, closes the
 * writer and prints the directory's file listing.
 *
 * @throws IOException if the resource directory cannot be listed or any file/index I/O fails
 * @throws ParseException if the query string cannot be parsed
 */
@Test
public void rest() throws IOException, ParseException {
    File shakespeareDir = new File("src/test/resources/shakespeare");
    File[] files = shakespeareDir.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return !pathname.isHidden();
        }
    });
    if (files == null) {
        // listFiles returns null when the path is not a directory or an I/O error occurs;
        // fail with a clear message instead of a NullPointerException in the loop below.
        throw new IOException("Cannot list files in " + shakespeareDir.getAbsolutePath());
    }

    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(directory, config);

    long startIndexTime = System.currentTimeMillis();
    final int flushLines = 200;
    int totalLines = 0;
    Collection<Document> documents = new ArrayList<Document>();
    for (File f : files) {
        String play = f.getName();
        int lineNumber = 1;
        // try-with-resources: the file handle is released even if reading or indexing throws
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f)))) {
            String line = reader.readLine();
            while (line != null) {
                // index it.
                Document doc = new Document();
                doc.add(new NumericDocValuesField("line", lineNumber));
                doc.add(new Field("play", play, TextField.TYPE_STORED));
                doc.add(new Field("content", line, TextField.TYPE_STORED));
                documents.add(doc);

                totalLines += 1;
                if (totalLines % flushLines == 0) {
                    writer.addDocuments(documents);
                    documents.clear();
                }

                lineNumber += 1;
                line = reader.readLine();
            }
        }
    }

    // hand over whatever is left from the last, partial batch
    if (!documents.isEmpty()) {
        writer.addDocuments(documents);
    }
    long endIndexTime = System.currentTimeMillis();

    System.out.println(
            String.format("Index for %s took %d ms", directory.toString(), endIndexTime - startIndexTime));

    // let's search!
    // The near-real-time reader is closed once searching is done, before the writer and the
    // directory are closed.
    try (DirectoryReader indexReader = DirectoryReader.open(writer, false)) {
        IndexSearcher searcher = new IndexSearcher(indexReader);
        QueryParser parser = new QueryParser(Version.LUCENE_4_9, "content", analyzer);

        String[] queryTerms = new String[] { "trumpet" };

        for (String term : queryTerms) {
            long searchStart = System.currentTimeMillis();
            Query query = parser.parse(term);
            TopDocs docs = searcher.search(query, 10);
            long searchEnd = System.currentTimeMillis();
            System.out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(),
                    docs.totalHits, searchEnd - searchStart));
            for (ScoreDoc hit : docs.scoreDocs) {
                System.out.println(String.format("%d %.2f %d", hit.doc, hit.score, hit.shardIndex));
            }
        }
    }

    writer.close(true);

    System.out.println("I think these are the files:");
    for (String s : directory.listAll()) {
        System.out.println(s);
    }

    directory.close();
}

From source file:com.b2international.index.lucene.DocValuesIntIndexField.java

License:Apache License

/**
 * Creates a {@link NumericDocValuesField} named by {@code fieldName()} that holds the given
 * integer widened to a long.
 *
 * @param value the integer to store; it is unboxed, so {@code null} raises a NullPointerException
 * @return the new doc-values field
 */
@Override
public NumericDocValuesField toDocValuesField(Integer value) {
    return new NumericDocValuesField(fieldName(), value.longValue());
}

From source file:com.b2international.index.lucene.DocValuesLongIndexField.java

License:Apache License

/**
 * Creates a {@link NumericDocValuesField} named by {@code fieldName()} that holds the given
 * long value.
 *
 * @param value the value to store
 * @return the new doc-values field
 */
@Override
public NumericDocValuesField toDocValuesField(Long value) {
    final String name = fieldName();
    return new NumericDocValuesField(name, value);
}

From source file:com.b2international.index.lucene.StoredOnlyDocValuesLongIndexField.java

License:Apache License

/**
 * Creates a {@link NumericDocValuesField} named by {@code fieldName()} from a value that must be
 * a {@link Long} or an {@link Integer}; an Integer is widened to a long.
 *
 * @param value the value to store
 * @return the new doc-values field
 * @throws IllegalArgumentException if {@code value} is neither a Long nor an Integer
 *             (this includes {@code null})
 */
@Override
public NumericDocValuesField toDocValuesField(T value) {
    if (value instanceof Long) {
        return new NumericDocValuesField(fieldName(), ((Long) value).longValue());
    }
    if (value instanceof Integer) {
        return new NumericDocValuesField(fieldName(), ((Integer) value).longValue());
    }
    throw new IllegalArgumentException("Integer and Long types only");
}

From source file:com.czw.search.lucene.example.facet.DistanceFacetsExample.java

License:Apache License

/**
 * Build the example index: three documents, each with a latitude/longitude location, and
 * then open the near-real-time {@code searcher} on them.
 *
 * @throws IOException if creating the writer, adding a document or opening the reader fails
 */
public void index() throws IOException {
    // { latitude, longitude } of the example documents, in indexing order
    final double[][] locations = {
            { 40.759011, -73.9844722 },
            { 40.718266, -74.007819 },
            { 40.7051157, -74.0088305 } };

    // try-with-resources: the writer is closed even when indexing fails part-way
    try (IndexWriter writer = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE))) {

        // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter

        // Add documents with latitude/longitude location:
        // we index these both as DoublePoints (for bounding box/ranges) and as NumericDocValuesFields (for scoring)
        for (double[] location : locations) {
            final double latitude = location[0];
            final double longitude = location[1];

            Document doc = new Document();
            doc.add(new DoublePoint("latitude", latitude));
            doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(latitude)));
            doc.add(new DoublePoint("longitude", longitude));
            doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(longitude)));
            writer.addDocument(doc);
        }

        // Open near-real-time searcher (before the writer is closed)
        searcher = new IndexSearcher(DirectoryReader.open(writer));
    }
}

From source file:com.czw.search.lucene.example.facet.ExpressionAggregationFacetsExample.java

License:Apache License

/**
 * Build the example index: two documents, each with a text field "c", a "popularity"
 * doc-values field and a facet under dimension "A".
 *
 * @throws IOException if writing to the index or the taxonomy fails
 */
private void index() throws IOException {
    IndexWriterConfig writerConfig = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE);
    IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig);

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    // Parallel arrays: entry i describes the i-th example document.
    final String[] contents = { "foo bar", "foo foo bar" };
    final long[] popularities = { 5L, 3L };
    final String[] facetValues = { "B", "C" };

    for (int i = 0; i < contents.length; i++) {
        Document document = new Document();
        document.add(new TextField("c", contents[i], Store.NO));
        document.add(new NumericDocValuesField("popularity", popularities[i]));
        document.add(new FacetField("A", facetValues[i]));
        indexWriter.addDocument(config.build(taxoWriter, document));
    }

    indexWriter.close();
    taxoWriter.close();
}

From source file:com.czw.search.lucene.example.facet.RangeFacetsExample.java

License:Apache License

/**
 * Build the example index: 100 documents whose "timestamp" is {@code nowSec} minus
 * 0, 1000, 2000, ... seconds, and then open the near-real-time {@code searcher} on them.
 *
 * @throws IOException if creating the writer, adding a document or opening the reader fails
 */
public void index() throws IOException {
    // try-with-resources: the writer is closed even when indexing fails part-way
    try (IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE))) {

        // Add documents with a fake timestamp, 1000 sec before
        // "now", 2000 sec before "now", ...:
        for (int i = 0; i < 100; i++) {
            Document doc = new Document();
            long then = nowSec - i * 1000;
            // Add as doc values field, so we can compute range facets:
            doc.add(new NumericDocValuesField("timestamp", then));
            // Add as numeric field so we can drill-down:
            doc.add(new LongPoint("timestamp", then));
            indexWriter.addDocument(doc);
        }

        // Open near-real-time searcher (before the writer is closed)
        searcher = new IndexSearcher(DirectoryReader.open(indexWriter));
    }
}