List of usage examples for the org.apache.lucene.document.NumericDocValuesField constructor
public NumericDocValuesField(String name, Long value)
From source file:DVBench.java
License:Apache License
static void doBench(int bpv) throws Exception { File file = new File("/data/indices/dvbench"); file.mkdirs();// w ww . jav a 2s .co m Directory dir = FSDirectory.open(file); IndexWriterConfig config = new IndexWriterConfig(null); config.setOpenMode(OpenMode.CREATE); config.setMergeScheduler(new SerialMergeScheduler()); config.setMergePolicy(new LogDocMergePolicy()); config.setMaxBufferedDocs(25000); IndexWriter writer = new IndexWriter(dir, config); MyRandom r = new MyRandom(); int numdocs = 400000; Document doc = new Document(); Field dv = new NumericDocValuesField("dv", 0); Field inv = new LongField("inv", 0, Field.Store.NO); Field boxed = new BinaryDocValuesField("boxed", new BytesRef(8)); Field boxed2 = new BinaryDocValuesField("boxed2", new BytesRef(8)); doc.add(dv); doc.add(inv); doc.add(boxed); doc.add(boxed2); for (int i = 0; i < numdocs; i++) { // defeat blockpackedwriter final long value; if (i % 8192 == 0) { value = bpv == 64 ? Long.MIN_VALUE : 0; } else if (i % 8192 == 1) { value = bpv == 64 ? 
Long.MAX_VALUE : (1L << bpv) - 1; } else { value = r.nextLong(bpv); } dv.setLongValue(value); inv.setLongValue(value); box(value, boxed.binaryValue()); box(value, boxed2.binaryValue()); boxed2.binaryValue().length = (bpv + 7) / 8; // fixed length writer.addDocument(doc); } writer.close(); // run dv search tests String description = "dv (bpv=" + bpv + ")"; DirectoryReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); searcher.setQueryCache(null); // don't bench the cache int hash = 0; // warmup hash += search(description, searcher, "dv", 300, true); hash += search(description, searcher, "dv", 300, false); // Uninverting Map<String, UninvertingReader.Type> mapping = Collections.singletonMap("inv", UninvertingReader.Type.LONG); DirectoryReader uninv = UninvertingReader.wrap(reader, mapping); IndexSearcher searcher2 = new IndexSearcher(uninv); searcher2.setQueryCache(null); // don't bench the cache description = "fc (bpv=" + bpv + ")"; // warmup hash += search(description, searcher2, "inv", 300, true); hash += search(description, searcher2, "inv", 300, false); // Boxed inside binary DirectoryReader boxedReader = new BinaryAsVLongReader(reader); IndexSearcher searcher3 = new IndexSearcher(boxedReader); searcher3.setQueryCache(null); // don't bench the cache description = "boxed (bpv=" + bpv + ")"; // warmup hash += search(description, searcher3, "boxed", 300, true); hash += search(description, searcher3, "boxed", 300, false); description = "boxed fixed-length (bpv=" + bpv + ")"; // warmup hash += search(description, searcher3, "boxed2", 300, true); hash += search(description, searcher3, "boxed2", 300, false); if (hash == 3) { // wont happen System.out.println("hash=" + hash); } reader.close(); dir.close(); }
From source file:bzh.terrevirtuelle.navisu.gazetteer.impl.lucene.GeoNameResolver.java
License:Apache License
/** * Index gazetteer's one line data by built-in Lucene Index functions * * @param indexWriter Lucene indexWriter to be loaded * @param line a line from the gazetteer file * @throws IOException// w w w. j a v a 2 s. c o m * @throws NumberFormatException */ private void addDoc(IndexWriter indexWriter, final String line, final boolean reverseGeocodingEnabled) { String[] tokens = line.split("\t"); int ID = Integer.parseInt(tokens[0]); String name = tokens[1]; String alternatenames = tokens[3]; Double latitude = -999999.0; try { latitude = Double.parseDouble(tokens[4]); } catch (NumberFormatException e) { latitude = OUT_OF_BOUNDS; } Double longitude = -999999.0; try { longitude = Double.parseDouble(tokens[5]); } catch (NumberFormatException e) { longitude = OUT_OF_BOUNDS; } int population = 0; try { population = Integer.parseInt(tokens[14]); } catch (NumberFormatException e) { population = 0;// Treat as population does not exists } // Additional fields to rank more known locations higher // All available codes can be viewed on www.geonames.org String featureCode = tokens[7];// more granular category String countryCode = tokens[8]; String admin1Code = tokens[10];// eg US State String admin2Code = tokens[11];// eg county Document doc = new Document(); doc.add(new IntField(FIELD_NAME_ID, ID, Field.Store.YES)); doc.add(new TextField(FIELD_NAME_NAME, name, Field.Store.YES)); doc.add(new DoubleField(FIELD_NAME_LONGITUDE, longitude, Field.Store.YES)); doc.add(new DoubleField(FIELD_NAME_LATITUDE, latitude, Field.Store.YES)); doc.add(new TextField(FIELD_NAME_ALTERNATE_NAMES, alternatenames, Field.Store.YES)); doc.add(new TextField(FIELD_NAME_FEATURE_CODE, featureCode, Field.Store.YES)); doc.add(new TextField(FIELD_NAME_COUNTRY_CODE, countryCode, Field.Store.YES)); doc.add(new TextField(FIELD_NAME_ADMIN1_CODE, admin1Code, Field.Store.YES)); doc.add(new TextField(FIELD_NAME_ADMIN2_CODE, admin2Code, Field.Store.YES)); doc.add(new NumericDocValuesField(FIELD_NAME_POPULATION, 
population));//sort enabled field if (reverseGeocodingEnabled) { Point point = ctx.makePoint(longitude, latitude); for (IndexableField f : strategy.createIndexableFields(point)) { doc.add(f); } } try { indexWriter.addDocument(doc); } catch (IOException e) { e.printStackTrace(); } }
From source file:cn.hbu.cs.esearch.core.EsearchSegmentReader.java
License:Apache License
/**
 * Adds a numeric doc-values field holding {@code id} to {@code doc}, using
 * {@code AbstractEsearchIndexable.DOCUMENT_ID_PAYLOAD_FIELD} as the field name.
 *
 * @param doc document to receive the field
 * @param id  value stored in the field
 */
public static void fillDocumentID(Document doc, long id) {
    doc.add(new NumericDocValuesField(AbstractEsearchIndexable.DOCUMENT_ID_PAYLOAD_FIELD, id));
}
From source file:collene.TestShakespeare.java
License:Apache License
@Test public void rest() throws IOException, ParseException { File shakespeareDir = new File("src/test/resources/shakespeare"); File[] files = shakespeareDir.listFiles(new FileFilter() { @Override/* w w w. j a va 2 s .c om*/ public boolean accept(File pathname) { return !pathname.isHidden(); } }); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(directory, config); long startIndexTime = System.currentTimeMillis(); final int flushLines = 200; int totalLines = 0; Collection<Document> documents = new ArrayList<Document>(); for (File f : files) { String play = f.getName(); int lineNumber = 1; BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f))); String line = reader.readLine(); while (line != null) { // index it. Document doc = new Document(); doc.add(new NumericDocValuesField("line", lineNumber)); doc.add(new Field("play", play, TextField.TYPE_STORED)); doc.add(new Field("content", line, TextField.TYPE_STORED)); documents.add(doc); totalLines += 1; if (totalLines % flushLines == 0) { writer.addDocuments(documents); documents.clear(); } lineNumber += 1; line = reader.readLine(); } reader.close(); } if (documents.size() > 0) { writer.addDocuments(documents); } long endIndexTime = System.currentTimeMillis(); System.out.println( String.format("Index for %s took %d ms", directory.toString(), endIndexTime - startIndexTime)); //System.out.println(String.format("%s committed", directory.getClass().getSimpleName())); // writer.forceMerge(1); // System.out.println(String.format("%s merged", directory.getClass().getSimpleName())); // let's search! 
IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, false)); QueryParser parser = new QueryParser(Version.LUCENE_4_9, "content", analyzer); String[] queryTerms = new String[] { "trumpet" }; for (String term : queryTerms) { long searchStart = System.currentTimeMillis(); Query query = parser.parse(term); TopDocs docs = searcher.search(query, 10); long searchEnd = System.currentTimeMillis(); System.out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(), docs.totalHits, searchEnd - searchStart)); for (ScoreDoc doc : docs.scoreDocs) { System.out.println(String.format("%d %.2f %d", doc.doc, doc.score, doc.shardIndex)); } } writer.close(true); //System.out.println(String.format("%s closed", directory.getClass().getSimpleName())); System.out.println("I think these are the files:"); for (String s : directory.listAll()) { System.out.println(s); } directory.close(); }
From source file:com.b2international.index.lucene.DocValuesIntIndexField.java
License:Apache License
/**
 * Wraps an integer in a numeric doc-values field named after this index field.
 *
 * @param value the value to store; it is widened to {@code long}
 * @return a new field carrying {@code value} under {@code fieldName()}
 */
@Override
public NumericDocValuesField toDocValuesField(Integer value) {
    final String name = fieldName();
    return new NumericDocValuesField(name, value.longValue());
}
From source file:com.b2international.index.lucene.DocValuesLongIndexField.java
License:Apache License
/**
 * Wraps a long in a numeric doc-values field named after this index field.
 *
 * @param value the value to store
 * @return a new field carrying {@code value} under {@code fieldName()}
 */
@Override
public NumericDocValuesField toDocValuesField(Long value) {
    // The value is passed on as a Long, untouched, so constructor selection is unchanged.
    final String name = fieldName();
    return new NumericDocValuesField(name, value);
}
From source file:com.b2international.index.lucene.StoredOnlyDocValuesLongIndexField.java
License:Apache License
@Override public NumericDocValuesField toDocValuesField(T value) { if (value instanceof Long) { return new NumericDocValuesField(fieldName(), (long) value); } else if (value instanceof Integer) { return new NumericDocValuesField(fieldName(), (int) value); } else {// ww w. j ava2s . c o m throw new IllegalArgumentException("Integer and Long types only"); } }
From source file:com.czw.search.lucene.example.facet.DistanceFacetsExample.java
License:Apache License
/** Build the example index. */ public void index() throws IOException { IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter // Add documents with latitude/longitude location: // we index these both as DoublePoints (for bounding box/ranges) and as NumericDocValuesFields (for scoring) Document doc = new Document(); doc.add(new DoublePoint("latitude", 40.759011)); doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.759011))); doc.add(new DoublePoint("longitude", -73.9844722)); doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-73.9844722))); writer.addDocument(doc);/*w w w .j av a2 s. c o m*/ doc = new Document(); doc.add(new DoublePoint("latitude", 40.718266)); doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.718266))); doc.add(new DoublePoint("longitude", -74.007819)); doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.007819))); writer.addDocument(doc); doc = new Document(); doc.add(new DoublePoint("latitude", 40.7051157)); doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.7051157))); doc.add(new DoublePoint("longitude", -74.0088305)); doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.0088305))); writer.addDocument(doc); // Open near-real-time searcher searcher = new IndexSearcher(DirectoryReader.open(writer)); writer.close(); }
From source file:com.czw.search.lucene.example.facet.ExpressionAggregationFacetsExample.java
License:Apache License
/** Build the example index. */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); Document doc = new Document(); doc.add(new TextField("c", "foo bar", Store.NO)); doc.add(new NumericDocValuesField("popularity", 5L)); doc.add(new FacetField("A", "B")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new TextField("c", "foo foo bar", Store.NO)); doc.add(new NumericDocValuesField("popularity", 3L)); doc.add(new FacetField("A", "C")); indexWriter.addDocument(config.build(taxoWriter, doc)); indexWriter.close();/* www. j a va2 s. co m*/ taxoWriter.close(); }
From source file:com.czw.search.lucene.example.facet.RangeFacetsExample.java
License:Apache License
/** Build the example index. */ public void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // Add documents with a fake timestamp, 1000 sec before // "now", 2000 sec before "now", ...: for (int i = 0; i < 100; i++) { Document doc = new Document(); long then = nowSec - i * 1000; // Add as doc values field, so we can compute range facets: doc.add(new NumericDocValuesField("timestamp", then)); // Add as numeric field so we can drill-down: doc.add(new LongPoint("timestamp", then)); indexWriter.addDocument(doc);// ww w. ja v a 2s. c o m } // Open near-real-time searcher searcher = new IndexSearcher(DirectoryReader.open(indexWriter)); indexWriter.close(); }