List of usage examples for org.apache.lucene.document LongPoint LongPoint
public LongPoint(String name, long... point)
From source file:IndexTaxis.java
License:Apache License
static void addOneField(Document doc, String fieldName, String rawValue) { // nocommit//from w ww . j av a 2s . com /* if (fieldName.equals("pick_up_lat")) { double value = Double.parseDouble(rawValue); doc.add(new DoublePoint(fieldName, value)); doc.add(new SortedNumericDocValuesField(fieldName, NumericUtils.doubleToSortableLong(value))); } */ switch (fieldName) { case "vendor_id": case "cab_color": case "payment_type": case "trip_type": case "rate_code": case "store_and_fwd_flag": doc.add(new StringField(fieldName, rawValue, Field.Store.NO)); doc.add(new SortedSetDocValuesField(fieldName, new BytesRef(rawValue))); break; case "vendor_name": doc.add(new TextField(fieldName, rawValue, Field.Store.NO)); break; case "pick_up_date_time": case "drop_off_date_time": { long value = Long.parseLong(rawValue); doc.add(new LongPoint(fieldName, value)); doc.add(new SortedNumericDocValuesField(fieldName, value)); } break; case "passenger_count": { int value = Integer.parseInt(rawValue); doc.add(new IntPoint(fieldName, value)); doc.add(new SortedNumericDocValuesField(fieldName, value)); } break; case "trip_distance": case "pick_up_lat": case "pick_up_lon": case "drop_off_lat": case "drop_off_lon": case "fare_amount": case "surcharge": case "mta_tax": case "extra": case "ehail_fee": case "improvement_surcharge": case "tip_amount": case "tolls_amount": case "total_amount": { double value; try { value = Double.parseDouble(rawValue); } catch (NumberFormatException nfe) { System.out.println( "WARNING: failed to parse \"" + rawValue + "\" as double for field \"" + fieldName + "\""); return; } doc.add(new DoublePoint(fieldName, value)); doc.add(new SortedNumericDocValuesField(fieldName, NumericUtils.doubleToSortableLong(value))); } break; default: throw new AssertionError("failed to handle field \"" + fieldName + "\""); } }
From source file:antnlp.opie.indexsearch.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { InputStreamReader iReader = new InputStreamReader(Files.newInputStream(file), StandardCharsets.UTF_8); BufferedReader bufReader = new BufferedReader(iReader); String docLine = null;/* w ww . j a v a 2 s . c o m*/ while ((docLine = bufReader.readLine()) != null) { docLine = docLine.trim(); if (docLine.length() == 0) continue; String[] column = docLine.split("\\t"); System.out.println(column[0]); System.out.println(column[1]); // make a new, empty document Document doc = new Document(); // Add the id of the file as a field named "id". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field docidField = new StringField("docid", column[0], Field.Store.YES); doc.add(docidField); // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", column[1], Field.Store.YES)); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + column[0]); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + column[0]); writer.updateDocument(new Term("docid", column[0]), doc); } } iReader.close(); bufReader.close(); }
From source file:Application.mediaIndexer.java
/** * Indexes a single document/*from w ww.j a v a 2s . c o m*/ * * @throws TikaException * @throws SAXException */ public static void indexDoc(IndexWriter writer, Path file, TextArea results, long lastModified) throws IOException, SAXException, TikaException { AutoDetectParser parser = new AutoDetectParser(); BodyContentHandler handler = new BodyContentHandler(); Metadata metadata = new Metadata(); try (InputStream stream = Files.newInputStream(file)) { parser.parse(stream, handler, metadata); Document doc = new Document(); String[] metadataNames = metadata.names(); for (String name : metadataNames) doc.add(new TextField(name, metadata.get(name), Field.Store.YES)); doc.add(new StringField("path", file.toString(), Field.Store.YES)); doc.add(new LongPoint("modified", lastModified)); results.appendText("Title: " + metadata.get("title") + "\n"); results.appendText("Artists: " + metadata.get("xmpDM:artist") + "\n"); results.appendText("Genre: " + metadata.get("xmpDM:genre") + "\n"); results.appendText("Year: " + metadata.get("xmpDM:releaseDate") + "\n"); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can // be there): results.appendText("adding " + file + "\n"); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been // indexed): results.appendText("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:cn.larry.search.book.index.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);// ww w.j a v a 2 s .com // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 4 would mean // February 17, 1, 2-3 PM. doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:com.b2international.index.lucene.LongIndexField.java
License:Apache License
@Override protected IndexableField toField(Long value) { return new LongPoint(fieldName(), value); }
From source file:com.czw.search.lucene.example.facet.RangeFacetsExample.java
License:Apache License
/** Build the example index. */ public void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // Add documents with a fake timestamp, 1000 sec before // "now", 2000 sec before "now", ...: for (int i = 0; i < 100; i++) { Document doc = new Document(); long then = nowSec - i * 1000; // Add as doc values field, so we can compute range facets: doc.add(new NumericDocValuesField("timestamp", then)); // Add as numeric field so we can drill-down: doc.add(new LongPoint("timestamp", then)); indexWriter.addDocument(doc);/* w w w . j a v a 2 s. co m*/ } // Open near-real-time searcher searcher = new IndexSearcher(DirectoryReader.open(indexWriter)); indexWriter.close(); }
From source file:com.czw.search.lucene.example.IndexFiles.java
License:Apache License
/** * Indexes a single document//from w w w. j a v a2s . c om */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:com.flycode.CRIBSearch.SearchEngine.Demo.IndexFiles.java
License:Apache License
/** * Indexes a single document/*from ww w .j a v a2 s .co m*/ */ private static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:com.heejong.lucene.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);/*from w w w . ja v a 2 s .com*/ // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 4 would mean // February 17, 1, 2-3 PM. doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:com.helger.pd.indexer.lucene.PDLuceneTest.java
License:Apache License
private static void _doIndex() throws IOException { /*/*from w w w. j av a 2 s . c om*/ * 4. add a sample document to the index */ final Document doc = new Document(); // We add an id field that is searchable, but doesn't trigger // tokenization of the content final Field idField = new StringField("id", "Apache Lucene 5.0.0", Field.Store.YES); doc.add(idField); // Add the last big lucene version birthday which we don't want to store // but to be indexed nevertheless to be filterable doc.add(new LongPoint("lastVersionBirthday", new GregorianCalendar(2015, 1, 20).getTimeInMillis())); // The version info content should be searchable also be tokens, // this is why we use a TextField; as we use a reader, the content is // not stored! doc.add(new TextField("pom", new BufferedReader( new InputStreamReader(new FileInputStream(new File("pom.xml")), StandardCharsets.UTF_8)))); // Existing index try (final PDLucene aLucene = new PDLucene()) { aLucene.updateDocument(new Term("id", "Apache Lucene 5.0.0"), doc); } }