Example usage of the org.apache.lucene.document.LongPoint constructor

List of usage examples for the org.apache.lucene.document.LongPoint constructor

Introduction

On this page you can find example usages of the org.apache.lucene.document.LongPoint constructor.

Prototype

public LongPoint(String name, long... point) 

Source Link

Document

Creates a new LongPoint, indexing the provided N-dimensional long point.

Usage

From source file:IndexTaxis.java

License:Apache License

/**
 * Adds the index fields for one named raw value to {@code doc}. The field name decides how
 * {@code rawValue} is parsed and which field types are added.
 *
 * @param doc document that receives the new fields
 * @param fieldName name of the column; selects the parsing and field types
 * @param rawValue unparsed text of the value
 * @throws NumberFormatException if a date-time or passenger-count value is not a valid integer
 *     (unparseable doubles are only reported on stdout and skipped)
 * @throws AssertionError if {@code fieldName} is not one of the names handled below
 */
static void addOneField(Document doc, String fieldName, String rawValue) {
    // nocommit
    /*
    if (fieldName.equals("pick_up_lat")) {
      double value = Double.parseDouble(rawValue);
      doc.add(new DoublePoint(fieldName, value));
      doc.add(new SortedNumericDocValuesField(fieldName, NumericUtils.doubleToSortableLong(value)));
    }
    */
    switch (fieldName) {

    // Keyword-like columns: one un-tokenized, unstored term plus a doc-values entry.
    case "vendor_id":
    case "cab_color":
    case "payment_type":
    case "trip_type":
    case "rate_code":
    case "store_and_fwd_flag": {
        doc.add(new StringField(fieldName, rawValue, Field.Store.NO));
        doc.add(new SortedSetDocValuesField(fieldName, new BytesRef(rawValue)));
        break;
    }

    // Free text: indexed as an unstored text field only.
    case "vendor_name": {
        doc.add(new TextField(fieldName, rawValue, Field.Store.NO));
        break;
    }

    // Timestamps: parsed as long, indexed as a point plus numeric doc values.
    case "pick_up_date_time":
    case "drop_off_date_time": {
        final long timestamp = Long.parseLong(rawValue);
        doc.add(new LongPoint(fieldName, timestamp));
        doc.add(new SortedNumericDocValuesField(fieldName, timestamp));
        break;
    }

    // Small integer column: int point plus numeric doc values.
    case "passenger_count": {
        final int count = Integer.parseInt(rawValue);
        doc.add(new IntPoint(fieldName, count));
        doc.add(new SortedNumericDocValuesField(fieldName, count));
        break;
    }

    // Floating-point columns: a value that fails to parse is reported and the field skipped.
    case "trip_distance":
    case "pick_up_lat":
    case "pick_up_lon":
    case "drop_off_lat":
    case "drop_off_lon":
    case "fare_amount":
    case "surcharge":
    case "mta_tax":
    case "extra":
    case "ehail_fee":
    case "improvement_surcharge":
    case "tip_amount":
    case "tolls_amount":
    case "total_amount": {
        final double parsed;
        try {
            parsed = Double.parseDouble(rawValue);
        } catch (NumberFormatException nfe) {
            System.out.println(
                    "WARNING: failed to parse \"" + rawValue + "\" as double for field \"" + fieldName + "\"");
            return;
        }
        doc.add(new DoublePoint(fieldName, parsed));
        // Doc values hold longs, so the double is stored in its sortable-long encoding.
        doc.add(new SortedNumericDocValuesField(fieldName, NumericUtils.doubleToSortableLong(parsed)));
        break;
    }

    default:
        throw new AssertionError("failed to handle field \"" + fieldName + "\"");
    }
}

From source file:antnlp.opie.indexsearch.IndexFiles.java

License:Apache License

/**
 * Indexes every non-blank line of a tab-separated file as its own document.
 *
 * <p>Each line is trimmed and split on tab characters. Column 0 becomes the stored
 * {@code docid} field and column 1 the stored {@code contents} text field. A non-blank line
 * without a second column fails with {@link ArrayIndexOutOfBoundsException}.
 *
 * @param writer index writer that receives the documents
 * @param file tab-separated input file, read as UTF-8
 * @param lastModified value indexed in the {@code modified} field of every document
 * @throws IOException if reading the file fails, or as propagated from the index writer
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    // try-with-resources closes the reader (and the underlying stream) even when a line
    // fails to parse or the writer throws; previously the stream leaked on any exception.
    try (BufferedReader bufReader = new BufferedReader(
            new InputStreamReader(Files.newInputStream(file), StandardCharsets.UTF_8))) {

        String docLine;
        while ((docLine = bufReader.readLine()) != null) {
            docLine = docLine.trim();
            if (docLine.length() == 0) {
                continue;
            }
            String[] column = docLine.split("\\t");
            System.out.println(column[0]);
            System.out.println(column[1]);

            // make a new, empty document
            Document doc = new Document();

            // Add the first column as a field named "docid".  Use a
            // field that is indexed (i.e. searchable), but don't tokenize
            // the field into separate words and don't index term frequency
            // or positional information:
            Field docidField = new StringField("docid", column[0], Field.Store.YES);
            doc.add(docidField);

            // Add the last modified date of the file a field named "modified".
            // Use a LongPoint that is indexed (i.e. efficiently filterable with
            // PointRangeQuery).  This indexes to milli-second resolution, which
            // is often too fine.  You could instead create a number based on
            // year/month/day/hour/minutes/seconds, down the resolution you require.
            // For example the long value 2011021714 would mean
            // February 17, 2011, 2-3 PM.
            doc.add(new LongPoint("modified", lastModified));

            // Add the second column as a field named "contents".  The text is
            // tokenized and indexed, and (unlike the Reader-based variant) also stored.
            doc.add(new TextField("contents", column[1], Field.Store.YES));

            if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                // New index, so we just add the document (no old document can be there):
                System.out.println("adding " + column[0]);
                writer.addDocument(doc);
            } else {
                // Existing index (an old copy of this document may have been indexed) so
                // we use updateDocument instead to replace the old one matching the exact
                // docid, if present:
                System.out.println("updating " + column[0]);
                writer.updateDocument(new Term("docid", column[0]), doc);
            }
        }
    }
}

From source file:Application.mediaIndexer.java

/**
 * Indexes a single document/*from w ww.j  a v a 2s  .  c  o m*/
 * 
 * @throws TikaException
 * @throws SAXException
 */
public static void indexDoc(IndexWriter writer, Path file, TextArea results, long lastModified)
        throws IOException, SAXException, TikaException {
    AutoDetectParser parser = new AutoDetectParser();
    BodyContentHandler handler = new BodyContentHandler();
    Metadata metadata = new Metadata();
    try (InputStream stream = Files.newInputStream(file)) {
        parser.parse(stream, handler, metadata);
        Document doc = new Document();
        String[] metadataNames = metadata.names();
        for (String name : metadataNames)
            doc.add(new TextField(name, metadata.get(name), Field.Store.YES));
        doc.add(new StringField("path", file.toString(), Field.Store.YES));
        doc.add(new LongPoint("modified", lastModified));
        results.appendText("Title: " + metadata.get("title") + "\n");
        results.appendText("Artists: " + metadata.get("xmpDM:artist") + "\n");
        results.appendText("Genre: " + metadata.get("xmpDM:genre") + "\n");
        results.appendText("Year: " + metadata.get("xmpDM:releaseDate") + "\n");
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can
            // be there):
            results.appendText("adding " + file + "\n");
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been
            // indexed):
            results.appendText("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:cn.larry.search.book.index.IndexFiles.java

License:Apache License

/**
 * Indexes one file as a single document with {@code path}, {@code modified} and
 * {@code contents} fields.
 *
 * @param writer index writer that receives the document
 * @param file file whose content is indexed
 * @param lastModified value indexed in the {@code modified} field
 * @throws IOException if the file cannot be opened, or as propagated from the index writer
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream in = Files.newInputStream(file)) {
        final Document document = new Document();

        // "path": indexed (searchable) and stored, but kept as one un-tokenized term
        // without term-frequency or positional information.
        document.add(new StringField("path", file.toString(), Field.Store.YES));

        // "modified": a LongPoint, i.e. efficiently filterable with PointRangeQuery.
        // Millisecond resolution is often too fine; a coarser number built from
        // year/month/day/hour works as well, e.g. 2011021714 for
        // February 17, 2011, 2-3 PM.
        document.add(new LongPoint("modified", lastModified));

        // "contents": supplied as a Reader, so the text is tokenized and indexed but
        // not stored.  The file is decoded as UTF-8; if it uses another encoding,
        // searching for special characters will fail.
        final BufferedReader contentReader =
                new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
        document.add(new TextField("contents", contentReader));

        if (writer.getConfig().getOpenMode() != OpenMode.CREATE) {
            // Existing index: an older copy may be present, so replace whatever
            // matches the exact path.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), document);
        } else {
            // Brand-new index: nothing to replace, just add.
            System.out.println("adding " + file);
            writer.addDocument(document);
        }
    }
}

From source file:com.b2international.index.lucene.LongIndexField.java

License:Apache License

/**
 * Wraps {@code value} in a {@link LongPoint} that uses the name returned by
 * {@code fieldName()}.
 *
 * @param value the value to index; it is unboxed, so it must not be {@code null}
 * @return the newly created point field
 */
@Override
protected IndexableField toField(Long value) {
    final String name = fieldName();
    return new LongPoint(name, value);
}

From source file:com.czw.search.lucene.example.facet.RangeFacetsExample.java

License:Apache License

/**
 * Builds the example index: 100 documents whose {@code timestamp} values are
 * {@code nowSec}, {@code nowSec - 1000}, ... down to {@code nowSec - 99000}. Afterwards it
 * opens {@code searcher} on the writer and closes the writer.
 *
 * @throws IOException as propagated from the index writer or reader
 */
public void index() throws IOException {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(new WhitespaceAnalyzer());
    writerConfig.setOpenMode(OpenMode.CREATE);
    final IndexWriter writer = new IndexWriter(indexDir, writerConfig);

    // One document per 1000-second step back from "now".
    for (int secondsAgo = 0; secondsAgo < 100 * 1000; secondsAgo += 1000) {
        final long timestamp = nowSec - secondsAgo;

        final Document document = new Document();
        // Doc values field, so range facets can be computed:
        document.add(new NumericDocValuesField("timestamp", timestamp));
        // Point field, so drill-down queries can filter on it:
        document.add(new LongPoint("timestamp", timestamp));
        writer.addDocument(document);
    }

    // Open near-real-time searcher
    searcher = new IndexSearcher(DirectoryReader.open(writer));
    writer.close();
}

From source file:com.czw.search.lucene.example.IndexFiles.java

License:Apache License

/**
 * Indexes a single file as one document made of a {@code path}, a {@code modified} and a
 * {@code contents} field.
 *
 * @param writer index writer that receives the document
 * @param file file whose content is indexed
 * @param lastModified value indexed in the {@code modified} field
 * @throws IOException if the file cannot be opened, or as propagated from the index writer
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream fileStream = Files.newInputStream(file)) {
        final Document luceneDoc = new Document();

        // The path is searchable and stored, but not split into words and without
        // term-frequency or positional information.
        final Field path = new StringField("path", file.toString(), Field.Store.YES);
        luceneDoc.add(path);

        // The modification time is indexed as a LongPoint (efficiently filterable with
        // PointRangeQuery).  Milliseconds are often finer than needed; a number built
        // from year/month/day/hour, such as 2011021714 for February 17, 2011, 2-3 PM,
        // is a coarser alternative.
        final Field modified = new LongPoint("modified", lastModified);
        luceneDoc.add(modified);

        // The contents are handed over as a Reader: tokenized and indexed, not stored.
        // The file is read as UTF-8; with any other encoding, searching for special
        // characters will fail.
        final Field contents = new TextField("contents",
                new BufferedReader(new InputStreamReader(fileStream, StandardCharsets.UTF_8)));
        luceneDoc.add(contents);

        final boolean freshIndex = writer.getConfig().getOpenMode() == OpenMode.CREATE;
        if (freshIndex) {
            // No old document can exist in a new index, so simply add.
            System.out.println("adding " + file);
            writer.addDocument(luceneDoc);
        } else {
            // An old copy may have been indexed before; replace the one matching the
            // exact path, if present.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), luceneDoc);
        }
    }
}

From source file:com.flycode.CRIBSearch.SearchEngine.Demo.IndexFiles.java

License:Apache License

/**
 * Indexes a single document/*from ww w .j  a v  a2 s .co m*/
 */
private static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path".  Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file a field named "modified".
        // Use a LongPoint that is indexed (i.e. efficiently filterable with
        // PointRangeQuery).  This indexes to milli-second resolution, which
        // is often too fine.  You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents".  Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:com.heejong.lucene.IndexFiles.java

License:Apache License

/**
 * Indexes the given file as one document carrying {@code path}, {@code modified} and
 * {@code contents} fields.
 *
 * @param writer index writer that receives the document
 * @param file file whose content is indexed
 * @param lastModified value indexed in the {@code modified} field
 * @throws IOException if the file cannot be opened, or as propagated from the index writer
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream source = Files.newInputStream(file)) {
        final String pathText = file.toString();
        final Document entry = new Document();

        // Path: indexed and stored as a single un-tokenized term, with no
        // term-frequency or positional information.
        entry.add(new StringField("path", pathText, Field.Store.YES));

        // Last-modified time as a LongPoint, which PointRangeQuery can filter
        // efficiently.  Millisecond resolution is often too fine; a number such as
        // 2011021714 (February 17, 2011, 2-3 PM) built from
        // year/month/day/hour is an alternative.
        entry.add(new LongPoint("modified", lastModified));

        // Contents via a Reader: tokenized and indexed, but not stored.  The file is
        // decoded as UTF-8; with another encoding, searches for special characters fail.
        final InputStreamReader utf8Reader = new InputStreamReader(source, StandardCharsets.UTF_8);
        entry.add(new TextField("contents", new BufferedReader(utf8Reader)));

        switch (writer.getConfig().getOpenMode() == OpenMode.CREATE ? 0 : 1) {
        case 0:
            // New index: no previous version of the document can exist.
            System.out.println("adding " + file);
            writer.addDocument(entry);
            break;
        default:
            // Existing index: replace the document matching the exact path, if present.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", pathText), entry);
            break;
        }
    }
}

From source file:com.helger.pd.indexer.lucene.PDLuceneTest.java

License:Apache License

private static void _doIndex() throws IOException {
    /*/*from   w w w. j  av  a  2 s .  c om*/
     * 4. add a sample document to the index
     */
    final Document doc = new Document();

    // We add an id field that is searchable, but doesn't trigger
    // tokenization of the content
    final Field idField = new StringField("id", "Apache Lucene 5.0.0", Field.Store.YES);
    doc.add(idField);

    // Add the last big lucene version birthday which we don't want to store
    // but to be indexed nevertheless to be filterable
    doc.add(new LongPoint("lastVersionBirthday", new GregorianCalendar(2015, 1, 20).getTimeInMillis()));

    // The version info content should be searchable also be tokens,
    // this is why we use a TextField; as we use a reader, the content is
    // not stored!
    doc.add(new TextField("pom", new BufferedReader(
            new InputStreamReader(new FileInputStream(new File("pom.xml")), StandardCharsets.UTF_8))));

    // Existing index
    try (final PDLucene aLucene = new PDLucene()) {
        aLucene.updateDocument(new Term("id", "Apache Lucene 5.0.0"), doc);
    }
}