Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

On this page you can find example usage for org.apache.lucene.index IndexWriter addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Document

Adds a document to this index.
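
As a quick orientation before the per-project examples below, here is a minimal, self-contained sketch of calling addDocument against a recent Lucene release. The class name AddDocumentExample, the index directory "index-dir", the field names and the StandardAnalyzer are illustrative assumptions, not taken from any of the source files listed under Usage.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class AddDocumentExample {

    public static void main(String[] args) throws IOException {
        // Open (or create) an index in the local directory "index-dir".
        try (Directory dir = FSDirectory.open(Paths.get("index-dir"));
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {

            Document doc = new Document();
            // Exact-match identifier, stored so it can be returned with search hits.
            doc.add(new StringField("id", "1", Field.Store.YES));
            // Full-text content, tokenized and indexed but not stored.
            doc.add(new TextField("contents", "hello lucene", Field.Store.NO));

            // addDocument returns the sequence number assigned to this operation.
            long seqNo = writer.addDocument(doc);
            writer.commit();
            System.out.println("added document, sequence number = " + seqNo);
        }
    }
}

Note that older Lucene releases declare addDocument as void rather than long; several of the examples below were written against those versions.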

Usage

From source file:com.mss.mirage.recruitment.ConsultantAction.java

License:Open Source License

private static void indexFile(IndexWriter writer, File f) throws IOException {

    if (f.isHidden() || !f.exists() || !f.canRead()) {
        return;
    }
    //System.out.println("Indexing " + f.getCanonicalPath());
    Document doc = new Document();
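    // Note: Field.Text(...) and Field.Keyword(...) are factory methods from the old
    // (pre-3.0) Lucene Field API; newer releases use TextField/StringField instead.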
    doc.add(Field.Text("contents", new FileReader(f)));
    //doc.add(Field.Keyword("filename", f.getCanonicalPath()));
    doc.add(Field.Keyword("filename", f.getAbsolutePath()));
    writer.addDocument(doc);

}

From source file:com.mycompany.lucenedemo.IndexFiles.java

/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path".  Use a
        // field that is indexed (i.e. searchable), but don't tokenize 
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongPoint that is indexed (i.e. efficiently filterable with
        // PointRangeQuery).  This indexes to millisecond resolution, which
        // is often too fine.  You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents".  Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so 
            // we use updateDocument instead to replace the old one matching the exact 
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:com.mycompany.mavenproject1.Main.java

private static void addItem(IndexWriter w, Item i) throws IOException {
    Document doc = new Document();
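    // One field per Item property: id and price are exact-match StringFields, the text
    // fields are tokenized for full-text search; all values are stored for retrieval.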
    doc.add(new StringField("id", String.valueOf(i.getId()), Field.Store.YES));
    doc.add(new StringField("price", String.valueOf(i.getPrice()), Field.Store.YES));
    doc.add(new TextField("name", i.getName(), Field.Store.YES));
    doc.add(new TextField("category", i.getCategory() != null ? i.getCategory() : "ni ma", Field.Store.YES));
    doc.add(new TextField("description", i.getDescription(), Field.Store.YES));
    w.addDocument(doc);
}

From source file:com.mycompany.restlet.search.sample.indexer.java

License:Apache License

/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {

    InputStream stream = Files.newInputStream(file);
    // make a new, empty document
    Document doc = new Document();

    // Add the path of the file as a field named "path".  Use a
    // field that is indexed (i.e. searchable), but don't tokenize 
    // the field into separate words and don't index term frequency
    // or positional information:
    Field pathField = new StringField("path", file.toString(), Field.Store.YES);
    doc.add(pathField);

    // Add the last modified date of the file as a field named "modified".
    // Use a LongField that is indexed (i.e. efficiently filterable with
    // NumericRangeFilter).  This indexes to millisecond resolution, which
    // is often too fine.  You could instead create a number based on
    // year/month/day/hour/minutes/seconds, down to the resolution you require.
    // For example the long value 2011021714 would mean
    // February 17, 2011, 2-3 PM.
    doc.add(new LongField("modified", lastModified, Field.Store.NO));

    // Add the contents of the file to a field named "contents".  Specify a Reader,
    // so that the text of the file is tokenized and indexed, but not stored.
    // Note that FileReader expects the file to be in UTF-8 encoding.
    // If that's not the case searching for special characters will fail.
    doc.add(new TextField("contents",
            new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

    if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
        // New index, so we just add the document (no old document can be there):
        System.out.println("adding " + file);
        writer.addDocument(doc);
    } else {
        // Existing index (an old copy of this document may have been indexed) so 
        // we use updateDocument instead to replace the old one matching the exact 
        // path, if present:
        System.out.println("updating " + file);
        writer.updateDocument(new Term("path", file.toString()), doc);
    }
}

From source file:com.nearinfinity.blur.search.TestingPagingCollector.java

License:Apache License

private static IndexReader getReaderFlatScore(int length) throws Exception {
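    // Build an in-memory index with "length" identical single-field documents and return a reader over it.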
    RAMDirectory directory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(directory,
            new IndexWriterConfig(LUCENE_VERSION, new KeywordAnalyzer()));
    for (int i = 0; i < length; i++) {
        Document document = new Document();
        document.add(new Field("f1", "value", Store.NO, Index.ANALYZED_NO_NORMS));
        indexWriter.addDocument(document);
    }
    indexWriter.close();
    return IndexReader.open(directory);
}

From source file:com.nearinfinity.blur.utils.TermDocIterableTest.java

License:Apache License

private void addDocumentBlock(int id, int count, IndexWriter writer) throws IOException {
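    // Create "count" documents that share the same id; each also carries 100 stored-only (unindexed) filler fields.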
    for (int i = 0; i < count; i++) {
        Document document = new Document();
        document.add(new Field("id", Integer.toString(id), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
        document.add(new Field("field", Integer.toString(i), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
        for (int j = 0; j < 100; j++) {
            document.add(new Field("field" + j,
                    "testing here testing here testing here testing here testing here testing here testing here",
                    Store.YES, Index.NO));
        }
        writer.addDocument(document);
    }
}

From source file:com.nero.model.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *  
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path".  Use a
                // field that is indexed (i.e. searchable), but don't tokenize 
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new Field("path", file.getPath(), Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS);
                pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(pathField);

                // Add the last modified date of the file as a field named "modified".
                // Use a NumericField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter).  This indexes to millisecond resolution, which
                // is often too fine.  You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down to the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                NumericField modifiedField = new NumericField("modified");
                modifiedField.setLongValue(file.lastModified());
                doc.add(modifiedField);

                // Add the contents of the file to a field named "contents".  Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so 
                    // we use updateDocument instead to replace the old one matching the exact 
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}

From source file:com.netcrest.pado.index.gemfire.lucene.TemporalLuceneDynamicIndexing.java

License:Open Source License

/**
 * Builds a single Lucene document for the specified temporal event and adds it to the index.
 *
 * @param parser
 *            Lucene parser
 * @param event
 *            Temporal event
 * @param writer
 *            Index writer
 */
private void buildTemporalEntry(StandardQueryParser parser, EntryEvent<ITemporalKey, ITemporalData> event,
        IndexWriter writer) {
    LuceneBuilder luceneBuilder = LuceneBuilder.getLuceneBuilder();

    boolean isKeyMap = false;
    KeyType keyType = null;
    Set<Object> keySet = null;
    Object firstDataObject = null;
    Method[] attributeGetters = null;
    boolean isIdentityKeyPrimitive = false;

    // First, extract out the key type.
    ITemporalKey tk = event.getKey();
    ITemporalData data = event.getNewValue();

    if (data instanceof GemfireTemporalData) {
        firstDataObject = ((GemfireTemporalData) data).getValue();
    } else {
        firstDataObject = data;
    }
    isKeyMap = firstDataObject instanceof KeyMap;
    if (isKeyMap == false) {
        if (firstDataObject instanceof Map) {
            keySet = ((Map) firstDataObject).keySet();
        } else {
            attributeGetters = ReflectionHelper.getAttributeGetters(data.getClass());
        }
    } else {
        keyType = ((KeyMap) firstDataObject).getKeyType();
        if (keyType == null) {
            keySet = ((Map) firstDataObject).keySet();
        }
    }
    Object identityKey = tk.getIdentityKey();
    isIdentityKeyPrimitive = ReflectionHelper.isPrimitiveWrapper(identityKey.getClass());

    // Next, create Lucene doc for the event
    LuceneField luceneField = new LuceneField();
    SimpleDateFormat format = (SimpleDateFormat) DateTool.Resolution.DAY.format.clone();
    Document doc = null;
    try {
        if (keyType != null) {
            KeyMap keyMap;
            if (data instanceof GemfireTemporalData) {
                keyMap = (KeyMap) ((GemfireTemporalData) data).getValue();
            } else {
                keyMap = (KeyMap) data;
            }
            keyType = keyMap.getKeyType();
            Set<String> nameSet = keyType.getNameSet();

            // TODO: See if we can support binary types
            // createDoc();
            doc = luceneBuilder.createKeyMapDocument(parser, writer, tk, data, -1, luceneField, keyType, keyMap,
                    nameSet, isIdentityKeyPrimitive, true, format);
        } else if (keySet != null) {
            Map dataMap;
            if (data instanceof GemfireTemporalData) {
                dataMap = (Map) ((GemfireTemporalData) data).getValue();
            } else {
                dataMap = (Map) data;
            }

            // TODO: See if we can support binary types
            // createDoc();
            doc = luceneBuilder.createMapDocument(parser, writer, tk, data, luceneField, dataMap, keySet,
                    isIdentityKeyPrimitive, format);
        } else {
            if (attributeGetters != null && attributeGetters.length > 0) {
                doc = luceneBuilder.createPojoDocument(parser, writer, tk, data, -1l, luceneField,
                        attributeGetters, isIdentityKeyPrimitive, true/* isNew */, format);
            }
        }
    } catch (Exception ex) {
        Logger.error(ex);
    }

    // Append the newly created doc to Lucene
    if (doc != null) {
        try {
            writer.addDocument(doc);
        } catch (Exception ex) {
            Logger.error(ex);
        }
    }
}

From source file:com.netcrest.pado.index.provider.lucene.LuceneBuilder.java

License:Open Source License

private void updateKeyMapDocument(StandardQueryParser parser, IndexWriter writer, ITemporalList tl,
        ITemporalKey tk, ITemporalData data, long endWrittenTime, LuceneField luceneField, KeyType keyType,
        KeyMap keyMap, Set<String> keyTypeNameSet, boolean isIdentityKeyPrimitive, SimpleDateFormat format)
        throws IOException {
    Query query = null;
    try {
        String queryString = String.format(TEMPORAL_KEY_QUERY_PREDICATE, tk.getIdentityKey(),
                tk.getStartValidTime(), tk.getEndValidTime(), tk.getWrittenTime());
        query = parser.parse(queryString, "__doc");
    } catch (Exception ex) {
        // Lucene 4.7 bug, internal message not serializable
        // Send message instead of nesting the cause.
        throw new RuntimeException(ex.getMessage());
    }

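    // Replace any existing document for this temporal key: delete the old match, then add the rebuilt document.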
    writer.deleteDocuments(query);

    Document doc = createKeyMapDocument(parser, writer, tk, data, endWrittenTime, luceneField, keyType, keyMap,
            keyTypeNameSet, isIdentityKeyPrimitive, false, format);
    writer.addDocument(doc);
}

From source file:com.netcrest.pado.index.provider.lucene.LuceneBuilder.java

License:Open Source License

private void updatePojoDocument(StandardQueryParser parser, IndexSearcher searcher, IndexWriter writer,
        ITemporalKey tk, ITemporalData data, long endWrittenTime, LuceneField luceneField,
        Method[] attributeGetters, boolean isIdentityKeyPrimitive, SimpleDateFormat format)
        throws IOException, IllegalArgumentException, IllegalAccessException, InvocationTargetException {
    Query query = null;
    try {
        String queryString = String.format(TEMPORAL_KEY_QUERY_PREDICATE, tk.getIdentityKey(),
                tk.getStartValidTime(), tk.getEndValidTime(), tk.getWrittenTime());
        query = parser.parse(queryString, "__doc");
    } catch (Exception ex) {
        // Lucene 4.7 bug, internal message not serializable
        // Send message instead of nesting the cause.
        throw new RuntimeException(ex.getMessage());
    }

    writer.deleteDocuments(query);

    Document doc = createPojoDocument(parser, writer, tk, data, endWrittenTime, luceneField, attributeGetters,
            isIdentityKeyPrimitive, false, format);
    writer.addDocument(doc);
}