Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

On this page you can find example usage for org.apache.lucene.index IndexWriter addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Document

Adds a document to this index.
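
As a quick orientation before the per-project examples below, here is a minimal, self-contained sketch of calling addDocument against a recent Lucene release. The class name AddDocumentExample, the index directory "index-dir", the field names and the StandardAnalyzer are illustrative assumptions, not taken from any of the source files listed under Usage.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class AddDocumentExample {

    public static void main(String[] args) throws IOException {
        // Open (or create) an index in the local directory "index-dir".
        try (Directory dir = FSDirectory.open(Paths.get("index-dir"));
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {

            Document doc = new Document();
            // Exact-match identifier, stored so it can be returned with search hits.
            doc.add(new StringField("id", "1", Field.Store.YES));
            // Full-text content, tokenized and indexed but not stored.
            doc.add(new TextField("contents", "hello lucene", Field.Store.NO));

            // addDocument returns the sequence number assigned to this operation.
            long seqNo = writer.addDocument(doc);
            writer.commit();
            System.out.println("added document, sequence number = " + seqNo);
        }
    }
}

Note that older Lucene releases declare addDocument as void rather than long; several of the examples below were written against those versions.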

Usage

From source file:com.mss.mirage.recruitment.ConsultantAction.java

License:Open Source License

private static void indexFile(IndexWriter writer, File f) throws IOException {

    if (f.isHidden() || !f.exists() || !f.canRead()) {
        return;
    }
    //System.out.println("Indexing " + f.getCanonicalPath());
    Document doc = new Document();
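    // Note: Field.Text(...) and Field.Keyword(...) are factory methods from the old
    // (pre-3.0) Lucene Field API; newer releases use TextField/StringField instead.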
    doc.add(Field.Text("contents", new FileReader(f)));
    //doc.add(Field.Keyword("filename", f.getCanonicalPath()));
    doc.add(Field.Keyword("filename", f.getAbsolutePath()));
    writer.addDocument(doc);

}

From source file:com.mycompany.lucenedemo.IndexFiles.java

/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path".  Use a
        // field that is indexed (i.e. searchable), but don't tokenize 
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongPoint that is indexed (i.e. efficiently filterable with
        // PointRangeQuery).  This indexes to millisecond resolution, which
        // is often too fine.  You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents".  Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so 
            // we use updateDocument instead to replace the old one matching the exact 
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:com.mycompany.mavenproject1.Main.java

private static void addItem(IndexWriter w, Item i) throws IOException {
    Document doc = new Document();
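    // One field per Item property: id and price are exact-match StringFields, the text
    // fields are tokenized for full-text search; all values are stored for retrieval.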
    doc.add(new StringField("id", String.valueOf(i.getId()), Field.Store.YES));
    doc.add(new StringField("price", String.valueOf(i.getPrice()), Field.Store.YES));
    doc.add(new TextField("name", i.getName(), Field.Store.YES));
    doc.add(new TextField("category", i.getCategory() != null ? i.getCategory() : "ni ma", Field.Store.YES));
    doc.add(new TextField("description", i.getDescription(), Field.Store.YES));
    w.addDocument(doc);
}

From source file:com.mycompany.restlet.search.sample.indexer.java

License:Apache License

/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {

    InputStream stream = Files.newInputStream(file);
    // make a new, empty document
    Document doc = new Document();

    // Add the path of the file as a field named "path".  Use a
    // field that is indexed (i.e. searchable), but don't tokenize 
    // the field into separate words and don't index term frequency
    // or positional information:
    Field pathField = new StringField("path", file.toString(), Field.Store.YES);
    doc.add(pathField);

    // Add the last modified date of the file as a field named "modified".
    // Use a LongField that is indexed (i.e. efficiently filterable with
    // NumericRangeFilter).  This indexes to millisecond resolution, which
    // is often too fine.  You could instead create a number based on
    // year/month/day/hour/minutes/seconds, down to the resolution you require.
    // For example the long value 2011021714 would mean
    // February 17, 2011, 2-3 PM.
    doc.add(new LongField("modified", lastModified, Field.Store.NO));

    // Add the contents of the file to a field named "contents".  Specify a Reader,
    // so that the text of the file is tokenized and indexed, but not stored.
    // Note that FileReader expects the file to be in UTF-8 encoding.
    // If that's not the case searching for special characters will fail.
    doc.add(new TextField("contents",
            new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

    if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
        // New index, so we just add the document (no old document can be there):
        System.out.println("adding " + file);
        writer.addDocument(doc);
    } else {
        // Existing index (an old copy of this document may have been indexed) so 
        // we use updateDocument instead to replace the old one matching the exact 
        // path, if present:
        System.out.println("updating " + file);
        writer.updateDocument(new Term("path", file.toString()), doc);
    }
}

From source file:com.nearinfinity.blur.search.TestingPagingCollector.java

License:Apache License

private static IndexReader getReaderFlatScore(int length) throws Exception {
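    // Build an in-memory index with "length" identical single-field documents and return a reader over it.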
    RAMDirectory directory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(directory,
            new IndexWriterConfig(LUCENE_VERSION, new KeywordAnalyzer()));
    for (int i = 0; i < length; i++) {
        Document document = new Document();
        document.add(new Field("f1", "value", Store.NO, Index.ANALYZED_NO_NORMS));
        indexWriter.addDocument(document);
    }
    indexWriter.close();
    return IndexReader.open(directory);
}

From source file:com.nearinfinity.blur.utils.TermDocIterableTest.java

License:Apache License

private void addDocumentBlock(int id, int count, IndexWriter writer) throws IOException {
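    // Create "count" documents that share the same id; each also carries 100 stored-only (unindexed) filler fields.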
    for (int i = 0; i < count; i++) {
        Document document = new Document();
        document.add(new Field("id", Integer.toString(id), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
        document.add(new Field("field", Integer.toString(i), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
        for (int j = 0; j < 100; j++) {
            document.add(new Field("field" + j,
                    "testing here testing here testing here testing here testing here testing here testing here",
                    Store.YES, Index.NO));
        }
        writer.addDocument(document);
    }
}

From source file:com.nero.model.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *  
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path".  Use a
                // field that is indexed (i.e. searchable), but don't tokenize 
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new Field("path", file.getPath(), Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS);
                pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(pathField);

                // Add the last modified date of the file as a field named "modified".
                // Use a NumericField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter).  This indexes to millisecond resolution, which
                // is often too fine.  You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down to the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                NumericField modifiedField = new NumericField("modified");
                modifiedField.setLongValue(file.lastModified());
                doc.add(modifiedField);

                // Add the contents of the file to a field named "contents".  Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so 
                    // we use updateDocument instead to replace the old one matching the exact 
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}

From source file:com.netcrest.pado.index.gemfire.lucene.TemporalLuceneDynamicIndexing.java

License:Open Source License

/**
 * Builds a single Lucene document for the specified temporal event and adds it to the index.
 *
 * @param parser
 *            Lucene parser
 * @param event
 *            Temporal event
 * @param writer
 *            Index writer
 */
private void buildTemporalEntry(StandardQueryParser parser, EntryEvent<ITemporalKey, ITemporalData> event,
        IndexWriter writer) {
    LuceneBuilder luceneBuilder = LuceneBuilder.getLuceneBuilder();

    boolean isKeyMap = false;
    KeyType keyType = null;
    Set<Object> keySet = null;
    Object firstDataObject = null;
    Method[] attributeGetters = null;
    boolean isIdentityKeyPrimitive = false;

    // First, extract out the key type.
    ITemporalKey tk = event.getKey();
    ITemporalData data = event.getNewValue();

    if (data instanceof GemfireTemporalData) {
        firstDataObject = ((GemfireTemporalData) data).getValue();
    } else {
        firstDataObject = data;
    }
    isKeyMap = firstDataObject instanceof KeyMap;
    if (isKeyMap == false) {
        if (firstDataObject instanceof Map) {
            keySet = ((Map) firstDataObject).keySet();
        } else {
            attributeGetters = ReflectionHelper.getAttributeGetters(data.getClass());
        }
    } else {
        keyType = ((KeyMap) firstDataObject).getKeyType();
        if (keyType == null) {
            keySet = ((Map) firstDataObject).keySet();
        }
    }
    Object identityKey = tk.getIdentityKey();
    isIdentityKeyPrimitive = ReflectionHelper.isPrimitiveWrapper(identityKey.getClass());

    // Next, create Lucene doc for the event
    LuceneField luceneField = new LuceneField();
    SimpleDateFormat format = (SimpleDateFormat) DateTool.Resolution.DAY.format.clone();
    Document doc = null;
    try {
        if (keyType != null) {
            KeyMap keyMap;
            if (data instanceof GemfireTemporalData) {
                keyMap = (KeyMap) ((GemfireTemporalData) data).getValue();
            } else {
                keyMap = (KeyMap) data;
            }
            keyType = keyMap.getKeyType();
            Set<String> nameSet = keyType.getNameSet();

            // TODO: See if we can support binary types
            // createDoc();
            doc = luceneBuilder.createKeyMapDocument(parser, writer, tk, data, -1, luceneField, keyType, keyMap,
                    nameSet, isIdentityKeyPrimitive, true, format);
        } else if (keySet != null) {
            Map dataMap;
            if (data instanceof GemfireTemporalData) {
                dataMap = (Map) ((GemfireTemporalData) data).getValue();
            } else {
                dataMap = (Map) data;
            }

            // TODO: See if we can support binary types
            // createDoc();
            doc = luceneBuilder.createMapDocument(parser, writer, tk, data, luceneField, dataMap, keySet,
                    isIdentityKeyPrimitive, format);
        } else {
            if (attributeGetters != null && attributeGetters.length > 0) {
                doc = luceneBuilder.createPojoDocument(parser, writer, tk, data, -1l, luceneField,
                        attributeGetters, isIdentityKeyPrimitive, true/* isNew */, format);
            }
        }
    } catch (Exception ex) {
        Logger.error(ex);
    }

    // Append the newly created doc to Lucene
    if (doc != null) {
        try {
            writer.addDocument(doc);
        } catch (Exception ex) {
            Logger.error(ex);
        }
    }
}

From source file:com.netcrest.pado.index.provider.lucene.LuceneBuilder.java

License:Open Source License

private void updateKeyMapDocument(StandardQueryParser parser, IndexWriter writer, ITemporalList tl,
        ITemporalKey tk, ITemporalData data, long endWrittenTime, LuceneField luceneField, KeyType keyType,
        KeyMap keyMap, Set<String> keyTypeNameSet, boolean isIdentityKeyPrimitive, SimpleDateFormat format)
        throws IOException {
    Query query = null;
    try {
        String queryString = String.format(TEMPORAL_KEY_QUERY_PREDICATE, tk.getIdentityKey(),
                tk.getStartValidTime(), tk.getEndValidTime(), tk.getWrittenTime());
        query = parser.parse(queryString, "__doc");
    } catch (Exception ex) {
        // Lucene 4.7 bug, internal message not serializable
        // Send message instead of nesting the cause.
        throw new RuntimeException(ex.getMessage());
    }

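    // Replace any existing document for this temporal key: delete the old match, then add the rebuilt document.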
    writer.deleteDocuments(query);

    Document doc = createKeyMapDocument(parser, writer, tk, data, endWrittenTime, luceneField, keyType, keyMap,
            keyTypeNameSet, isIdentityKeyPrimitive, false, format);
    writer.addDocument(doc);
}

From source file:com.netcrest.pado.index.provider.lucene.LuceneBuilder.java

License:Open Source License

private void updatePojoDocument(StandardQueryParser parser, IndexSearcher searcher, IndexWriter writer,
        ITemporalKey tk, ITemporalData data, long endWrittenTime, LuceneField luceneField,
        Method[] attributeGetters, boolean isIdentityKeyPrimitive, SimpleDateFormat format)
        throws IOException, IllegalArgumentException, IllegalAccessException, InvocationTargetException {
    Query query = null;
    try {
        String queryString = String.format(TEMPORAL_KEY_QUERY_PREDICATE, tk.getIdentityKey(),
                tk.getStartValidTime(), tk.getEndValidTime(), tk.getWrittenTime());
        query = parser.parse(queryString, "__doc");
    } catch (Exception ex) {
        // Lucene 4.7 bug, internal message not serializable
        // Send message instead of nesting the cause.
        throw new RuntimeException(ex.getMessage());
    }

    writer.deleteDocuments(query);

    Document doc = createPojoDocument(parser, writer, tk, data, endWrittenTime, luceneField, attributeGetters,
            isIdentityKeyPrimitive, false, format);
    writer.addDocument(doc);
}