Example usage for org.apache.lucene.index IndexWriter updateDocument

List of usage examples for org.apache.lucene.index IndexWriter updateDocument

Introduction

On this page you can find example usages of the updateDocument method of org.apache.lucene.index.IndexWriter.

Prototype

private long updateDocument(final DocumentsWriterDeleteQueue.Node<?> delNode,
            Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Usage

From source file:com.example.search.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 * /*from   w  ww.  j a va  2  s . c  om*/
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *  
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            PageProcess pageProcessor;
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
                pageProcessor = new PageProcess(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }
            WebInfo webInfo;
            try {
                //
                // make a new, empty document
                while ((webInfo = pageProcessor.next()) != null) {
                    Document doc = new Document();
                    //process page
                    // Add the path of the file as a field named "path".  Use a
                    // field that is indexed (i.e. searchable), but don't tokenize 
                    // the field into separate words and don't index term frequency
                    // or positional information:
                    //          Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
                    //          pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
                    //          doc.add(pathField);
                    Field urlField = new Field("url", webInfo.url, Field.Store.YES, Field.Index.NO);
                    doc.add(urlField);
                    Field publishidField = new Field("publishid", webInfo.publishid, Field.Store.YES,
                            Field.Index.NO);
                    doc.add(publishidField);
                    Field subjectidField = new Field("subjectid", webInfo.subjectid, Field.Store.YES,
                            Field.Index.NO);
                    doc.add(subjectidField);
                    Field titleField = new Field("title", webInfo.title, Field.Store.YES, Field.Index.NO);
                    doc.add(titleField);
                    doc.add(new Field("keywords", webInfo.keywords, Field.Store.YES, Field.Index.NO));
                    doc.add(new Field("description", webInfo.description, Field.Store.YES, Field.Index.NO));
                    doc.add(new Field("content", webInfo.content, Field.Store.YES, Field.Index.ANALYZED));
                    // Add the last modified date of the file a field named "modified".
                    // Use a NumericField that is indexed (i.e. efficiently filterable with
                    // NumericRangeFilter).  This indexes to milli-second resolution, which
                    // is often too fine.  You could instead create a number based on
                    // year/month/day/hour/minutes/seconds, down the resolution you require.
                    // For example the long value 2011021714 would mean
                    // February 17, 2011, 2-3 PM.
                    //          NumericField modifiedField = new NumericField("modified");
                    //          modifiedField.setLongValue(file.lastModified());
                    //          doc.add(modifiedField);

                    // Add the contents of the file to a field named "contents".  Specify a Reader,
                    // so that the text of the file is tokenized and indexed, but not stored.
                    // Note that FileReader expects the file to be in UTF-8 encoding.
                    // If that's not the case searching for special characters will fail.
                    //          doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                    if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {//
                        // New index, so we just add the document (no old document can be there):
                        //            System.out.println("adding " + file);
                        writer.addDocument(doc);
                    } else {
                        // Existing index (an old copy of this document may have been indexed) so 
                        // we use updateDocument instead to replace the old one matching the exact 
                        // path, if present:
                        //            System.out.println("updating " + file);
                        writer.updateDocument(new Term("url", webInfo.url), doc);
                    }
                } //end while
            } finally {
                System.out.println("adding " + file);
                fis.close();
            }
        }
    }
}

From source file:com.flycode.CRIBSearch.SearchEngine.Demo.IndexFiles.java

License:Apache License

/**
 * Builds a single Lucene document for {@code file} and writes it through {@code writer}.
 * <p>
 * The document carries three fields: "path" (the path string, stored, un-tokenized),
 * "modified" (the supplied timestamp as a {@code LongPoint}) and "contents" (the file
 * body read as UTF-8, tokenized and indexed but not stored).
 *
 * @param writer       index writer that receives the document
 * @param file         file whose content is indexed
 * @param lastModified value recorded in the "modified" field
 * @throws IOException if the file cannot be opened or the index write fails
 */
private static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        final String pathText = file.toString();
        final Document doc = new Document();

        // Exact-match key: indexed as one token, no term frequencies or positions.
        doc.add(new StringField("path", pathText, Field.Store.YES));

        // Millisecond resolution is often finer than needed; a coarser number such as
        // 2011021714 (February 17, 2011, 2-3 PM) could be indexed instead.
        doc.add(new LongPoint("modified", lastModified));

        // Passing a Reader means the text is tokenized and indexed but not stored.
        // The bytes are decoded as UTF-8; files in other encodings will not be
        // searchable for their special characters.
        final BufferedReader body = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
        doc.add(new TextField("contents", body));

        final boolean freshIndex = writer.getConfig().getOpenMode() == OpenMode.CREATE;
        if (!freshIndex) {
            // An older copy of this file may already be indexed: replace whatever
            // document matches the exact path.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", pathText), doc);
        } else {
            // Brand-new index: nothing to replace, a plain add is enough.
            System.out.println("adding " + file);
            writer.addDocument(doc);
        }
    }
}

From source file:com.fun.sb.demo.lucene.IndexFiles.java

License:Apache License

/**
 * Indexes a single document.
 * <p>
 * The document gets a stored, un-tokenized "path" field and a tokenized, non-stored
 * "contents" field read from the file as UTF-8. {@code lastModified} is accepted but
 * not written to the index by this variant.
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        Document document = new Document();

        // "path" is the lookup key: searchable as a whole, never split into words.
        String location = file.toString();
        document.add(new StringField("path", location, Field.Store.YES));

        // Decode the raw bytes as UTF-8 and hand Lucene a Reader so the text is
        // tokenized and indexed without being stored.
        InputStreamReader decoded = new InputStreamReader(stream, StandardCharsets.UTF_8);
        document.add(new TextField("contents", new BufferedReader(decoded)));

        OpenMode mode = writer.getConfig().getOpenMode();
        if (mode != OpenMode.CREATE) {
            // The index may already hold an older copy: replace the document whose
            // "path" term matches exactly, if there is one.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", location), document);
            return;
        }

        // Freshly created index: no previous copy can exist, so simply add.
        System.out.println("adding " + file);
        writer.addDocument(document);
    }
}

From source file:com.gauronit.tagmata.core.Indexer.java

License:Open Source License

/**
 * Writes a bookmark document (fields "qcId" and "qcIndexName") into the main index.
 * <p>
 * A new {@code IndexWriter} is opened on {@code indexDir/MAIN_INDEX} for each call. The
 * writer is always released: it is closed after a successful commit and rolled back
 * (which also closes it) when anything fails, so a failed save does not leave the
 * index write lock behind. Failures are printed, not rethrown.
 *
 * @param id        value stored in "qcId" and used as the "id" term for the update
 * @param indexName value stored in "qcIndexName"
 */
public void saveBookmark(String id, String indexName) {
    try {
        IndexWriter mainIndexWriter = new IndexWriter(
                FSDirectory.open(new File(indexDir + File.separator + MAIN_INDEX)),
                new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));

        boolean committed = false;
        try {
            Document doc = new Document();
            doc.add(new Field("qcId", id, Store.YES, Index.NOT_ANALYZED));
            doc.add(new Field("qcIndexName", indexName, Store.YES, Index.NOT_ANALYZED));

            // NOTE(review): the delete term targets field "id" while the document only
            // carries "qcId"/"qcIndexName" - confirm this is the intended key.
            mainIndexWriter.updateDocument(new Term("id", id), doc);
            mainIndexWriter.prepareCommit();
            mainIndexWriter.commit();
            committed = true;
        } finally {
            if (committed) {
                mainIndexWriter.close();
            } else {
                // discard the partial work and release the writer
                mainIndexWriter.rollback();
            }
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}

From source file:com.github.alvanson.xltsearch.IndexTask.java

License:Apache License

/**
 * Drains {@code inQueue} and applies each docket to the index until
 * {@code Docket.DONE} is received.
 * <p>
 * Per docket status: {@code PARSED} builds a document (path, content, metadata fields,
 * hash sum) and writes it with {@code updateDocument} keyed on the path field;
 * {@code DELETE} removes the documents matching the path term; {@code PASS} does
 * nothing; any other status is logged and cancels the task.
 * <p>
 * The index writer is closed in a {@code finally} block so it is released even when an
 * unchecked exception escapes the loop.
 *
 * @return {@code true} only if the queue was drained to {@code Docket.DONE} without an
 *         I/O error or interruption
 */
@Override
protected Boolean call() {
    IndexWriter iwriter = null;
    boolean result = false;

    updateMessage("started");
    try {
        int count = 0;
        Docket docket;

        IndexWriterConfig iwconfig = new IndexWriterConfig(config.getVersion(), config.getAnalyzer());
        iwconfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        iwconfig.setSimilarity(config.getSimilarity());
        iwriter = new IndexWriter(config.getDirectory(), iwconfig);

        while ((docket = inQueue.take()) != Docket.DONE) {
            count++;
            updateMessage(docket.relPath);
            switch (docket.status) {
            case PARSED:
                // index parsed file
                Document doc = new Document();
                // store relative path  ** must be indexed for updateDocument
                doc.add(new StringField(config.pathField, docket.relPath, Field.Store.YES));
                // index content
                doc.add(new TextField(config.contentField, docket.content.toString(), Field.Store.NO));
                // index standard metadata
                for (Map.Entry<String, Property> e : config.metadataFields.entrySet()) {
                    for (String value : docket.metadata.getValues(e.getValue())) {
                        doc.add(new TextField(e.getKey(), value, Field.Store.YES));
                    }
                }
                // store hashsum
                doc.add(new StringField(config.hashSumField, docket.hashSum, Field.Store.YES));
                // add/update document
                iwriter.updateDocument(new Term(config.pathField, docket.relPath), doc);
                break;
            case PASS:
                // nothing to write for this docket
                break;
            case DELETE:
                iwriter.deleteDocuments(new Term(config.pathField, docket.relPath));
                break;
            default:
                logger.error("Unexpected docket state while processing {}: {}", docket.relPath,
                        docket.status.toString());
                cancel(true); // cancel task
            }
            updateProgress(count, count + docket.workLeft);
        }
        // end of queue (docket is Docket.DONE here)
        updateMessage("complete");
        updateProgress(count, count + docket.workLeft);
        result = true;
    } catch (IOException ex) {
        updateMessage("I/O exception");
        logger.error("I/O exception while writing to index", ex);
    } catch (InterruptedException ex) {
        if (isCancelled()) {
            updateMessage("cancelled");
        } else {
            updateMessage("interrupted");
            logger.error("Interrupted", ex);
        }
    } finally {
        // close iwriter on every exit path, including unchecked exceptions
        if (iwriter != null) {
            try {
                iwriter.close();
            } catch (IOException ex) {
                logger.warn("I/O exception while closing index writer", ex);
            }
        }
    }
    return result;
}

From source file:com.github.mosuka.apache.lucene.example.cmd.UpdateCommand.java

License:Apache License

/**
 * Updates (or adds) one document in the index named by {@code attrs} and prints a JSON
 * status object to standard output.
 * <p>
 * Reads the "index", "unique_id" and "text" entries from {@code attrs}, builds the
 * document via {@code LuceneExampleUtil}, and writes it with {@code updateDocument}
 * keyed on the document's "id" value. The response carries "status" 0 / "message" "OK"
 * on success, status -1 when the update raises an {@code IOException}, and status 1
 * when closing the writer or the directory fails.
 *
 * @param attrs command attributes; "index", "unique_id" and "text" are read as strings
 */
@Override
public void execute(Map<String, Object> attrs) {
    final Map<String, Object> response = new LinkedHashMap<String, Object>();
    IndexWriter indexWriter = null;
    Directory directory = null;

    try {
        final String indexPath = (String) attrs.get("index");
        final String uniqueId = (String) attrs.get("unique_id");
        final String text = (String) attrs.get("text");

        directory = FSDirectory.open(new File(indexPath).toPath());

        final Document document = LuceneExampleUtil.createDocument(uniqueId, text);

        final IndexWriterConfig writerConfig = new IndexWriterConfig(LuceneExampleUtil.createAnalyzerWrapper());
        writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);

        indexWriter = new IndexWriter(directory, writerConfig);
        // replace any existing document carrying the same "id", then make it durable
        final Term idTerm = new Term("id", document.get("id"));
        indexWriter.updateDocument(idTerm, document);
        indexWriter.commit();

        response.put("status", 0);
        response.put("message", "OK");
    } catch (IOException failure) {
        response.put("status", -1);
        response.put("message", failure.getMessage());
    } finally {
        // release the writer first, then the directory; a close failure overrides the status
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (IOException closeFailure) {
                response.put("status", 1);
                response.put("message", closeFailure.getMessage());
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException closeFailure) {
                response.put("status", 1);
                response.put("message", closeFailure.getMessage());
            }
        }
    }

    String json;
    try {
        json = new ObjectMapper().writeValueAsString(response);
    } catch (IOException serializationFailure) {
        // fall back to a hand-built payload when the mapper itself fails
        json = String.format("{\"status\":1, \"message\":\"%s\"}", serializationFailure.getMessage());
    }
    System.out.println(json);
}

From source file:com.github.rnewson.couchdb.lucene.Progress.java

License:Apache License

/**
 * Writes the {@code progress} document to the index through the given writer.
 * <p>
 * Delegates to {@code writer.updateDocument(PROGRESS_TERM, progress)}; both
 * {@code PROGRESS_TERM} and {@code progress} are defined outside this method.
 * This method does not commit; presumably the caller commits the writer.
 *
 * @param writer the index writer that receives the update
 * @throws IOException if the writer's {@code updateDocument} call fails
 */
public void save(final IndexWriter writer) throws IOException {
    writer.updateDocument(PROGRESS_TERM, progress);
}

From source file:com.github.tenorviol.gitsearch.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 * <p>
 * Entries whose name starts with a dot, and entries with a known binary extension
 * (compared case-insensitively), are skipped.
 * <p>
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException if reading the file or writing to the index fails
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // TODO: make these exclusions configurable
    String fileName = file.getName();
    // startsWith copes with an empty name (e.g. a filesystem root), where charAt(0) would throw
    if (fileName.startsWith(".")) {
        return;
    }
    String extension = fileName.substring(fileName.lastIndexOf('.') + 1);
    // known binary extensions, matched regardless of case so "JPG" is skipped like "jpg"
    String[] binaryExtensions = { "jpg", "png", "gif", "pdf", "fla", "flv", "swf", "swz" };
    for (String binaryExtension : binaryExtensions) {
        if (extension.equalsIgnoreCase(binaryExtension)) {
            return;
        }
    }

    // do not try to index files that cannot be read
    if (!file.canRead()) {
        return;
    }

    if (file.isDirectory()) {
        String[] files = file.list();
        // list() yields null when an IO error occurs
        if (files != null) {
            for (String child : files) {
                indexDocs(writer, new File(file, child));
            }
        }
        return;
    }

    FileInputStream fis;
    try {
        fis = new FileInputStream(file);
    } catch (FileNotFoundException fnfe) {
        // at least on windows, some temporary files raise this exception with an "access denied" message
        // checking if the file can be read doesn't help
        return;
    }

    try {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path".  Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new Field("path", file.getPath(), Field.Store.YES,
                Field.Index.NOT_ANALYZED_NO_NORMS);
        pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
        doc.add(pathField);

        // Add the last modified date of the file a field named "modified".
        // Use a NumericField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter).  This indexes to milli-second resolution, which
        // is often too fine.  You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        NumericField modifiedField = new NumericField("modified");
        modifiedField.setLongValue(file.lastModified());
        doc.add(modifiedField);

        // Add the contents of the file to a field named "contents".  Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // The stream is decoded as UTF-8; if the file uses another encoding,
        // searching for special characters will fail.
        doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.getPath()), doc);
        }
    } finally {
        fis.close();
    }
}

From source file:com.github.wxiaoqi.search.lucene.LuceneDao.java

License:Open Source License

/**
 * Replaces the indexed document whose "id" term equals {@code indexObject.getId()}
 * with a document built from {@code indexObject}.
 * <p>
 * A new {@code IndexWriter} is opened for the call. On failure the exception is
 * printed and the writer is rolled back; the writer is closed in all cases. The writer
 * may still be {@code null} when the failure happens before or during its
 * construction, so both the rollback and the close are guarded against that.
 * No explicit commit is issued here; the method relies on {@code close()}.
 *
 * @param indexObject the object to write; its id is used as the update key
 */
public void update(IndexObject indexObject) {

    IndexWriter indexWriter = null;

    try {
        Term term = new Term("id", indexObject.getId().toString());
        IndexWriterConfig config = new IndexWriterConfig(this.getAnalyzer());
        indexWriter = new IndexWriter(this.getDirectory(), config);
        indexWriter.updateDocument(term, DocumentUtil.IndexObject2Document(indexObject));
    } catch (Exception e) {
        e.printStackTrace();
        // nothing to roll back if the writer was never created
        if (indexWriter != null) {
            try {
                indexWriter.rollback();
            } catch (IOException e1) {
                e1.printStackTrace();
            }
        }
    } finally {
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (IOException e1) {
                e1.printStackTrace();
            }
        }
    }
}

From source file:com.gmail.mosoft521.luceneDemo.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 * <p/>/*from w w  w.  j ava2s . com*/
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * &gt;WriteLineDocTask</a&gt;.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file   The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path".  Use a
                // field that is indexed (i.e. searchable), but don't tokenize
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter).  This indexes to milli-second resolution, which
                // is often too fine.  You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents".  Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new TextField("contents",
                        new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}