Example usage for org.apache.lucene.index IndexWriter addDocument

Introduction

On this page you can find example usages of org.apache.lucene.index IndexWriter addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException

Source Link

Document

Adds a document to this index.

Usage

From source file:com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java

License:Open Source License

/**
 * Indexes the given object as a new Lucene document.
 * <p>
 * Every field returned by {@code createIndexFields()} is populated from the object and
 * added to the document; a field whose string value is empty according to
 * {@code StringUtils.isEmpty} is given {@code NULL_VALUE} instead.
 *
 * @param object      the object to index
 * @param indexWriter the writer the new document is added to
 * @throws RuntimeException if {@code getDocNum(object)} returns anything other than -1,
 *                          or if the writer fails with an I/O error (kept as the cause)
 */
protected synchronized void add(T object, IndexWriter indexWriter) {
    int existingDocNum = getDocNum(object);
    if (existingDocNum != -1) {
        throw new RuntimeException("Document dj existant! (docNum:" + existingDocNum + ")");
    }

    Document document = new Document();
    for (Field field : createIndexFields()) {
        populateIndexField(object, field, document);
        if (StringUtils.isEmpty(field.stringValue())) {
            field.setStringValue(NULL_VALUE);
        }
        document.add(field);
    }

    try {
        indexWriter.addDocument(document);
    } catch (IOException e) {
        // CorruptIndexException is an IOException, so one handler covers both cases.
        throw new RuntimeException(e);
    }
}

From source file:com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java

License:Open Source License

/**
 * Removes every document from the index stored in {@code indexDir}, then adds a single
 * empty document (presumably so the index is never left without any document — confirm).
 * <p>
 * The writer and the directory are released in {@code finally} blocks so that a failure
 * part-way through does not leave them open.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised while opening,
 *                          modifying or closing the index
 */
@Override
public synchronized void deleteAll() {
    try {
        Directory directory = FSDirectory.open(indexDir);
        try {
            Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH);
            IndexWriter indexWriter = new IndexWriter(directory,
                    new IndexWriterConfig(Version.LUCENE_44, analyzer));
            try {
                indexWriter.deleteAll();
                indexWriter.addDocument(new Document());
            } finally {
                // Always release the writer, even when one of the calls above failed.
                indexWriter.close();
            }
        } finally {
            directory.close();
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java

License:Open Source License

/**
 * Creates the index in {@code indexDir} when {@code DirectoryReader.indexExists} reports
 * that none is present, by opening a writer, adding one empty document and closing it.
 * <p>
 * The writer and the directory are released in {@code finally} blocks so that a failure
 * part-way through does not leave them open.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised while opening,
 *                          writing or closing the index
 */
private void createIndexIfNecessary() {
    try {
        Directory directory = FSDirectory.open(indexDir);
        try {
            if (!DirectoryReader.indexExists(directory)) {
                Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH);
                IndexWriter indexWriter = new IndexWriter(directory,
                        new IndexWriterConfig(Version.LUCENE_44, analyzer));
                try {
                    indexWriter.addDocument(new Document());
                } finally {
                    // Always release the writer, even when addDocument failed.
                    indexWriter.close();
                }
            }
        } finally {
            directory.close();
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.dreamerpartner.codereview.lucene.IndexHelper.java

License:Apache License

/**
 * Adds a document to, or replaces a document in, the index of the given module.
 * <p>
 * The index is opened in {@code CREATE_OR_APPEND} mode at the path returned by
 * {@code LuceneUtil.getIndexPath(module)}, the change is committed, and both the writer
 * and the directory are closed before returning, whether or not the operation succeeded.
 *
 * @param module  name of the module whose index is written
 * @param doc     the document to add or to store as the replacement
 * @param isNew   {@code true} to add {@code doc}; {@code false} to update the documents
 *                matching {@code delTerm} with {@code doc}
 * @param delTerm term identifying the documents to replace; only used when
 *                {@code isNew} is {@code false}
 * @throws IOException if opening, writing, committing or closing the index fails
 */
@SuppressWarnings("deprecation")
public static void add(String module, Document doc, boolean isNew, Term delTerm) throws IOException {
    long beginTime = System.currentTimeMillis();
    Directory dir = null;
    IndexWriter writer = null;
    try {
        dir = FSDirectory.open(new File(LuceneUtil.getIndexPath(module)));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_0);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_0, analyzer);
        iwc.setMaxBufferedDocs(100);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(dir, iwc);
        if (isNew) {
            writer.addDocument(doc);
        } else {
            writer.updateDocument(delTerm, doc);
        }
        writer.commit();
    } finally {
        long endTime = System.currentTimeMillis();
        logger.debug("isNew:" + isNew + ", add consume " + (endTime - beginTime) + " milliseconds.");
        try {
            if (writer != null) {
                writer.close();
            }
        } finally {
            // The directory was previously never closed; release it even if the
            // writer failed to open or to close.
            if (dir != null) {
                dir.close();
            }
        }
    }
}

From source file:com.duroty.lucene.bookmark.indexer.BookmarkIndexer.java

License:Open Source License

/**
 * DOCUMENT ME!/*from w  w  w. jav  a2  s  .  com*/
 *
 * @param path DOCUMENT ME!
 * @param id DOCUMENT ME!
 * @param doc DOCUMENT ME!
 * @param analyzer DOCUMENT ME!
 *
 * @throws Exception DOCUMENT ME!
 */
public void insertDocument(String path, String id, Document doc, Analyzer analyzer) throws Exception {
    if (!path.endsWith(File.separator)) {
        path = path + File.separator + SIMPLE_PATH_NAME + File.separator;
    } else {
        path = path + SIMPLE_PATH_NAME + File.separator;
    }

    IndexWriter writer = null;
    File file = null;
    boolean create = false;

    try {
        file = new File(path + id);

        if (!IndexReader.indexExists(file)) {
            file.mkdirs();

            create = true;
        }

        if (IndexReader.isLocked(path)) {
            Thread.sleep(sleepTime);

            if (countInsert > 5) {
                throw new Exception("The index lucene MainIndexer is locked insert document");
            }

            countInsert++;

            insertDocument(path, id, doc, analyzer);

            return;
        }

        Directory dir = FSDirectory.getDirectory(file, create);

        writer = new IndexWriter(dir, analyzer, create);
        writer.setMaxFieldLength(Integer.MAX_VALUE);
        writer.addDocument(doc);

        writer.optimize();

        writer.close();
        writer = null;
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
            }
        }

        //Aquest fitxer dins el directori de l'index individual ens permet saber si est indexant
        File unlock = new File(file, FileUtilities.FILE_IS_UNLOCK);

        try {
            unlock.createNewFile();
        } catch (Exception e) {
        }
    }
}

From source file:com.duroty.lucene.bookmark.indexer.BookmarkIndexer.java

License:Open Source License

/**
 * DOCUMENT ME!/* ww w  . j  ava 2 s .  c  o  m*/
 *
 * @param path DOCUMENT ME!
 * @param field DOCUMENT ME!
 * @param doc DOCUMENT ME!
 */
public static void createSpell(String path, String field, Document doc) throws Exception {
    RAMDirectory ramDir = null;
    IndexWriter writer = null;

    try {
        ramDir = new RAMDirectory();
        writer = new IndexWriter(ramDir, new DictionaryAnalyzer(), true);
        writer.addDocument(doc);
        writer.optimize();
        writer.close();

        DidYouMeanIndexer.createSpell(field, ramDir, path);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
            }
        }
    }
}

From source file:com.duroty.service.analyzer.LuceneFiltersAnalysis.java

License:Open Source License

/**
 * Runs the repository's filters against the current message and applies the actions of
 * every filter that matches.
 * <p>
 * The principal document of {@code luceneMessage} is written to a temporary in-memory
 * index; each filter is turned into a query and searched against that index. For a
 * matching filter the message may be moved to the {@code HIDDEN} or {@code TRASH} box
 * (never when its box is {@code SPAM} or {@code DRAFT}), flagged, labelled, and
 * forwarded to the filter's forward address. Any exception raised while processing is
 * logged through {@code DLog} and not propagated.
 *
 * @param repositoryName repository whose filters are loaded
 * @param messageName not used by this method
 * @param mime the original mail, used as the source when forwarding
 */
public void service(String repositoryName, String messageName, MimeMessage mime)
        throws Exception, Throwable, OutOfMemoryError {
    Session hsession = null;
    RAMDirectory auxDir = null;

    try {
        hsession = hfactory.openSession();

        auxDir = new RAMDirectory();

        IndexWriter auxWriter = new IndexWriter(auxDir, analyzer, true);
        try {
            auxWriter.addDocument(luceneMessage.getDocPrincipal());
            auxWriter.optimize();
        } finally {
            // Release the writer even when indexing the message failed.
            auxWriter.close();
        }

        Vector filters = getFilters(hsession, repositoryName);

        // Messages already in SPAM or DRAFT keep their box whatever the filters say.
        String box = message.getMesBox();
        boolean setbox = !(box.equals("SPAM") || box.equals("DRAFT"));

        if (filters != null && filters.size() > 0) {
            // The temporary index does not change while the filters run, so one
            // searcher serves every filter and is closed once at the end.
            IndexSearcher auxSearcher = new IndexSearcher(auxDir);

            try {
                while (filters.size() > 0) {
                    Filter filter = (Filter) filters.remove(0);

                    org.apache.lucene.search.Query query = FilterQueryParser.parse(filter, analyzer);

                    Hits hits = auxSearcher.search(query);

                    if (hits.length() > 0) {
                        // The filter matched, so carry out the actions associated with it.
                        if (filter.isFilArchive() && setbox) {
                            // Mark the message so that it is placed in the archived folder.
                            message.setMesBox("HIDDEN");
                        } else if (filter.isFilTrash() && setbox) {
                            message.setMesBox("TRASH");
                        }

                        if (filter.isFilImportant()) {
                            message.setMesFlagged(Boolean.TRUE);
                        }

                        if (filter.getLabel() != null) {
                            LabMes labMes = new LabMes(new LabMesId(message, filter.getLabel()));
                            message.addLabMeses(labMes);
                        }

                        if ((filter.getFilForwardTo() != null) && !filter.getFilForwardTo().trim().equals("")) {
                            InternetAddress forwardTo = null;

                            try {
                                forwardTo = new InternetAddress(filter.getFilForwardTo());
                            } catch (Exception ignored) {
                                // An unparsable forward address disables forwarding for this filter.
                                forwardTo = null;
                            }

                            if (forwardTo != null) {
                                try {
                                    InternetAddress recipient = (InternetAddress) mime.getFrom()[0];
                                    forwardMailFromLabel(recipient, forwardTo, "FW: ", mime);
                                } catch (Exception ex) {
                                    // A failed forward must not stop the remaining filters.
                                    DLog.log(DLog.DEBUG, this.getClass(), ex);
                                }
                            }
                        }
                    }
                }
            } finally {
                auxSearcher.close();
            }
        }
    } catch (Exception ex) {
        DLog.log(DLog.DEBUG, this.getClass(), ex);
    } finally {
        GeneralOperations.closeHibernateSession(hsession);

        if (auxDir != null) {
            auxDir.close();
        }
    }
}

From source file:com.eclipsesource.connect.search.Indexer.java

License:Open Source License

/**
 * Adds {@code document} to the index through {@code indexWriter}, converting the checked
 * {@link IOException} into an unchecked {@link IllegalStateException}.
 *
 * @param indexWriter the writer to add to
 * @param document the document to add
 * @throws IllegalStateException if the writer reports an I/O error; the original
 *         exception is attached as the cause
 */
private void addDocument(IndexWriter indexWriter, Document document) {
    try {
        indexWriter.addDocument(document);
    } catch (IOException unexpected) {
        // Rethrow unchecked, keeping the I/O failure as the cause.
        throw new IllegalStateException(unexpected);
    }
}

From source file:com.ecyrd.jspwiki.search.LuceneSearchProvider.java

License:Apache License

/**
 *  Indexes page using the given IndexWriter.
 *
 *  @param page WikiPage/*ww  w  . j a  va2s  .  c  om*/
 *  @param text Page text to index
 *  @param writer The Lucene IndexWriter to use for indexing
 *  @return the created index Document
 *  @throws IOException If there's an indexing problem
 */
protected Document luceneIndexPage(WikiPage page, String text, IndexWriter writer) throws IOException {
    if (log.isDebugEnabled())
        log.debug("Indexing " + page.getName() + "...");

    // make a new, empty document
    Document doc = new Document();

    if (text == null)
        return doc;

    // Raw name is the keyword we'll use to refer to this document for updates.
    Field field = new Field(LUCENE_ID, page.getName(), Field.Store.YES, Field.Index.UN_TOKENIZED);
    doc.add(field);

    // Body text.  It is stored in the doc for search contexts.
    field = new Field(LUCENE_PAGE_CONTENTS, text, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO);
    doc.add(field);

    // Allow searching by page name. Both beautified and raw
    String unTokenizedTitle = StringUtils.replaceChars(page.getName(), MarkupParser.PUNCTUATION_CHARS_ALLOWED,
            c_punctuationSpaces);

    field = new Field(LUCENE_PAGE_NAME, TextUtil.beautifyString(page.getName()) + " " + unTokenizedTitle,
            Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO);
    doc.add(field);

    // Allow searching by authorname

    if (page.getAuthor() != null) {
        field = new Field(LUCENE_AUTHOR, page.getAuthor(), Field.Store.YES, Field.Index.TOKENIZED,
                Field.TermVector.NO);
        doc.add(field);
    }

    // Now add the names of the attachments of this page
    try {
        Collection attachments = m_engine.getAttachmentManager().listAttachments(page);
        String attachmentNames = "";

        for (Iterator it = attachments.iterator(); it.hasNext();) {
            Attachment att = (Attachment) it.next();
            attachmentNames += att.getName() + ";";
        }
        field = new Field(LUCENE_ATTACHMENTS, attachmentNames, Field.Store.YES, Field.Index.TOKENIZED,
                Field.TermVector.NO);
        doc.add(field);

    } catch (ProviderException e) {
        // Unable to read attachments
        log.error("Failed to get attachments for page", e);
    }
    writer.addDocument(doc);

    return doc;
}

From source file:com.eden.lucene.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 * /*from www  .  j  av  a  2  s  .  c om*/
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *  
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path".  Use a
                // field that is indexed (i.e. searchable), but don't tokenize 
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter).  This indexes to milli-second resolution, which
                // is often too fine.  You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down the resolution you require.
                // For example the long value 4 would mean
                // February 17, 1, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents".  Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so 
                    // we use updateDocument instead to replace the old one matching the exact 
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}