Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexWriter.addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Document

Adds a document to this index.

Usage

From source file:com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java

License:Open Source License

/**
 * Builds a Lucene document for the given object and adds it through the given writer.
 *
 * <p>One field is taken from each entry returned by {@code createIndexFields()} and populated
 * via {@code populateIndexField}; a field whose string value is still empty afterwards is
 * given {@code NULL_VALUE} before being attached to the document.
 *
 * @param object the object to index
 * @param indexWriter the writer the new document is added to
 * @throws RuntimeException if {@code getDocNum(object)} returns anything other than -1, or
 *         wrapping any {@link IOException} raised while adding the document
 */
protected synchronized void add(T object, IndexWriter indexWriter) {
    int existingDocNum = getDocNum(object);
    if (existingDocNum != -1) {
        throw new RuntimeException("Document dj existant! (docNum:" + existingDocNum + ")");
    }

    Document luceneDoc = new Document();
    for (Field field : createIndexFields()) {
        populateIndexField(object, field, luceneDoc);
        if (StringUtils.isEmpty(field.stringValue())) {
            field.setStringValue(NULL_VALUE);
        }
        luceneDoc.add(field);
    }

    try {
        indexWriter.addDocument(luceneDoc);
    } catch (IOException e) {
        // CorruptIndexException is itself an IOException, so one handler covers both cases.
        throw new RuntimeException(e);
    }
}

From source file:com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java

License:Open Source License

/**
 * Deletes every document from the index stored in {@code indexDir}, then adds a single
 * empty document.
 *
 * <p>The writer and the directory are released in {@code finally} blocks so that a failure
 * while deleting or adding does not leave them open.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised while opening, modifying
 *         or closing the index
 */
@Override
public synchronized void deleteAll() {
    try {
        Directory directory = FSDirectory.open(indexDir);
        try {
            Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH);
            IndexWriter indexWriter = new IndexWriter(directory,
                    new IndexWriterConfig(Version.LUCENE_44, analyzer));
            try {
                indexWriter.deleteAll();
                // NOTE(review): presumably keeps the index non-empty after the wipe - confirm.
                indexWriter.addDocument(new Document());
            } finally {
                indexWriter.close();
            }
        } finally {
            directory.close();
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java

License:Open Source License

/**
 * Creates an index in {@code indexDir} containing one empty document when
 * {@code DirectoryReader.indexExists} reports that no index is present there yet.
 *
 * <p>The writer and the directory are released in {@code finally} blocks so that a failure
 * part-way through does not leave them open.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised while opening, writing
 *         or closing the index
 */
private void createIndexIfNecessary() {
    try {
        Directory directory = FSDirectory.open(indexDir);
        try {
            if (!DirectoryReader.indexExists(directory)) {
                Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH);
                IndexWriter indexWriter = new IndexWriter(directory,
                        new IndexWriterConfig(Version.LUCENE_44, analyzer));
                try {
                    indexWriter.addDocument(new Document());
                } finally {
                    indexWriter.close();
                }
            }
        } finally {
            directory.close();
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.dreamerpartner.codereview.lucene.IndexHelper.java

License:Apache License

/**
 * Adds or updates a single document in the index of the given module and commits.
 *
 * <p>The index directory is resolved through {@code LuceneUtil.getIndexPath(module)} and is
 * opened in {@code CREATE_OR_APPEND} mode. The elapsed time is logged at debug level before
 * the writer and the directory are closed.
 *
 * @param module module name used to locate the index directory
 * @param doc the document to write
 * @param isNew {@code true} to add {@code doc} as a new document, {@code false} to replace
 *        the documents matching {@code delTerm} with it
 * @param delTerm term identifying the documents to replace; only used when {@code isNew}
 *        is {@code false}
 * @throws IOException if opening, writing, committing or closing the index fails
 */
@SuppressWarnings("deprecation")
public static void add(String module, Document doc, boolean isNew, Term delTerm) throws IOException {
    long beginTime = System.currentTimeMillis();
    Directory dir = null;
    IndexWriter writer = null;
    try {
        dir = FSDirectory.open(new File(LuceneUtil.getIndexPath(module)));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_0);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_0, analyzer);
        iwc.setMaxBufferedDocs(100);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(dir, iwc);
        if (isNew) {
            writer.addDocument(doc);
        } else {
            writer.updateDocument(delTerm, doc);
        }
        writer.commit();
    } finally {
        long endTime = System.currentTimeMillis();
        logger.debug("isNew:" + isNew + ", add consume " + (endTime - beginTime) + " milliseconds.");
        try {
            if (writer != null) {
                writer.close();
            }
        } finally {
            // The directory was previously never closed; release it even if the writer fails to close.
            if (dir != null) {
                dir.close();
            }
        }
    }
}

From source file:com.duroty.lucene.bookmark.indexer.BookmarkIndexer.java

License:Open Source License

/**
 * DOCUMENT ME!/*from w  w  w. jav  a2  s  .  com*/
 *
 * @param path DOCUMENT ME!
 * @param id DOCUMENT ME!
 * @param doc DOCUMENT ME!
 * @param analyzer DOCUMENT ME!
 *
 * @throws Exception DOCUMENT ME!
 */
public void insertDocument(String path, String id, Document doc, Analyzer analyzer) throws Exception {
    if (!path.endsWith(File.separator)) {
        path = path + File.separator + SIMPLE_PATH_NAME + File.separator;
    } else {
        path = path + SIMPLE_PATH_NAME + File.separator;
    }

    IndexWriter writer = null;
    File file = null;
    boolean create = false;

    try {
        file = new File(path + id);

        if (!IndexReader.indexExists(file)) {
            file.mkdirs();

            create = true;
        }

        if (IndexReader.isLocked(path)) {
            Thread.sleep(sleepTime);

            if (countInsert > 5) {
                throw new Exception("The index lucene MainIndexer is locked insert document");
            }

            countInsert++;

            insertDocument(path, id, doc, analyzer);

            return;
        }

        Directory dir = FSDirectory.getDirectory(file, create);

        writer = new IndexWriter(dir, analyzer, create);
        writer.setMaxFieldLength(Integer.MAX_VALUE);
        writer.addDocument(doc);

        writer.optimize();

        writer.close();
        writer = null;
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
            }
        }

        //Aquest fitxer dins el directori de l'index individual ens permet saber si est indexant
        File unlock = new File(file, FileUtilities.FILE_IS_UNLOCK);

        try {
            unlock.createNewFile();
        } catch (Exception e) {
        }
    }
}

From source file:com.duroty.lucene.bookmark.indexer.BookmarkIndexer.java

License:Open Source License

/**
 * DOCUMENT ME!/* ww w  . j  ava 2 s .  c  o  m*/
 *
 * @param path DOCUMENT ME!
 * @param field DOCUMENT ME!
 * @param doc DOCUMENT ME!
 */
public static void createSpell(String path, String field, Document doc) throws Exception {
    RAMDirectory ramDir = null;
    IndexWriter writer = null;

    try {
        ramDir = new RAMDirectory();
        writer = new IndexWriter(ramDir, new DictionaryAnalyzer(), true);
        writer.addDocument(doc);
        writer.optimize();
        writer.close();

        DidYouMeanIndexer.createSpell(field, ramDir, path);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
            }
        }
    }
}

From source file:com.duroty.service.analyzer.LuceneFiltersAnalysis.java

License:Open Source License

/**
 * Runs the repository's filters against the principal Lucene document of the current
 * message and applies the actions of every filter that matches.
 *
 * <p>The document from {@code luceneMessage.getDocPrincipal()} is indexed into a temporary
 * in-memory index; each filter from {@code getFilters} is turned into a query and searched
 * against it. For a matching filter the message may be moved to the "HIDDEN" or "TRASH" box
 * (unless its current box is "SPAM" or "DRAFT"), flagged, labelled, and forwarded to the
 * filter's forward address. Any {@code Exception} raised during processing is logged via
 * {@code DLog} and not rethrown.
 *
 * @param repositoryName repository whose filters are loaded
 * @param messageName not used by this method
 * @param mime the MIME message; its first "from" address is passed to
 *        {@code forwardMailFromLabel} together with the forward address
 */
public void service(String repositoryName, String messageName, MimeMessage mime)
        throws Exception, Throwable, OutOfMemoryError {
    Session hsession = null;
    RAMDirectory auxDir = null;

    try {
        hsession = hfactory.openSession();

        auxDir = new RAMDirectory();

        IndexWriter auxWriter = new IndexWriter(auxDir, analyzer, true);
        try {
            auxWriter.addDocument(luceneMessage.getDocPrincipal());
            auxWriter.optimize();
        } finally {
            // Close the writer even when indexing fails.
            auxWriter.close();
        }

        Vector filters = getFilters(hsession, repositoryName);

        // Messages already in SPAM or DRAFT must not be moved to another box by a filter.
        String box = message.getMesBox();
        boolean setbox = !box.equals("SPAM") && !box.equals("DRAFT");

        if (filters != null) {
            while (filters.size() > 0) {
                Filter filter = (Filter) filters.remove(0);
                IndexSearcher auxSearcher = new IndexSearcher(auxDir);

                try {
                    org.apache.lucene.search.Query query = FilterQueryParser.parse(filter, analyzer);

                    Hits hits = auxSearcher.search(query);

                    if (hits.length() > 0) {
                        // The filter matched, so the actions associated with it must be carried out.
                        if (filter.isFilArchive() && setbox) {
                            // Mark the message so that it is placed in the archived folder.
                            message.setMesBox("HIDDEN");
                        } else if (filter.isFilTrash() && setbox) {
                            message.setMesBox("TRASH");
                        }

                        if (filter.isFilImportant()) {
                            message.setMesFlagged(Boolean.TRUE);
                        }

                        if (filter.getLabel() != null) {
                            LabMes labMes = new LabMes(new LabMesId(message, filter.getLabel()));
                            message.addLabMeses(labMes);
                        }

                        if ((filter.getFilForwardTo() != null) && !filter.getFilForwardTo().trim().equals("")) {
                            InternetAddress forwardTo = null;

                            try {
                                forwardTo = new InternetAddress(filter.getFilForwardTo());
                            } catch (Exception ignored) {
                                // An unparsable forward address simply disables forwarding.
                                forwardTo = null;
                            }

                            if (forwardTo != null) {
                                try {
                                    InternetAddress recipient = (InternetAddress) mime.getFrom()[0];
                                    forwardMailFromLabel(recipient, forwardTo, "FW: ", mime);
                                } catch (Exception ex) {
                                    // A failed forward must not stop the remaining filters.
                                    ex.printStackTrace();
                                }
                            }
                        }
                    }
                } finally {
                    // One searcher is opened per filter; close it so it is not leaked.
                    auxSearcher.close();
                }
            }
        }
    } catch (Exception ex) {
        DLog.log(DLog.DEBUG, this.getClass(), ex);
    } finally {
        GeneralOperations.closeHibernateSession(hsession);

        if (auxDir != null) {
            auxDir.close();
        }
    }
}

From source file:com.eclipsesource.connect.search.Indexer.java

License:Open Source License

/**
 * Adds the document through the given writer, converting the checked {@link IOException}
 * into an unchecked {@link IllegalStateException}.
 *
 * @param indexWriter the writer to add to
 * @param document the document to add
 * @throws IllegalStateException wrapping the {@link IOException} if the add fails
 */
private void addDocument(IndexWriter indexWriter, Document document) {
    try {
        indexWriter.addDocument(document);
    } catch (IOException unexpected) {
        IllegalStateException wrapped = new IllegalStateException(unexpected);
        throw wrapped;
    }
}

From source file:com.ecyrd.jspwiki.search.LuceneSearchProvider.java

License:Apache License

/**
 *  Indexes page using the given IndexWriter.
 *
 *  @param page WikiPage/*ww  w  . j a  va2s  .  c  om*/
 *  @param text Page text to index
 *  @param writer The Lucene IndexWriter to use for indexing
 *  @return the created index Document
 *  @throws IOException If there's an indexing problem
 */
protected Document luceneIndexPage(WikiPage page, String text, IndexWriter writer) throws IOException {
    if (log.isDebugEnabled())
        log.debug("Indexing " + page.getName() + "...");

    // make a new, empty document
    Document doc = new Document();

    if (text == null)
        return doc;

    // Raw name is the keyword we'll use to refer to this document for updates.
    Field field = new Field(LUCENE_ID, page.getName(), Field.Store.YES, Field.Index.UN_TOKENIZED);
    doc.add(field);

    // Body text.  It is stored in the doc for search contexts.
    field = new Field(LUCENE_PAGE_CONTENTS, text, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO);
    doc.add(field);

    // Allow searching by page name. Both beautified and raw
    String unTokenizedTitle = StringUtils.replaceChars(page.getName(), MarkupParser.PUNCTUATION_CHARS_ALLOWED,
            c_punctuationSpaces);

    field = new Field(LUCENE_PAGE_NAME, TextUtil.beautifyString(page.getName()) + " " + unTokenizedTitle,
            Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO);
    doc.add(field);

    // Allow searching by authorname

    if (page.getAuthor() != null) {
        field = new Field(LUCENE_AUTHOR, page.getAuthor(), Field.Store.YES, Field.Index.TOKENIZED,
                Field.TermVector.NO);
        doc.add(field);
    }

    // Now add the names of the attachments of this page
    try {
        Collection attachments = m_engine.getAttachmentManager().listAttachments(page);
        String attachmentNames = "";

        for (Iterator it = attachments.iterator(); it.hasNext();) {
            Attachment att = (Attachment) it.next();
            attachmentNames += att.getName() + ";";
        }
        field = new Field(LUCENE_ATTACHMENTS, attachmentNames, Field.Store.YES, Field.Index.TOKENIZED,
                Field.TermVector.NO);
        doc.add(field);

    } catch (ProviderException e) {
        // Unable to read attachments
        log.error("Failed to get attachments for page", e);
    }
    writer.addDocument(doc);

    return doc;
}

From source file:com.eden.lucene.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 * /*from www  .  j  av  a  2  s  .  c om*/
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *  
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path".  Use a
                // field that is indexed (i.e. searchable), but don't tokenize 
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter).  This indexes to milli-second resolution, which
                // is often too fine.  You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down the resolution you require.
                // For example the long value 4 would mean
                // February 17, 1, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents".  Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so 
                    // we use updateDocument instead to replace the old one matching the exact 
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}