Example usage for org.apache.lucene.index IndexWriter commit

List of usage examples for org.apache.lucene.index IndexWriter commit

Introduction

On this page you can find example usages of org.apache.lucene.index IndexWriter commit.

Prototype

@Override
public final long commit() throws IOException 

Source Link

Document

Commits all pending changes (added and deleted documents, segment merges, added indexes, etc.) to the index, and syncs all referenced index files, such that a reader will see the changes and the index updates will survive an OS or machine crash or power loss.
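
Before the full examples below, here is a minimal, self-contained sketch of a typical commit() call. It assumes a recent Lucene release (6.2 or later, where commit() returns a sequence number) and a hypothetical index path; several of the examples further down use older APIs such as IndexWriter.MaxFieldLength.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CommitExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical index location; adjust to your environment.
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new TextField("body", "hello commit", Field.Store.YES));
            writer.addDocument(doc);
            // commit() flushes all pending changes and syncs the referenced index
            // files, so a newly opened reader sees the document and the update
            // survives a crash or power loss.
            long sequenceNumber = writer.commit();
            System.out.println("committed, sequence number = " + sequenceNumber);
        }
    }
}

As the examples below show, commit() is usually called after a batch of addDocument, updateDocument, or deleteDocuments operations and before the writer is closed.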

Usage

From source file:edu.harvard.iq.dvn.core.index.Indexer.java

License:Apache License

protected void updateDocument(Document doc, long studyId) throws IOException {
    try {
        IndexWriter writer = new IndexWriter(dir, getAnalyzer(), isIndexEmpty(),
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.updateDocument(new Term("id", Long.toString(studyId)), doc);
        // TODO: 
        // Figure out, eventually, what to do with the variable and file 
        // metadata searches here. 
        // -- L.A. 
        /*
         * our deleteDocument() method contains these 2 lines, below, 
         * in addition to the deleteDocument() method for the term based on 
         * "id", as above. 
        reader.deleteDocuments(new Term("varStudyId",Long.toString(studyId)));
        reader.deleteDocuments(new Term("versionStudyId",Long.toString(studyId)));
         */
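        // commit the updated document so it becomes visible to readers and durable on disk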
        writer.commit();
        writer.close();
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}

From source file:edu.harvard.iq.dvn.core.index.Indexer.java

License:Apache License

protected void updateStudyDocument(long studyId, String field, String value) throws IOException {
    IndexReader reader = IndexReader.open(dir, false);

    try {
        if (reader != null) {
            TermDocs matchingDocuments = reader.termDocs();

            if (matchingDocuments != null) {
                int c = 1;
                if (matchingDocuments.next()) {
                    // We only expect 1 document when searching by study id.
                    Document studyDocument = reader.document(matchingDocuments.doc());

                    logger.fine("processing matching document number " + c++);
                    if (studyDocument != null) {
                        logger.fine("got a non-zero doc;");

                        reader.close();
                        reader = null;

                        logger.fine("deleted the document;");

                        //updateDocument(studyDocument, studyId);
                        IndexWriter localWriter = new IndexWriter(dir, getAnalyzer(), isIndexEmpty(),
                                IndexWriter.MaxFieldLength.UNLIMITED);
                        localWriter.updateDocument(new Term("id", Long.toString(studyId)), studyDocument);

                        localWriter.commit();
                        localWriter.close();
                        logger.fine("wrote the updated version of the document;");

                    }
                }
            }
        }

    } catch (IOException ex) {
        ex.printStackTrace();
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}

From source file:edu.stanford.moonshot.Index.java

License:Apache License

public static void main(String[] args) {
    // Direct way: Make a TDB-backed Jena model in the named directory.
    String directory = "yago-jena";
    Model model = TDBFactory.createModel(directory);

    try {
        IndexWriter indexWriter = IndexWriterFactory.create(FSDirectory.open(new File(directory + "/larq")));
        IndexBuilderString larqBuilder = new IndexBuilderString(indexWriter);
        StmtIterator sIter = model.listStatements();

        // Only index certain kinds of relationships

        int saved = 0;
        Pattern labelPattern = Pattern
                .compile("label|prefLabel|isPreferredMeaningOf|hasGivenName|hasFamilyName|hasGloss");

        for (int i = 0; sIter.hasNext(); i++) {
            Statement stmt = sIter.next();
            System.out.print("Indexed: " + i + " Saved: " + saved + "\r");
            String stmtName = stmt.getPredicate().getLocalName();
            Matcher matcher = labelPattern.matcher(stmtName);
            if (matcher.matches()) {
                larqBuilder.indexStatement(stmt);
                saved++;
                if (saved % 10000 == 9999) {
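                    // periodic commit: flush the batch to disk roughly every 10,000 saved statements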
                    indexWriter.commit();
                    larqBuilder.flushWriter();
                }
            }
        }

        IndexLARQ index = larqBuilder.getIndex();

        larqBuilder.closeWriter();

        LARQ.setDefaultIndex(index);

        NodeIterator nIter = index.searchModelByIndex("+Obama");
        while (nIter.hasNext()) {
            Literal lit = (Literal) nIter.nextNode();
            System.out.println(lit);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:edu.stanford.muse.index.Indexer.java

License:Apache License

private synchronized int removeEmailDocs(Collection<? extends edu.stanford.muse.index.Document> docs)
        throws IOException {
    if (iwriter != null) {
        throw new IOException("iwriter is not null. prepareForSerialization() should be called first.");
    }

    if (isearcher != null) {
        isearcher.getIndexReader().close();
        isearcher = null;
    }

    stats = null; // stats no longer valid

    int count = docIdToEmailDoc.size();

    IndexWriterConfig cfg = new IndexWriterConfig(LUCENE_VERSION, analyzer);
    IndexWriter writer = new IndexWriter(directory, cfg);
    //IndexWriter writer = new IndexWriter(directory, analyzer, false, new IndexWriter.MaxFieldLength(250000));
    assert (writer.numDocs() == docIdToEmailDoc.size());

    for (edu.stanford.muse.index.Document d : docs) {
        String id = d.getUniqueId();
        EmailDocument ed = docIdToEmailDoc.get(id);
        assert (d == ed);
        docIdToEmailDoc.remove(id);
        writer.deleteDocuments(new TermQuery(new Term("docId", id)));
        log.info("Removed doc " + id + " from index");
    }

    writer.commit();

    assert (writer.numDocs() == docIdToEmailDoc.size());

    writer.close();

    count -= docIdToEmailDoc.size(); // number of removed docs
    assert (count == docs.size());
    return count;
}

From source file:edu.unika.aifb.graphindex.index.KeywordIndexBuilder.java

License:Open Source License

public void indexKeywords() throws StorageException, IOException {
    File indexDir = idxDirectory.getDirectory(IndexDirectory.KEYWORD_DIR, !resume);
    File valueDir = idxDirectory.getDirectory(IndexDirectory.VALUE_DIR, !resume);

    this.objectProperties = Util.readEdgeSet(idxDirectory.getFile(IndexDirectory.OBJECT_PROPERTIES_FILE));
    this.relations = Util.readEdgeSet(idxDirectory.getTempFile("relations", false));
    this.attributes = Util.readEdgeSet(idxDirectory.getTempFile("attributes", false));
    properties = new HashSet<String>();
    properties.addAll(relations);
    properties.addAll(attributes);

    log.debug("attributes: " + attributes.size() + ", relations: " + relations.size());

    try {
        //         HyphenationCompoundWordAnalyzer analyzer = new HyphenationCompoundWordAnalyzer("./res/en_hyph_US.xml", "./res/en_US.dic");
        //         DictionaryCompoundWordAnalyzer analyzer = new DictionaryCompoundWordAnalyzer("./res/en_US.dic");
        CapitalizationSplitterAnalyzer analyzer = new CapitalizationSplitterAnalyzer();
        StandardAnalyzer valueAnalyzer = new StandardAnalyzer();
        IndexWriter indexWriter = new IndexWriter(indexDir, analyzer, !resume,
                new MaxFieldLength(MAXFIELDLENGTH));
        log.debug("max terms per field: " + indexWriter.getMaxFieldLength());

        valueWriter = new IndexWriter(valueDir, valueAnalyzer, !resume, new MaxFieldLength(MAXFIELDLENGTH));

        org.apache.lucene.index.IndexReader reader = null;
        if (resume) {
            reader = org.apache.lucene.index.IndexReader.open(FSDirectory.getDirectory(indexDir), true);
            log.debug("docs: " + reader.numDocs());
        }

        if (!resume) {
            log.info("Indexing concepts");
            indexSchema(indexWriter, idxDirectory.getTempFile("concepts", false), TypeUtil.CONCEPT,
                    CONCEPT_BOOST);

            log.info("Indexing attributes");
            indexSchema(indexWriter, idxDirectory.getTempFile("attributes", false), TypeUtil.ATTRIBUTE,
                    ATTRIBUTE_BOOST);

            log.info("Indexing relations");
            indexSchema(indexWriter, idxDirectory.getTempFile("relations", false), TypeUtil.RELATION,
                    RELATION_BOOST);
        }

        log.info("Indexing entities");
        indexEntity(indexWriter, idxDirectory.getTempFile("entities", false), reader);

        indexWriter.commit();
        valueWriter.commit();

        log.debug("optimizing...");
        indexWriter.optimize();
        valueWriter.optimize();

        indexWriter.close();
        valueWriter.close();

        if (blockSearcher != null)
            blockSearcher.close();
        ns.optimize();
        ns.close();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (DatabaseException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}

From source file:edu.unika.aifb.graphindex.index.KeywordIndexBuilder.java

License:Open Source License

private void indexEntity(IndexWriter indexWriter, File file, IndexReader reader)
        throws IOException, StorageException {
    try {
        BufferedReader br = new BufferedReader(new FileReader(file));
        String line;
        int entities = 0;
        double time = System.currentTimeMillis();
        while ((line = br.readLine()) != null) {
            String uri = line.trim();

            if (reader != null) {
                TermDocs td = reader.termDocs(new Term(Constant.URI_FIELD, uri));
                if (td.next())
                    continue;
            }

            Document doc = new Document();

            List<Field> fields = getFieldsForEntity(uri);

            if (fields == null)
                continue;

            for (Field f : fields)
                doc.add(f);

            indexWriter.addDocument(doc);

            //            indexWriter.commit();

            entities++;
            if (entities % 100000 == 0) {
                indexWriter.commit();
                valueWriter.commit();
                ns.commit();
                log.debug("entities indexed: " + entities + " avg: "
                        + ((System.currentTimeMillis() - time) / 100000.0));
                time = System.currentTimeMillis();
            }
        }
        br.close();

        log.debug(entities + " entities indexed");
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:edu.unika.aifb.graphindex.storage.lucene.LuceneIndexStorage.java

License:Open Source License

public void mergeSingleIndex(IndexDescription index) throws StorageException {
    try {
        reopen();

        int termsProcessed = 0, numTerms = 0, docsMerged = 0, maxValues = 0;
        TermEnum te = m_reader.terms();
        while (te.next())
            numTerms++;
        te.close();
        log.debug("terms: " + numTerms);

        if (numTerms == m_reader.maxDoc()) {
            log.debug("only one doc for each term, no merge necessary");
            log.debug("optimizing");
            optimize();
            return;
        }

        m_writer.close();

        File newDir = new File(m_directory.getAbsolutePath().substring(0,
                m_directory.getAbsolutePath().lastIndexOf(File.separator)) + File.separator
                + index.getIndexFieldName() + "_merged");
        log.debug("writing to " + newDir);
        IndexWriter writer = new IndexWriter(FSDirectory.getDirectory(newDir), new WhitespaceAnalyzer(), true,
                MaxFieldLength.UNLIMITED);
        writer.setMergeFactor(30);
        //         writer.setTermIndexInterval(IndexWriter.DEFAULT_TERM_INDEX_INTERVAL / 2); 

        te = m_reader.terms(new Term(index.getIndexFieldName(), ""));
        do {
            Term t = te.term();

            if (!t.field().equals(index.getIndexFieldName()))
                break;

            List<Integer> docIds = getDocumentIds(new TermQuery(t));
            docsMerged += docIds.size();

            TreeSet<String> values = new TreeSet<String>();
            for (int docId : docIds) {
                Document doc = getDocument(docId);
                values.add(doc.getField(index.getValueFieldName()).stringValue().trim());
            }

            if (maxValues < values.size())
                maxValues = values.size();

            StringBuilder sb = new StringBuilder();
            for (String s : values)
                sb.append(s).append('\n');

            Document doc = new Document();
            doc.add(getIndexedField(index, t.text()));
            doc.add(getStoredField(index.getValueField(), sb.toString()));
            writer.addDocument(doc);

            termsProcessed++;

            if (termsProcessed % 1000000 == 0) {
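                // periodic commit: flush the merged documents to disk every 1,000,000 processed terms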
                writer.commit();
                System.gc();
                log.debug("terms: " + termsProcessed + "/" + numTerms + ", docs merged: " + docsMerged
                        + ", max values: " + maxValues + ", " + Util.memory());
            }
        } while (te.next());
        te.close();

        m_searcher.close();
        m_reader.close();

        log.debug("optimizing new index");
        writer.commit();
        writer.optimize();
        writer.close();

        Util.deleteDirectory(m_directory);
        newDir.renameTo(m_directory);
        initialize(false, m_readonly);
    } catch (IOException e) {
        throw new StorageException(e);
    }

}

From source file:edu.ur.ir.groupspace.service.DefaultGroupWorkspaceIndexService.java

License:Apache License

public void add(List<GroupWorkspace> groupWorkspaces, File indexFolder, boolean overwriteExistingIndex) {
    LinkedList<Document> docs = new LinkedList<Document>();

    for (GroupWorkspace w : groupWorkspaces) {
        log.debug("Adding workspace " + w);
        docs.add(getDocument(w));
    }

    IndexWriter writer = null;
    Directory directory = null;
    try {
        directory = FSDirectory.open(indexFolder);

        if (overwriteExistingIndex) {
            writer = getWriterOverwriteExisting(directory);
        } else {
            writer = getWriter(directory);
        }

        for (Document d : docs) {
            writer.addDocument(d);
        }
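        // commit the batch so the added documents are persisted before the writer is closed in finally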
        writer.commit();
    } catch (Exception e) {
        log.error(e);
        errorEmailService.sendError(e);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                log.error(e);
            }
        }
        writer = null;
        try {
            IndexWriter.unlock(directory);
        } catch (IOException e1) {
            log.error(e1);
        }

        if (directory != null) {
            try {
                directory.close();
            } catch (Exception e) {
                log.error(e);
            }
        }
        directory = null;
        docs = null;
    }

}

From source file:edu.ur.ir.groupspace.service.DefaultGroupWorkspaceIndexService.java

License:Apache License

public void delete(Long groupWorkspaceId, File indexFolder) {
    if (log.isDebugEnabled()) {
        log.debug("deleting researcher id : " + groupWorkspaceId + " from index folder "
                + indexFolder.getAbsolutePath());
    }
    // if the group workspace does not have an index folder,
    // there is nothing to do.
    if (indexFolder == null || !indexFolder.exists() || indexFolder.list() == null
            || indexFolder.list().length == 0) {
        return;
    }

    Directory directory = null;
    IndexWriter writer = null;
    try {
        directory = FSDirectory.open(indexFolder);
        writer = getWriter(directory);
        Term term = new Term(ID, NumericUtils.longToPrefixCoded(groupWorkspaceId));
        writer.deleteDocuments(term);
        writer.commit();

    } catch (Exception e) {
        log.error(e);
        errorEmailService.sendError(e);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                log.error(e);
            }
        }
        writer = null;
        try {
            IndexWriter.unlock(directory);
        } catch (IOException e1) {
            log.error(e1);
        }

        if (directory != null) {
            try {
                directory.close();
            } catch (Exception e) {
                log.error(e);
            }
        }
        directory = null;
    }

}

From source file:edu.ur.ir.groupspace.service.DefaultGroupWorkspaceIndexService.java

License:Apache License

/**
 * Write the document to the index in the directory.
 *
 * @param directoryPath - location where the directory exists.
 * @param document - document to add to the index.
 */
private void writeDocument(File directoryPath, Document document) {
    log.debug("write document to directory " + directoryPath);
    Directory directory = null;
    IndexWriter writer = null;
    try {
        directory = FSDirectory.open(directoryPath);
        writer = getWriter(directory);
        writer.addDocument(document);
        writer.commit();
    } catch (Exception e) {
        log.error(e);
        errorEmailService.sendError(e);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                log.error(e);
            }
        }
        writer = null;
        try {
            IndexWriter.unlock(directory);
        } catch (IOException e1) {
            log.error(e1);
        }

        if (directory != null) {
            try {
                directory.close();
            } catch (Exception e) {
                log.error(e);
            }
        }
        directory = null;
    }
}