List of usage examples for org.apache.lucene.index IndexWriter commit
@Override public final long commit() throws IOException
Commits all pending changes (added and deleted documents, segment merges, added indexes, etc.) to the index, and syncs all referenced index files, such that a reader will see the changes and the index updates will survive an OS or machine crash or power loss.
From source file:edu.harvard.iq.dvn.core.index.Indexer.java
License:Apache License
protected void updateDocument(Document doc, long studyId) throws IOException { try {/* w w w .jav a 2 s .com*/ IndexWriter writer = new IndexWriter(dir, getAnalyzer(), isIndexEmpty(), IndexWriter.MaxFieldLength.UNLIMITED); writer.updateDocument(new Term("id", Long.toString(studyId)), doc); // TODO: // Figure out, eventually, what to do with the variable and file // metadata searches here. // -- L.A. /* * our deleteDocument() method contains these 2 lines, below, * in addition to the deleteDocument() method for the term based on * "id", as above. reader.deleteDocuments(new Term("varStudyId",Long.toString(studyId))); reader.deleteDocuments(new Term("versionStudyId",Long.toString(studyId))); */ writer.commit(); writer.close(); } catch (IOException ex) { ex.printStackTrace(); } }
From source file:edu.harvard.iq.dvn.core.index.Indexer.java
License:Apache License
/**
 * Re-writes the index entry for the given study by reading its current
 * document back out of the index and re-inserting it via updateDocument().
 *
 * NOTE(review): the {@code field} and {@code value} parameters are never
 * used in this body — presumably they were meant to modify the document
 * before re-indexing; confirm against callers.
 *
 * @param studyId id of the study whose document is rewritten
 * @param field   (unused) name of the field to update — TODO confirm intent
 * @param value   (unused) new value for the field — TODO confirm intent
 * @throws IOException if closing the reader in the finally block fails
 */
protected void updateStudyDocument(long studyId, String field, String value) throws IOException {
    // Open read-write (readOnly=false), though only reads are performed here.
    IndexReader reader = IndexReader.open(dir, false);
    try {
        if (reader != null) {
            // NOTE(review): termDocs() with no term enumerates documents
            // without filtering by study id — looks like it relies on the
            // first document being the right one; verify against the index
            // layout.
            TermDocs matchingDocuments = reader.termDocs();
            if (matchingDocuments != null) {
                int c = 1;
                if (matchingDocuments.next()) {
                    // We only expect 1 document when searching by study id.
                    Document studyDocument = reader.document(matchingDocuments.doc());
                    logger.fine("processing matching document number " + c++);
                    if (studyDocument != null) {
                        logger.fine("got a non-zero doc;");
                        // Close the reader before opening a writer on the
                        // same directory; null it so the finally block does
                        // not close it twice.
                        reader.close();
                        reader = null;
                        logger.fine("deleted the document;");
                        //updateDocument(studyDocument, studyId);
                        // NOTE(review): localWriter is leaked if
                        // updateDocument()/commit() throws — no finally here.
                        IndexWriter localWriter = new IndexWriter(dir, getAnalyzer(), isIndexEmpty(),
                                IndexWriter.MaxFieldLength.UNLIMITED);
                        localWriter.updateDocument(new Term("id", Long.toString(studyId)), studyDocument);
                        localWriter.commit();
                        localWriter.close();
                        logger.fine("wrote the updated version of the document;");
                    }
                }
            }
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}
From source file:edu.stanford.moonshot.Index.java
License:Apache License
public static void main(String[] args) { // Direct way: Make a TDB-back Jena model in the named directory. String directory = "yago-jena"; Model model = TDBFactory.createModel(directory); try {/*from w ww. j a va2 s . com*/ IndexWriter indexWriter = IndexWriterFactory.create(FSDirectory.open(new File(directory + "/larq"))); IndexBuilderString larqBuilder = new IndexBuilderString(indexWriter); StmtIterator sIter = model.listStatements(); // Only index certain kinds of relationships int saved = 0; Pattern labelPattern = Pattern .compile("label|prefLabel|isPreferredMeaningOf|hasGivenName|hasFamilyName|hasGloss"); for (int i = 0; sIter.hasNext(); i++) { Statement stmt = sIter.next(); System.out.print("Indexed: " + i + " Saved: " + saved + "\r"); String stmtName = stmt.getPredicate().getLocalName(); Matcher matcher = labelPattern.matcher(stmtName); if (matcher.matches()) { larqBuilder.indexStatement(stmt); saved++; if (saved % 10000 == 9999) { indexWriter.commit(); larqBuilder.flushWriter(); } } } IndexLARQ index = larqBuilder.getIndex(); larqBuilder.closeWriter(); LARQ.setDefaultIndex(index); NodeIterator nIter = index.searchModelByIndex("+Obama"); while (nIter.hasNext()) { Literal lit = (Literal) nIter.nextNode(); System.out.println(lit); } } catch (Exception e) { e.printStackTrace(); } }
From source file:edu.stanford.muse.index.Indexer.java
License:Apache License
private synchronized int removeEmailDocs(Collection<? extends edu.stanford.muse.index.Document> docs) throws IOException { if (iwriter != null) { throw new IOException("iwriter is not null. prepareForSerialization() should be called first."); }//from w ww. j ava2 s .c o m if (isearcher != null) { isearcher.getIndexReader().close(); isearcher = null; } stats = null; // stats no longer valid int count = docIdToEmailDoc.size(); IndexWriterConfig cfg = new IndexWriterConfig(LUCENE_VERSION, analyzer); IndexWriter writer = new IndexWriter(directory, cfg); //IndexWriter writer = new IndexWriter(directory, analyzer, false, new IndexWriter.MaxFieldLength(250000)); assert (writer.numDocs() == docIdToEmailDoc.size()); for (edu.stanford.muse.index.Document d : docs) { String id = d.getUniqueId(); EmailDocument ed = docIdToEmailDoc.get(id); assert (d == ed); docIdToEmailDoc.remove(id); writer.deleteDocuments(new TermQuery(new Term("docId", id))); log.info("Removed doc " + id + " from index"); } writer.commit(); assert (writer.numDocs() == docIdToEmailDoc.size()); writer.close(); count -= docIdToEmailDoc.size(); // number of removed docs assert (count == docs.size()); return count; }
From source file:edu.unika.aifb.graphindex.index.KeywordIndexBuilder.java
License:Open Source License
public void indexKeywords() throws StorageException, IOException { File indexDir = idxDirectory.getDirectory(IndexDirectory.KEYWORD_DIR, !resume); File valueDir = idxDirectory.getDirectory(IndexDirectory.VALUE_DIR, !resume); this.objectProperties = Util.readEdgeSet(idxDirectory.getFile(IndexDirectory.OBJECT_PROPERTIES_FILE)); this.relations = Util.readEdgeSet(idxDirectory.getTempFile("relations", false)); this.attributes = Util.readEdgeSet(idxDirectory.getTempFile("attributes", false)); properties = new HashSet<String>(); properties.addAll(relations);//from w w w .j a v a2 s .com properties.addAll(attributes); log.debug("attributes: " + attributes.size() + ", relations: " + relations.size()); try { // HyphenationCompoundWordAnalyzer analyzer = new HyphenationCompoundWordAnalyzer("./res/en_hyph_US.xml", "./res/en_US.dic"); // DictionaryCompoundWordAnalyzer analyzer = new DictionaryCompoundWordAnalyzer("./res/en_US.dic"); CapitalizationSplitterAnalyzer analyzer = new CapitalizationSplitterAnalyzer(); StandardAnalyzer valueAnalyzer = new StandardAnalyzer(); IndexWriter indexWriter = new IndexWriter(indexDir, analyzer, !resume, new MaxFieldLength(MAXFIELDLENGTH)); log.debug("max terms per field: " + indexWriter.getMaxFieldLength()); valueWriter = new IndexWriter(valueDir, valueAnalyzer, !resume, new MaxFieldLength(MAXFIELDLENGTH)); org.apache.lucene.index.IndexReader reader = null; if (resume) { reader = org.apache.lucene.index.IndexReader.open(FSDirectory.getDirectory(indexDir), true); log.debug("docs: " + reader.numDocs()); } if (!resume) { log.info("Indexing concepts"); indexSchema(indexWriter, idxDirectory.getTempFile("concepts", false), TypeUtil.CONCEPT, CONCEPT_BOOST); log.info("Indexing attributes"); indexSchema(indexWriter, idxDirectory.getTempFile("attributes", false), TypeUtil.ATTRIBUTE, ATTRIBUTE_BOOST); log.info("Indexing relations"); indexSchema(indexWriter, idxDirectory.getTempFile("relations", false), TypeUtil.RELATION, RELATION_BOOST); } 
log.info("Indexing entities"); indexEntity(indexWriter, idxDirectory.getTempFile("entities", false), reader); indexWriter.commit(); valueWriter.commit(); log.debug("optimizing..."); indexWriter.optimize(); valueWriter.optimize(); indexWriter.close(); valueWriter.close(); if (blockSearcher != null) blockSearcher.close(); ns.optimize(); ns.close(); } catch (IOException e) { e.printStackTrace(); } catch (DatabaseException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:edu.unika.aifb.graphindex.index.KeywordIndexBuilder.java
License:Open Source License
private void indexEntity(IndexWriter indexWriter, File file, IndexReader reader) throws IOException, StorageException { try {/*w w w . ja va 2 s . c o m*/ BufferedReader br = new BufferedReader(new FileReader(file)); String line; int entities = 0; double time = System.currentTimeMillis(); while ((line = br.readLine()) != null) { String uri = line.trim(); if (reader != null) { TermDocs td = reader.termDocs(new Term(Constant.URI_FIELD, uri)); if (td.next()) continue; } Document doc = new Document(); List<Field> fields = getFieldsForEntity(uri); if (fields == null) continue; for (Field f : fields) doc.add(f); indexWriter.addDocument(doc); // indexWriter.commit(); entities++; if (entities % 100000 == 0) { indexWriter.commit(); valueWriter.commit(); ns.commit(); log.debug("entities indexed: " + entities + " avg: " + ((System.currentTimeMillis() - time) / 100000.0)); time = System.currentTimeMillis(); } } br.close(); log.debug(entities + " entities indexed"); } catch (IOException e) { e.printStackTrace(); } }
From source file:edu.unika.aifb.graphindex.storage.lucene.LuceneIndexStorage.java
License:Open Source License
/**
 * Collapses all documents sharing a term of {@code index}'s field into a
 * single document whose value field is the newline-joined, sorted set of
 * their values. The merged index is written to a sibling "_merged"
 * directory which then replaces the original directory.
 *
 * NOTE(review): the rename-over-original at the end is not atomic with the
 * delete; a crash between deleteDirectory() and renameTo() loses the index.
 *
 * @param index description of the index (field names) being merged
 * @throws StorageException wrapping any IOException from Lucene
 */
public void mergeSingleIndex(IndexDescription index) throws StorageException {
    try {
        reopen();
        int termsProcessed = 0, numTerms = 0, docsMerged = 0, maxValues = 0;
        // First pass: count terms to detect whether a merge is needed.
        TermEnum te = m_reader.terms();
        while (te.next())
            numTerms++;
        te.close();
        log.debug("terms: " + numTerms);
        if (numTerms == m_reader.maxDoc()) {
            log.debug("only one doc for each term, no merge necessary");
            log.debug("optimizing");
            optimize();
            return;
        }
        // Release the member writer before writing the merged copy.
        m_writer.close();
        // Merged index goes into a sibling directory named
        // "<field>_merged" next to the current index directory.
        File newDir = new File(m_directory.getAbsolutePath().substring(0,
                m_directory.getAbsolutePath().lastIndexOf(File.separator)) + File.separator
                + index.getIndexFieldName() + "_merged");
        log.debug("writing to " + newDir);
        IndexWriter writer = new IndexWriter(FSDirectory.getDirectory(newDir), new WhitespaceAnalyzer(), true,
                MaxFieldLength.UNLIMITED);
        writer.setMergeFactor(30);
        // writer.setTermIndexInterval(IndexWriter.DEFAULT_TERM_INDEX_INTERVAL / 2);
        // Second pass: walk terms of the target field only (enumeration is
        // sorted, so stop at the first term of a different field).
        te = m_reader.terms(new Term(index.getIndexFieldName(), ""));
        do {
            Term t = te.term();
            if (!t.field().equals(index.getIndexFieldName()))
                break;
            List<Integer> docIds = getDocumentIds(new TermQuery(t));
            docsMerged += docIds.size();
            // TreeSet: deduplicates and sorts the merged values.
            TreeSet<String> values = new TreeSet<String>();
            for (int docId : docIds) {
                Document doc = getDocument(docId);
                values.add(doc.getField(index.getValueFieldName()).stringValue().trim());
            }
            if (maxValues < values.size())
                maxValues = values.size();
            StringBuilder sb = new StringBuilder();
            for (String s : values)
                sb.append(s).append('\n');
            // One merged document per term: indexed term + stored values.
            Document doc = new Document();
            doc.add(getIndexedField(index, t.text()));
            doc.add(getStoredField(index.getValueField(), sb.toString()));
            writer.addDocument(doc);
            termsProcessed++;
            if (termsProcessed % 1000000 == 0) {
                writer.commit();
                System.gc();
                log.debug("terms: " + termsProcessed + "/" + numTerms + ", docs merged: " + docsMerged
                        + ", max values: " + maxValues + ", " + Util.memory());
            }
        } while (te.next());
        te.close();
        // Close everything touching the old directory before replacing it.
        m_searcher.close();
        m_reader.close();
        log.debug("optimizing new index");
        writer.commit();
        writer.optimize();
        writer.close();
        Util.deleteDirectory(m_directory);
        newDir.renameTo(m_directory);
        // Reopen members against the (renamed) merged index.
        initialize(false, m_readonly);
    } catch (IOException e) {
        throw new StorageException(e);
    }
}
From source file:edu.ur.ir.groupspace.service.DefaultGroupWorkspaceIndexService.java
License:Apache License
/**
 * Adds the given group workspaces to the Lucene index in {@code indexFolder},
 * optionally overwriting any existing index. Errors are logged and emailed
 * rather than propagated; the writer and directory are always released.
 *
 * @param groupWorkspaces        workspaces to index
 * @param indexFolder            location of the index
 * @param overwriteExistingIndex true to recreate the index from scratch
 */
public void add(List<GroupWorkspace> groupWorkspaces, File indexFolder, boolean overwriteExistingIndex) {
    // Build all documents up front so writer lifetime stays short.
    LinkedList<Document> docs = new LinkedList<Document>();
    for (GroupWorkspace w : groupWorkspaces) {
        log.debug("Adding workspace " + w);
        docs.add(getDocument(w));
    }

    Directory directory = null;
    IndexWriter writer = null;
    try {
        directory = FSDirectory.open(indexFolder);
        writer = overwriteExistingIndex ? getWriterOverwriteExisting(directory) : getWriter(directory);
        for (Document d : docs) {
            writer.addDocument(d);
        }
        writer.commit();
    } catch (Exception e) {
        log.error(e);
        errorEmailService.sendError(e);
    } finally {
        // Best-effort cleanup: close the writer, drop the lock, close the
        // directory — each step logged but never rethrown.
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                log.error(e);
            }
        }
        writer = null;
        try {
            IndexWriter.unlock(directory);
        } catch (IOException e1) {
            log.error(e1);
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (Exception e) {
                log.error(e);
            }
        }
        directory = null;
        docs = null;
    }
}
From source file:edu.ur.ir.groupspace.service.DefaultGroupWorkspaceIndexService.java
License:Apache License
public void delete(Long groupWorkspaceId, File indexFolder) { if (log.isDebugEnabled()) { log.debug("deleting researcher id : " + groupWorkspaceId + " from index folder " + indexFolder.getAbsolutePath()); }/*w w w. ja v a2 s. c o m*/ // if the researcher does not have an index folder // don't need to do anything. if (indexFolder == null || !indexFolder.exists() || indexFolder.list() == null || indexFolder.list().length == 0) { return; } Directory directory = null; IndexWriter writer = null; try { directory = FSDirectory.open(indexFolder); writer = getWriter(directory); Term term = new Term(ID, NumericUtils.longToPrefixCoded(groupWorkspaceId)); writer.deleteDocuments(term); writer.commit(); } catch (Exception e) { log.error(e); errorEmailService.sendError(e); } finally { if (writer != null) { try { writer.close(); } catch (Exception e) { log.error(e); } } writer = null; try { IndexWriter.unlock(directory); } catch (IOException e1) { log.error(e1); } if (directory != null) { try { directory.close(); } catch (Exception e) { log.error(e); } } directory = null; } }
From source file:edu.ur.ir.groupspace.service.DefaultGroupWorkspaceIndexService.java
License:Apache License
/** * Write the document to the index in the directory. * /*from w w w . j a v a2s . co m*/ * @param directoryPath - location where the directory exists. * @param documents - documents to add to the directory. */ private void writeDocument(File directoryPath, Document document) { log.debug("write document to directory " + directoryPath); Directory directory = null; IndexWriter writer = null; try { directory = FSDirectory.open(directoryPath); writer = getWriter(directory); writer.addDocument(document); writer.commit(); } catch (Exception e) { log.error(e); errorEmailService.sendError(e); } finally { if (writer != null) { try { writer.close(); } catch (Exception e) { log.error(e); } } writer = null; try { IndexWriter.unlock(directory); } catch (IOException e1) { log.error(e1); } if (directory != null) { try { directory.close(); } catch (Exception e) { log.error(e); } } directory = null; } }