List of usage examples for org.apache.lucene.index.IndexWriter.commit()
@Override public final long commit() throws IOException
Commits all pending changes (added and deleted documents, segment merges, added indexes, etc.) to the index, and syncs all referenced index files, such that a reader will see the changes and the index updates will survive an OS or machine crash or power loss.
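For orientation before the project examples below, here is a minimal, self-contained sketch of the usual call pattern. It assumes a recent Lucene (the path-based FSDirectory.open of 5.x+, and the long sequence-number return value of 7.x+ shown in the signature above); the index path and field name are illustrative, not taken from any of the source files.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CommitExample {
    public static void main(String[] args) throws Exception {
        // Illustrative index location; any writable directory works.
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new TextField("body", "hello lucene", Field.Store.YES));
            writer.addDocument(doc);
            // Durably persist the pending change; a reader opened after this
            // point will see the new document, and it survives a crash.
            long seqNo = writer.commit();
            System.out.println("commit sequence number: " + seqNo);
        }
        // Note: close() also commits pending changes by default; the explicit
        // commit() above marks a durable point while the writer stays open.
    }
}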
From source file:com.github.mosuka.apache.lucene.example.cmd.UpdateCommand.java
License:Apache License
@Override
public void execute(Map<String, Object> attrs) {
    Map<String, Object> responseMap = new LinkedHashMap<String, Object>();
    String responseJSON = null;
    Directory indexDir = null;
    IndexWriter writer = null;
    try {
        String index = (String) attrs.get("index");
        String uniqueId = (String) attrs.get("unique_id");
        String text = (String) attrs.get("text");

        indexDir = FSDirectory.open(new File(index).toPath());

        Document document = LuceneExampleUtil.createDocument(uniqueId, text);

        IndexWriterConfig config = new IndexWriterConfig(LuceneExampleUtil.createAnalyzerWrapper());
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);

        writer = new IndexWriter(indexDir, config);
        writer.updateDocument(new Term("id", document.get("id")), document);
        writer.commit();

        responseMap.put("status", 0);
        responseMap.put("message", "OK");
    } catch (IOException e) {
        responseMap.put("status", -1);
        responseMap.put("message", e.getMessage());
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
        try {
            if (indexDir != null) {
                indexDir.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
    }

    try {
        ObjectMapper mapper = new ObjectMapper();
        responseJSON = mapper.writeValueAsString(responseMap);
    } catch (IOException e) {
        responseJSON = String.format("{\"status\":1, \"message\":\"%s\"}", e.getMessage());
    }
    System.out.println(responseJSON);
}
From source file:com.github.tteofili.looseen.MinHashClassifier.java
License:Apache License
public MinHashClassifier(IndexReader reader, String textField, String categoryField, int min, int hashCount,
        int hashSize) {
    this.min = min;
    this.hashCount = hashCount;
    this.hashSize = hashSize;
    try {
        Analyzer analyzer = createMinHashAnalyzer(min, hashCount, hashSize);
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        directory = new RAMDirectory();
        IndexWriter writer = new IndexWriter(directory, config);
        for (int i = 0; i < reader.maxDoc(); i++) {
            Document document = new Document();
            Document d = reader.document(i);
            String textValue = d.getField(textField).stringValue();
            String categoryValue = d.getField(categoryField).stringValue();
            document.add(new TextField(TEXT_FIELD, textValue, Field.Store.NO));
            document.add(new StringField(CLASS_FIELD, categoryValue, Field.Store.YES));
            writer.addDocument(document);
        }
        writer.commit();
        writer.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
}
From source file:com.github.tteofili.looseen.Test20NewsgroupsClassification.java
License:Apache License
void buildIndex(File indexDir, IndexWriter indexWriter) throws IOException {
    File[] groupsDir = indexDir.listFiles();
    if (groupsDir != null) {
        for (File group : groupsDir) {
            String groupName = group.getName();
            File[] posts = group.listFiles();
            if (posts != null) {
                for (File postFile : posts) {
                    String number = postFile.getName();
                    NewsPost post = parse(postFile, groupName, number);
                    Document d = new Document();
                    d.add(new StringField(CATEGORY_FIELD, post.getGroup(), Field.Store.YES));
                    d.add(new SortedDocValuesField(CATEGORY_FIELD, new BytesRef(post.getGroup())));
                    d.add(new TextField(SUBJECT_FIELD, post.getSubject(), Field.Store.YES));
                    d.add(new TextField(BODY_FIELD, post.getBody(), Field.Store.YES));
                    indexWriter.addDocument(d);
                }
            }
        }
    }
    indexWriter.commit();
}
From source file:com.github.tteofili.looseen.TestWikipediaClassification.java
License:Apache License
private static void importWikipedia(File dump, IndexWriter indexWriter) throws Exception {
    long start = System.currentTimeMillis();
    int count = 0;
    System.out.format("Importing %s...%n", dump);

    String title = null;
    String text = null;
    Set<String> cats = new HashSet<>();

    XMLInputFactory factory = XMLInputFactory.newInstance();
    StreamSource source;
    if (dump.getName().endsWith(".xml")) {
        source = new StreamSource(dump);
    } else {
        throw new RuntimeException("can index only wikipedia XML files");
    }
    XMLStreamReader reader = factory.createXMLStreamReader(source);
    while (reader.hasNext()) {
        if (count == Integer.MAX_VALUE) {
            break;
        }
        switch (reader.next()) {
        case XMLStreamConstants.START_ELEMENT:
            if ("title".equals(reader.getLocalName())) {
                title = reader.getElementText();
            } else if (TEXT_FIELD.equals(reader.getLocalName())) {
                text = reader.getElementText();
                Matcher matcher = pattern.matcher(text);
                int pos = 0;
                while (matcher.find(pos)) {
                    String group = matcher.group(1);
                    String catName = group.replaceAll("\\|\\s", "").replaceAll("\\|\\*", "");
                    Collections.addAll(cats, catName.split("\\|"));
                    pos = matcher.end();
                }
            }
            break;
        case XMLStreamConstants.END_ELEMENT:
            if ("page".equals(reader.getLocalName())) {
                Document page = new Document();
                if (title != null) {
                    page.add(new TextField(TITLE_FIELD, title, StoredField.Store.YES));
                }
                if (text != null) {
                    page.add(new TextField(TEXT_FIELD, text, StoredField.Store.YES));
                }
                for (String cat : cats) {
                    page.add(new StringField(CATEGORY_FIELD, cat, Field.Store.YES));
                    page.add(new SortedSetDocValuesField(CATEGORY_FIELD, new BytesRef(cat)));
                }
                indexWriter.addDocument(page);
                cats.clear();
                count++;
                if (count % 100000 == 0) {
                    indexWriter.commit();
                    System.out.format("Committed %d pages%n", count);
                }
            }
            break;
        }
    }
    indexWriter.commit();
    long millis = System.currentTimeMillis() - start;
    System.out.format("Imported %d pages in %d seconds (%.2fms/page)%n", count, millis / 1000,
            (double) millis / count);
}
From source file:com.github.wxiaoqi.search.lucene.LuceneDao.java
License:Open Source License
public void create(IndexObject indexObject) {
    IndexWriter indexWriter = null;
    try {
        IndexWriterConfig config = new IndexWriterConfig(this.getAnalyzer());
        indexWriter = new IndexWriter(this.getDirectory(), config);
        indexWriter.addDocument(DocumentUtil.IndexObject2Document(indexObject));
        indexWriter.commit();
    } catch (Exception e) {
        e.printStackTrace();
        // Roll back uncommitted changes; guard against the writer never having been opened.
        if (indexWriter != null) {
            try {
                indexWriter.rollback();
            } catch (IOException e1) {
                e1.printStackTrace();
            }
        }
    } finally {
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (IOException e1) {
                e1.printStackTrace();
            }
        }
    }
}
From source file:com.globalsight.ling.lucene.Index.java
License:Apache License
/**
 * Ends the batch re-creation of an index by clearing out the old
 * index files, writing the new in-memory index to disk, and
 * setting the index state to STATE_OPENED.
 *
 * @see #batchOpen()
 */
public void batchDone() throws IOException {
    synchronized (m_state) {
        if (m_state != STATE_CREATING) {
            throw new IOException("index is not being re-created");
        }
    }

    // try to unlock this dir : for unexpected shutdown
    try {
        if (IndexWriter.isLocked(m_fsDir)) {
            IndexWriter.unlock(m_fsDir);
        }
    } catch (Exception ee) {
        // ignore
    }

    // Tho reports it can happen that the index cannot be created
    // on disk (GSDEF00012703). Trap this and release the memory
    // of the ram directory.
    IndexWriter diskwriter = null;
    try {
        // MUST optimize RAMDirectory before writing it to disk.
        // m_ramIndexWriter.optimize();

        // Write all data out to disk, optimize and clean up.
        diskwriter = getIndexWriter(true);
        diskwriter.commit();

        Directory[] ds = new Directory[] { m_ramdir };
        //Directory[] ds = new Directory[] { m_fsDir };
        diskwriter.addIndexes(ds);
        //diskwriter.optimize();
        //diskwriter.close();

        // clean cache if have
        LuceneCache.cleanLuceneCache(m_directory);
    } catch (IOException ex) {
        CATEGORY.error("unexpected error when persisting index " + m_directory, ex);
        throw ex;
    } catch (Throwable ex) {
        CATEGORY.error("unexpected error when persisting index " + m_directory, ex);
        throw new IOException(ex.getMessage());
    } finally {
        IOUtils.closeWhileHandlingException(diskwriter, m_ramIndexWriter, m_ramdir);
        m_ramIndexWriter = null;
        m_ramdir = null;
        m_state = STATE_OPENED;
    }
}
From source file:com.globalsight.ling.tm2.lucene.LuceneIndexWriter.java
License:Apache License
public void remove(Collection p_tuvs) throws Exception {
    IndexWriterConfig conf = new IndexWriterConfig(LuceneUtil.VERSION, m_analyzer);
    conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(m_directory, conf);

    try {
        for (Iterator it = p_tuvs.iterator(); it.hasNext();) {
            Object tuv = it.next();
            Long id = tuv instanceof BaseTmTuv ? ((BaseTmTuv) tuv).getId()
                    : tuv instanceof TM3Tuv ? ((TM3Tuv) tuv).getId() : null;
            Term term = new Term(TuvDocument.TUV_ID_FIELD, id.toString());
            writer.deleteDocuments(term);
        }
    } catch (Throwable e) {
        c_logger.error(e.getMessage(), e);
        //indexReader.undeleteAll();
        throw (e instanceof Exception ? (Exception) e : new Exception(e));
    } finally {
        writer.commit();
        writer.close();
    }

    // clean cache if have
    LuceneCache.cleanLuceneCache(m_indexDir);
}
From source file:com.greplin.lucene.filter.PhraseFilterBenchmark.java
License:Apache License
public static void main(String[] argv) {
    Directory directory = new RAMDirectory();
    try {
        IndexWriter writer = new IndexWriter(directory,
                new IndexWriterConfig(Version.LUCENE_32, new WhitespaceAnalyzer(Version.LUCENE_32)));
        int done = 0;
        for (int i = 0; i < NUMBER_OF_SEGMENTS; i++) {
            int remaining = NUMBER_OF_SEGMENTS - i;
            int numberOfDocs;
            if (remaining == 1) {
                numberOfDocs = TOTAL_DOCS - done;
            } else {
                numberOfDocs = RANDOM.nextInt(TOTAL_DOCS - done - remaining) + 1;
            }
            done += numberOfDocs;
            System.out.println("Segment #" + i + " has " + numberOfDocs + " docs");

            for (int d = 0; d < numberOfDocs; d++) {
                int wordCount = RANDOM.nextInt(WORDS_PER_DOC_DEVIATION * 2) + AVERAGE_WORDS_PER_DOC
                        - WORDS_PER_DOC_DEVIATION;
                Document doc = new Document();
                doc.add(new Field("f", Joiner.on(' ').join(words(wordCount)), Field.Store.YES,
                        Field.Index.ANALYZED));
                doc.add(new Field("second", RANDOM.nextInt(100) < SECOND_FIELD_MATCH_PERCENTAGE ? "yes" : "no",
                        Field.Store.NO, Field.Index.ANALYZED));
                writer.addDocument(doc);
            }
            writer.commit();
        }
        writer.close();

        IndexReader reader = IndexReader.open(directory);
        IndexSearcher searcher = new IndexSearcher(reader);

        String[][] queries = new String[TOTAL_QUERIES][];
        Term[][] terms = new Term[TOTAL_QUERIES][];
        for (int q = 0; q < TOTAL_QUERIES; q++) {
            queries[q] = words(WORDS_PER_QUERY[RANDOM.nextInt(WORDS_PER_QUERY.length)]);
            terms[q] = new Term[queries[q].length];
            for (int qw = 0; qw < queries[q].length; qw++) {
                terms[q][qw] = new Term(FIELD, queries[q][qw]);
            }
        }

        // Warm up.
        new PhraseFilter(FIELD, queries[0]).getDocIdSet(reader);

        for (int round = 0; round < ROUNDS; round++) {
            System.out.println();
            String name1 = "filter";
            String name2 = "query";
            long ms1 = 0, ms2 = 0;
            for (int step = 0; step < 2; step++) {
                System.gc();
                System.gc();
                System.gc();

                if (step == (round & 1)) {
                    long millis = System.currentTimeMillis();
                    long hits = 0;
                    for (String[] queryWords : queries) {
                        PhraseFilter pf = new PhraseFilter(
                                new FilterIntersectionProvider(TermsFilter.from(new Term("second", "yes"))),
                                FIELD, queryWords);
                        hits += searcher.search(new FilteredQuery(new MatchAllDocsQuery(), pf), 1).totalHits;
                    }
                    ms1 = System.currentTimeMillis() - millis;
                    System.out.println("Finished " + name1 + " in " + ms1 + "ms with " + hits + " hits");
                } else {
                    long millis = System.currentTimeMillis();
                    long hits = 0;
                    for (Term[] queryTerms : terms) {
                        PhraseQuery pq = new PhraseQuery();
                        for (Term term : queryTerms) {
                            pq.add(term);
                        }
                        Query query = BooleanQueryBuilder.builder()
                                .must(new TermQuery(new Term("second", "yes"))).must(pq).build();
                        hits += searcher.search(query, 1).totalHits;
                    }
                    ms2 = System.currentTimeMillis() - millis;
                    System.out.println("Finished " + name2 + " in " + ms2 + "ms with " + hits + " hits");
                }
            }
            System.out.println(name1 + " took " + (int) ((100.0 * ms1) / ms2) + "% as much time as " + name2);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.impetus.kundera.index.LucandraIndexer.java
License:Apache License
@Override
public final void index(EntityMetadata metadata, Object object) {
    if (!metadata.isIndexable()) {
        return;
    }

    log.debug("Indexing @Entity[" + metadata.getEntityClazz().getName() + "] " + object);

    String indexName = metadata.getIndexName();

    Document document = new Document();
    Field luceneField;

    // index row
    try {
        String id = PropertyAccessorHelper.getId(object, metadata);
        luceneField = new Field(ENTITY_ID_FIELD, id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
        document.add(luceneField);

        // index namespace for unique deletion
        luceneField = new Field(KUNDERA_ID_FIELD, getKunderaId(metadata, id), Field.Store.YES,
                Field.Index.NOT_ANALYZED_NO_NORMS);
        document.add(luceneField);

        // index entity class
        luceneField = new Field(ENTITY_CLASS_FIELD, metadata.getEntityClazz().getCanonicalName().toLowerCase(),
                Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
        document.add(luceneField);

        // index index name
        luceneField = new Field(ENTITY_INDEXNAME_FIELD, metadata.getIndexName(), Field.Store.YES,
                Field.Index.NOT_ANALYZED_NO_NORMS);
        document.add(luceneField);
    } catch (PropertyAccessException e) {
        throw new IllegalArgumentException("Id could not be read.");
    }

    // now index all indexable properties
    for (PropertyIndex index : metadata.getIndexProperties()) {
        java.lang.reflect.Field property = index.getProperty();
        String propertyName = index.getName();
        try {
            String value = PropertyAccessorHelper.getString(object, property).toString();
            luceneField = new Field(getCannonicalPropertyName(indexName, propertyName), value, Field.Store.NO,
                    Field.Index.ANALYZED);
            document.add(luceneField);
        } catch (PropertyAccessException e) {
            // TODO: do something with the exceptions
            // e.printStackTrace();
        }
    }

    // flush the indexes
    try {
        log.debug("Flushing to Lucandra: " + document);
        if (!metadata.getDBType().equals(DBType.CASSANDRA)) {
            IndexWriter w = getDefaultIndexWriter();
            w.addDocument(document, analyzer);
            w.optimize();
            w.commit();
            w.close();
        } else {
            getIndexWriter().addDocument(document, analyzer);
        }
    } catch (CorruptIndexException e) {
        throw new IndexingException(e.getMessage());
    } catch (IOException e) {
        throw new IndexingException(e.getMessage());
    }
}
From source file:com.ivannotes.searchbee.SearchBee.java
License:Apache License
public final void doIndex(DataFetcher<T> df) throws CorruptIndexException, IOException {
    df.reset();
    IndexWriter idxWriter = getIndexWriter();
    int continuousException = 0;
    try {
        while (df.hasMore()) {
            try {
                List<T> data = df.fetchData();
                for (T bean : data) {
                    Document doc = buildDocument(bean);
                    idxWriter.addDocument(doc);
                }
                idxWriter.commit();
                continuousException = 0;
            } catch (Exception e) {
                continuousException++;
                logger.error("build index error", e);
                if (continuousException > 100) {
                    logger.error("build index exceeded max continuous exception count (100), exiting build.");
                    break;
                }
            }
        }
    } finally {
        if (null != idxWriter) {
            idxWriter.close();
        }
    }
}