Example usage for org.apache.lucene.index IndexWriter commit

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexWriter.commit().

Prototype

@Override
public final long commit() throws IOException 

Document

Commits all pending changes (added and deleted documents, segment merges, added indexes, etc.) to the index, and syncs all referenced index files, such that a reader will see the changes and the index updates will survive an OS or machine crash or power loss.
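
Before the real-world examples, here is a minimal, self-contained sketch of the add-then-commit cycle. The index path and field names are illustrative placeholders, not taken from the examples that follow.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CommitSketch {
    public static void main(String[] args) throws Exception {
        // The path is a placeholder; any writable directory works.
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new TextField("text", "hello lucene", Field.Store.YES));
            writer.addDocument(doc);
            // Flush and fsync the pending change; returns the commit's sequence number.
            long seqNo = writer.commit();
            System.out.println("committed, sequence number = " + seqNo);
        }
    }
}

Note that close() also commits pending changes by default, so an explicit commit() is mainly useful as a durability checkpoint while a long-lived writer remains open.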

Usage

From source file: com.github.mosuka.apache.lucene.example.cmd.UpdateCommand.java

License: Apache License

@Override
public void execute(Map<String, Object> attrs) {
    Map<String, Object> responseMap = new LinkedHashMap<String, Object>();

    String responseJSON = null;
    Directory indexDir = null;
    IndexWriter writer = null;

    try {
        String index = (String) attrs.get("index");
        String uniqueId = (String) attrs.get("unique_id");
        String text = (String) attrs.get("text");

        indexDir = FSDirectory.open(new File(index).toPath());

        Document document = LuceneExampleUtil.createDocument(uniqueId, text);

        IndexWriterConfig config = new IndexWriterConfig(LuceneExampleUtil.createAnalyzerWrapper());
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);

        writer = new IndexWriter(indexDir, config);
        writer.updateDocument(new Term("id", document.get("id")), document);
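        // commit() makes the update durable and visible to readers opened afterwards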
        writer.commit();

        responseMap.put("status", 0);
        responseMap.put("message", "OK");
    } catch (IOException e) {
        responseMap.put("status", -1);
        responseMap.put("message", e.getMessage());
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
        try {
            if (indexDir != null) {
                indexDir.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
    }

    try {
        ObjectMapper mapper = new ObjectMapper();
        responseJSON = mapper.writeValueAsString(responseMap);
    } catch (IOException e) {
        responseJSON = String.format("{\"status\":1, \"message\":\"%s\"}", e.getMessage());
    }
    System.out.println(responseJSON);
}

From source file: com.github.tteofili.looseen.MinHashClassifier.java

License: Apache License

public MinHashClassifier(IndexReader reader, String textField, String categoryField, int min, int hashCount,
        int hashSize) {
    this.min = min;
    this.hashCount = hashCount;
    this.hashSize = hashSize;
    try {
        Analyzer analyzer = createMinHashAnalyzer(min, hashCount, hashSize);
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        directory = new RAMDirectory();
        IndexWriter writer = new IndexWriter(directory, config);
        for (int i = 0; i < reader.maxDoc(); i++) {
            Document document = new Document();
            Document d = reader.document(i);
            String textValue = d.getField(textField).stringValue();
            String categoryValue = d.getField(categoryField).stringValue();
            document.add(new TextField(TEXT_FIELD, textValue, Field.Store.NO));
            document.add(new StringField(CLASS_FIELD, categoryValue, Field.Store.YES));
            writer.addDocument(document);
        }
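        // one commit after the whole bulk load publishes all documents at once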
        writer.commit();
        writer.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
}

From source file: com.github.tteofili.looseen.Test20NewsgroupsClassification.java

License: Apache License

void buildIndex(File indexDir, IndexWriter indexWriter) throws IOException {
    File[] groupsDir = indexDir.listFiles();
    if (groupsDir != null) {
        for (File group : groupsDir) {
            String groupName = group.getName();
            File[] posts = group.listFiles();
            if (posts != null) {
                for (File postFile : posts) {
                    String number = postFile.getName();
                    NewsPost post = parse(postFile, groupName, number);
                    Document d = new Document();
                    d.add(new StringField(CATEGORY_FIELD, post.getGroup(), Field.Store.YES));
                    d.add(new SortedDocValuesField(CATEGORY_FIELD, new BytesRef(post.getGroup())));
                    d.add(new TextField(SUBJECT_FIELD, post.getSubject(), Field.Store.YES));
                    d.add(new TextField(BODY_FIELD, post.getBody(), Field.Store.YES));
                    indexWriter.addDocument(d);
                }
            }
        }
    }
    indexWriter.commit();
}

From source file: com.github.tteofili.looseen.TestWikipediaClassification.java

License: Apache License

private static void importWikipedia(File dump, IndexWriter indexWriter) throws Exception {
    long start = System.currentTimeMillis();
    int count = 0;
    System.out.format("Importing %s...%n", dump);

    String title = null;
    String text = null;
    Set<String> cats = new HashSet<>();

    XMLInputFactory factory = XMLInputFactory.newInstance();
    StreamSource source;
    if (dump.getName().endsWith(".xml")) {
        source = new StreamSource(dump);
    } else {
        throw new RuntimeException("can index only wikipedia XML files");
    }
    XMLStreamReader reader = factory.createXMLStreamReader(source);
    while (reader.hasNext()) {
        if (count == Integer.MAX_VALUE) {
            break;
        }
        switch (reader.next()) {
        case XMLStreamConstants.START_ELEMENT:
            if ("title".equals(reader.getLocalName())) {
                title = reader.getElementText();
            } else if (TEXT_FIELD.equals(reader.getLocalName())) {
                text = reader.getElementText();
                Matcher matcher = pattern.matcher(text);
                int pos = 0;
                while (matcher.find(pos)) {
                    String group = matcher.group(1);
                    String catName = group.replaceAll("\\|\\s", "").replaceAll("\\|\\*", "");
                    Collections.addAll(cats, catName.split("\\|"));
                    pos = matcher.end();
                }
            }
            break;
        case XMLStreamConstants.END_ELEMENT:
            if ("page".equals(reader.getLocalName())) {
                Document page = new Document();
                if (title != null) {
                    page.add(new TextField(TITLE_FIELD, title, StoredField.Store.YES));
                }
                if (text != null) {
                    page.add(new TextField(TEXT_FIELD, text, StoredField.Store.YES));
                }
                for (String cat : cats) {
                    page.add(new StringField(CATEGORY_FIELD, cat, Field.Store.YES));
                    page.add(new SortedSetDocValuesField(CATEGORY_FIELD, new BytesRef(cat)));
                }
                indexWriter.addDocument(page);
                cats.clear();
                count++;
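                // commit periodically so indexing progress survives a crash part-way through the dump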
                if (count % 100000 == 0) {
                    indexWriter.commit();
                    System.out.format("Committed %d pages%n", count);
                }
            }
            break;
        }
    }

    indexWriter.commit();

    long millis = System.currentTimeMillis() - start;
    System.out.format("Imported %d pages in %d seconds (%.2fms/page)%n", count, millis / 1000,
            (double) millis / count);
}

From source file: com.github.wxiaoqi.search.lucene.LuceneDao.java

License: Open Source License

public void create(IndexObject indexObject) {

    IndexWriter indexWriter = null;
    try {
        IndexWriterConfig config = new IndexWriterConfig(this.getAnalyzer());
        indexWriter = new IndexWriter(this.getDirectory(), config);
        indexWriter.addDocument(DocumentUtil.IndexObject2Document(indexObject));
        indexWriter.commit();
    } catch (Exception e) {
        e.printStackTrace();
        try {
            // guard against the case where the IndexWriter constructor itself failed
            if (indexWriter != null) {
                indexWriter.rollback();
            }
        } catch (IOException e1) {
            e1.printStackTrace();
        }
    } finally {
        try {
            if (indexWriter != null) {
                indexWriter.close();
            }
        } catch (IOException e1) {
            e1.printStackTrace();
        }
    }
}
}

From source file: com.globalsight.ling.lucene.Index.java

License: Apache License

/**
 * Ends the batch re-creation of an index by clearing out the old
 * index files, writing the new in-memory index to disk, and
 * setting the index state to STATE_OPENED.
 *
 * @see #batchOpen()
 */
public void batchDone() throws IOException {
    synchronized (m_state) {
        if (m_state != STATE_CREATING) {
            throw new IOException("index is not being re-created");
        }
    }
    // try to unlock this dir : for unexpected shutdown
    try {
        if (IndexWriter.isLocked(m_fsDir)) {
            IndexWriter.unlock(m_fsDir);
        }
    } catch (Exception ee) {
        // ignore
    }

    // Tho reports it can happen that the index cannot be created
    // on disk (GSDEF00012703). Trap this and release the memory
    // of the ram directory.
    IndexWriter diskwriter = null;
    try {
        // MUST optimize RAMDirectory before writing it to disk.
        // m_ramIndexWriter.optimize();

        // Write all data out to disk, optimize and clean up.

        diskwriter = getIndexWriter(true);
        diskwriter.commit();
        Directory[] ds = new Directory[] { m_ramdir };
        //Directory[] ds = new Directory[] { m_fsDir } ;
        diskwriter.addIndexes(ds);
        //diskwriter.optimize();
        //diskwriter.close();

        // clean cache if have
        LuceneCache.cleanLuceneCache(m_directory);
    } catch (IOException ex) {
        CATEGORY.error("unexpected error when persisting index " + m_directory, ex);

        throw ex;
    } catch (Throwable ex) {
        CATEGORY.error("unexpected error when persisting index " + m_directory, ex);

        throw new IOException(ex.getMessage());
    } finally {
        IOUtils.closeWhileHandlingException(diskwriter, m_ramIndexWriter, m_ramdir);

        m_ramIndexWriter = null;
        m_ramdir = null;
        m_state = STATE_OPENED;
    }
}

From source file: com.globalsight.ling.tm2.lucene.LuceneIndexWriter.java

License: Apache License

public void remove(Collection p_tuvs) throws Exception {
    IndexWriterConfig conf = new IndexWriterConfig(LuceneUtil.VERSION, m_analyzer);
    conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(m_directory, conf);

    try {
        for (Iterator it = p_tuvs.iterator(); it.hasNext();) {
            Object tuv = it.next();
            Long id = tuv instanceof BaseTmTuv ? ((BaseTmTuv) tuv).getId()
                    : tuv instanceof TM3Tuv ? ((TM3Tuv) tuv).getId() : null;

            Term term = new Term(TuvDocument.TUV_ID_FIELD, id.toString());
            writer.deleteDocuments(term);
        }
    } catch (Throwable e) {
        c_logger.error(e.getMessage(), e);
        //indexReader.undeleteAll();
        throw (e instanceof Exception ? (Exception) e : new Exception(e));
    } finally {
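        // commit in finally so any deletions that succeeded are persisted even on error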
        writer.commit();
        writer.close();
    }

    // clean cache if have
    LuceneCache.cleanLuceneCache(m_indexDir);
}

From source file: com.greplin.lucene.filter.PhraseFilterBenchmark.java

License: Apache License

public static void main(String[] argv) {
    Directory directory = new RAMDirectory();
    try {
        IndexWriter writer = new IndexWriter(directory,
                new IndexWriterConfig(Version.LUCENE_32, new WhitespaceAnalyzer(Version.LUCENE_32)));
        int done = 0;
        for (int i = 0; i < NUMBER_OF_SEGMENTS; i++) {
            int remaining = NUMBER_OF_SEGMENTS - i;
            int numberOfDocs;
            if (remaining == 1) {
                numberOfDocs = TOTAL_DOCS - done;
            } else {
                numberOfDocs = RANDOM.nextInt(TOTAL_DOCS - done - remaining) + 1;
            }
            done += numberOfDocs;
            System.out.println("Segment #" + i + " has " + numberOfDocs + " docs");

            for (int d = 0; d < numberOfDocs; d++) {
                int wordCount = RANDOM.nextInt(WORDS_PER_DOC_DEVIATION * 2) + AVERAGE_WORDS_PER_DOC
                        - WORDS_PER_DOC_DEVIATION;
                Document doc = new Document();
                doc.add(new Field("f", Joiner.on(' ').join(words(wordCount)), Field.Store.YES,
                        Field.Index.ANALYZED));
                doc.add(new Field("second", RANDOM.nextInt(100) < SECOND_FIELD_MATCH_PERCENTAGE ? "yes" : "no",
                        Field.Store.NO, Field.Index.ANALYZED));
                writer.addDocument(doc);
            }
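            // committing here flushes the buffered docs, aiming for one segment per iteration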
            writer.commit();
        }
        writer.close();

        IndexReader reader = IndexReader.open(directory);
        IndexSearcher searcher = new IndexSearcher(reader);

        String[][] queries = new String[TOTAL_QUERIES][];
        Term[][] terms = new Term[TOTAL_QUERIES][];

        for (int q = 0; q < TOTAL_QUERIES; q++) {
            queries[q] = words(WORDS_PER_QUERY[RANDOM.nextInt(WORDS_PER_QUERY.length)]);
            terms[q] = new Term[queries[q].length];
            for (int qw = 0; qw < queries[q].length; qw++) {
                terms[q][qw] = new Term(FIELD, queries[q][qw]);
            }
        }

        // Warm up.
        new PhraseFilter(FIELD, queries[0]).getDocIdSet(reader);

        for (int round = 0; round < ROUNDS; round++) {
            System.out.println();
            String name1 = "filter";
            String name2 = "query";

            long ms1 = 0, ms2 = 0;
            for (int step = 0; step < 2; step++) {
                System.gc();
                System.gc();
                System.gc();

                if (step == (round & 1)) {
                    long millis = System.currentTimeMillis();
                    long hits = 0;
                    for (String[] queryWords : queries) {
                        PhraseFilter pf = new PhraseFilter(
                                new FilterIntersectionProvider(TermsFilter.from(new Term("second", "yes"))),
                                FIELD, queryWords);
                        hits += searcher.search(new FilteredQuery(new MatchAllDocsQuery(), pf), 1).totalHits;
                    }
                    ms1 = System.currentTimeMillis() - millis;
                    System.out.println("Finished " + name1 + " in " + ms1 + "ms with " + hits + " hits");
                } else {
                    long millis = System.currentTimeMillis();
                    long hits = 0;
                    for (Term[] queryTerms : terms) {
                        PhraseQuery pq = new PhraseQuery();
                        for (Term term : queryTerms) {
                            pq.add(term);
                        }
                        Query query = BooleanQueryBuilder.builder()
                                .must(new TermQuery(new Term("second", "yes"))).must(pq).build();
                        hits += searcher.search(query, 1).totalHits;
                    }
                    ms2 = System.currentTimeMillis() - millis;
                    System.out.println("Finished " + name2 + " in " + ms2 + "ms with " + hits + " hits");
                }
            }
            System.out.println(name1 + " took " + (int) ((100.0 * ms1) / ms2) + "% as much time as " + name2);
        }

    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file: com.impetus.kundera.index.LucandraIndexer.java

License: Apache License

@Override
public final void index(EntityMetadata metadata, Object object) {

    if (!metadata.isIndexable()) {
        return;
    }

    log.debug("Indexing @Entity[" + metadata.getEntityClazz().getName() + "] " + object);

    String indexName = metadata.getIndexName();

    Document document = new Document();
    Field luceneField;

    // index row
    try {
        String id = PropertyAccessorHelper.getId(object, metadata);
        luceneField = new Field(ENTITY_ID_FIELD, id, // adding class
                // namespace
                Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
        document.add(luceneField);

        // index namespace for unique deletion
        luceneField = new Field(KUNDERA_ID_FIELD, getKunderaId(metadata, id), // adding
                // class
                // namespace
                Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
        document.add(luceneField);

        // index entity class
        luceneField = new Field(ENTITY_CLASS_FIELD, metadata.getEntityClazz().getCanonicalName().toLowerCase(),
                Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
        document.add(luceneField);

        // index index name
        luceneField = new Field(ENTITY_INDEXNAME_FIELD, metadata.getIndexName(), Field.Store.YES,
                Field.Index.NOT_ANALYZED_NO_NORMS);
        document.add(luceneField);

    } catch (PropertyAccessException e) {
        throw new IllegalArgumentException("Id could not be read.");
    }

    // now index all indexable properties
    for (PropertyIndex index : metadata.getIndexProperties()) {

        java.lang.reflect.Field property = index.getProperty();
        String propertyName = index.getName();

        try {
            String value = PropertyAccessorHelper.getString(object, property).toString();
            luceneField = new Field(getCannonicalPropertyName(indexName, propertyName), value, Field.Store.NO,
                    Field.Index.ANALYZED);
            document.add(luceneField);
        } catch (PropertyAccessException e) {
            // TODO: do something with the exceptions
            // e.printStackTrace();
        }
    }

    // flush the indexes
    try {
        log.debug("Flushing to Lucandra: " + document);
        if (!metadata.getDBType().equals(DBType.CASSANDRA)) {
            IndexWriter w = getDefaultIndexWriter();
            w.addDocument(document, analyzer);
            w.optimize();
            w.commit();
            w.close();

        } else {
            getIndexWriter().addDocument(document, analyzer);
        }
    } catch (CorruptIndexException e) {
        throw new IndexingException(e.getMessage());
    } catch (IOException e) {
        throw new IndexingException(e.getMessage());
    }
}

From source file: com.ivannotes.searchbee.SearchBee.java

License: Apache License

public final void doIndex(DataFetcher<T> df) throws CorruptIndexException, IOException {
    df.reset();
    IndexWriter idxWriter = getIndexWriter();
    int continuousException = 0;
    try {
        while (df.hasMore()) {
            try {
                List<T> data = df.fetchData();
                for (T bean : data) {
                    Document doc = buildDocument(bean);
                    idxWriter.addDocument(doc);
                }

                idxWriter.commit();
                continuousException = 0;
            } catch (Exception e) {
                continuousException++;
                logger.error("build index error", e);
                if (continuousException > 100) {
                    logger.error("build index exceed max continuous exception count(100), exit build.");
                    break;
                }
            }
        }
    } finally {
        if (null != idxWriter) {
            idxWriter.close();
        }
    }

}