Example usage for org.apache.lucene.index IndexWriter commit

Introduction

This page lists example usages of org.apache.lucene.index.IndexWriter.commit().

Prototype

@Override
public final long commit() throws IOException

(In recent Lucene versions commit() returns a sequence number; in the Lucene 3.x-era examples below it returns void.)

Document

Commits all pending changes (added and deleted documents, segment merges, added indexes, etc.) to the index, and syncs all referenced index files, such that a reader will see the changes and the index updates will survive an OS or machine crash or power loss.
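
For orientation, here is a minimal sketch of the typical pattern, written against the Lucene 3.1-era API that most of the examples below use (the field name and text are illustrative): documents buffered by addDocument() become durable, and visible to newly opened readers, only after commit().

RAMDirectory directory = new RAMDirectory();
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_31, analyzer));

Document doc = new Document();
doc.add(new Field("body", "hello commit", Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument(doc);   // buffered in memory; not yet durable

writer.commit();           // flushes and syncs index files; a reader opened now sees the document

IndexReader reader = IndexReader.open(directory);
System.out.println(reader.numDocs());   // prints 1

reader.close();
writer.close();
directory.close();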

Usage

From source file: luceneexamples.NumericFieldDocument.java

License: Apache License
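
This test indexes four documents carrying a NumericField, calls commit(), then opens a near-real-time reader from the writer and verifies ascending numeric sort order.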

@Test
public void index() throws Exception {
    RAMDirectory directory = new RAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    IndexWriter writer = new IndexWriter(directory, iwc);

    for (int i = 8; i < 12; i++) {
        Document doc = new Document();
        doc.add(new NumericField("int_field", Field.Store.YES, true).setIntValue(i));
        System.out.println(doc);
        writer.addDocument(doc);
    }
    writer.commit();

    IndexReader reader = IndexReader.open(writer, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs td = searcher.search(new MatchAllDocsQuery(), 1000,
            new Sort(new SortField("int_field", SortField.INT)));
    assertThat(td.totalHits, is(4));
    assertThat(searcher.doc(td.scoreDocs[0].doc).get("int_field"), equalTo("8"));
    assertThat(searcher.doc(td.scoreDocs[1].doc).get("int_field"), equalTo("9"));
    assertThat(searcher.doc(td.scoreDocs[2].doc).get("int_field"), equalTo("10"));
    assertThat(searcher.doc(td.scoreDocs[3].doc).get("int_field"), equalTo("11"));

    searcher.close();
    reader.close();
    writer.close();
    directory.close();
}

From source file: luceneexamples.SortDocuments.java

License: Apache License
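
This test commits three documents and then verifies both ascending and descending string sorts over the committed data.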

@Test
public void index() throws Exception {
    RAMDirectory directory = new RAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    IndexWriter writer = new IndexWriter(directory, iwc);

    Document doc = new Document();
    doc.add(new Field("str_field", "abc", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);
    Document doc2 = new Document();
    doc2.add(new Field("str_field", "def", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc2);
    Document doc3 = new Document();
    doc3.add(new Field("str_field", "hij", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc3);
    writer.commit();

    IndexReader reader = IndexReader.open(writer, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs td = searcher.search(new MatchAllDocsQuery(), 1000,
            new Sort(new SortField("str_field", SortField.STRING)));
    assertThat(td.totalHits, is(3));
    assertThat(searcher.doc(td.scoreDocs[0].doc).get("str_field"), equalTo("abc"));
    assertThat(searcher.doc(td.scoreDocs[1].doc).get("str_field"), equalTo("def"));
    assertThat(searcher.doc(td.scoreDocs[2].doc).get("str_field"), equalTo("hij"));

    td = searcher.search(new MatchAllDocsQuery(), 1000,
            new Sort(new SortField("str_field", SortField.STRING, true)));
    assertThat(td.totalHits, is(3));
    assertThat(searcher.doc(td.scoreDocs[0].doc).get("str_field"), equalTo("hij"));
    assertThat(searcher.doc(td.scoreDocs[1].doc).get("str_field"), equalTo("def"));
    assertThat(searcher.doc(td.scoreDocs[2].doc).get("str_field"), equalTo("abc"));

    searcher.close();
    reader.close();
    writer.close();
    directory.close();
}

From source file: luceneexamples.UpdateDocument.java

License: Apache License
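
Here commit() is called after the initial add and again after updateDocument(); the reader is then reopened so a fresh searcher observes the replacement document.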

@Test
public void index() throws Exception {
    RAMDirectory directory = new RAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    IndexWriter writer = new IndexWriter(directory, iwc);

    Document doc = new Document();
    doc.add(new Field("id", "001", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("str_field", "quick brown fox jumped over the lazy dog.", Field.Store.YES,
            Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.commit();
    IndexReader reader = IndexReader.open(writer, true);
    IndexSearcher searcher = new IndexSearcher(reader);
    QueryParser parser = new QueryParser(Version.LUCENE_31, "str_field", analyzer);
    TopDocs td = searcher.search(parser.parse("fox"), 1000);
    assertThat(td.totalHits, is(1));

    Document doc2 = new Document();
    doc2.add(new Field("id", "001", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc2.add(new Field("str_field", "quick brown fox jumped over the lazy whale.", Field.Store.YES,
            Field.Index.ANALYZED));
    writer.updateDocument(new Term("id", "001"), doc2);
    writer.commit();

    searcher.close();
    reader = reader.reopen();
    searcher = new IndexSearcher(reader);

    td = searcher.search(parser.parse("dog"), 1000);
    assertThat(td.totalHits, is(0));
    td = searcher.search(parser.parse("whale"), 1000);
    assertThat(td.totalHits, is(1));

    searcher.close();
    reader.close();
    writer.close();
    directory.close();
}

From source file: luceneingester.TrecIngester.java

License: Apache License
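
A multi-threaded TREC ingester: after indexing completes, it attaches commit user data to the writer and performs a single, timed commit() before closing.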

public static void main(String[] clArgs) throws Exception {
    Args args = new Args(clArgs);
    final String dirPath = args.getString("-indexPath") + "/index";
    final String dataDir = args.getString("-dataDir");
    final int docCountLimit = args.getInt("-docCountLimit"); // -1 means all docs from the source
    final int numThreads = args.getInt("-threadCount");
    final boolean verbose = args.getFlag("-verbose");
    final boolean printDPS = args.getFlag("-printDPS");
    final boolean doUpdate = args.getFlag("-update");
    final boolean positions = args.getFlag("-positions");

    args.check();

    final Analyzer a = new EnglishAnalyzer();
    final TrecContentSource trecSource = createTrecSource(dataDir);
    final Directory dir = FSDirectory.open(Paths.get(dirPath));

    System.out.println("Index path: " + dirPath);
    System.out.println("Doc count limit: " + (docCountLimit == -1 ? "all docs" : "" + docCountLimit));
    System.out.println("Threads: " + numThreads);
    System.out.println("Verbose: " + (verbose ? "yes" : "no"));
    System.out.println("Positions: " + (positions ? "yes" : "no"));

    if (verbose) {
        InfoStream.setDefault(new PrintStreamInfoStream(System.out));
    }

    final IndexWriterConfig iwc = new IndexWriterConfig(a);

    if (doUpdate) {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    } else {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    }

    System.out.println("IW config=" + iwc);

    final IndexWriter w = new IndexWriter(dir, iwc);
    IndexThreads threads = new IndexThreads(w, positions, trecSource, numThreads, docCountLimit, printDPS);
    System.out.println("\nIndexer: start");

    final long t0 = System.currentTimeMillis();

    threads.start();

    while (!threads.done()) {
        Thread.sleep(100);
    }
    threads.stop();

    final long t1 = System.currentTimeMillis();
    System.out.println(
            "\nIndexer: indexing done (" + (t1 - t0) / 1000.0 + " sec); total " + w.maxDoc() + " docs");
    if (!doUpdate && docCountLimit != -1 && w.maxDoc() != docCountLimit) {
        throw new RuntimeException("w.maxDoc()=" + w.maxDoc() + " but expected " + docCountLimit);
    }
    if (threads.failed.get()) {
        throw new RuntimeException("exceptions during indexing");
    }

    final long t2 = System.currentTimeMillis();

    final Map<String, String> commitData = new HashMap<String, String>();
    commitData.put("userData", "multi");
    w.setCommitData(commitData);
    w.commit();
    final long t3 = System.currentTimeMillis();
    System.out.println("\nIndexer: commit multi (took " + (t3 - t2) / 1000.0 + " sec)");

    System.out.println("\nIndexer: at close: " + w.segString());
    final long tCloseStart = System.currentTimeMillis();
    w.close();
    System.out.println("\nIndexer: close took " + (System.currentTimeMillis() - tCloseStart) / 1000.0 + " sec");
    dir.close();
    final long tFinal = System.currentTimeMillis();
    System.out.println("\nIndexer: finished (" + (tFinal - t0) / 1000.0 + " sec)");
    System.out.println("\nIndexer: net bytes indexed " + threads.getBytesIndexed());
    System.out.println(
            "\nIndexer: " + (threads.getBytesIndexed() / 1024. / 1024. / 1024. / ((tFinal - t0) / 3600000.))
                    + " GB/hour plain text");
}

From source file: model.Index.java
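
Indexes pre-processed web documents (link and content fields) and commits once at the end; as the final comment notes, without the commit the added documents would not be persisted.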

public static void main(String[] args) throws FileNotFoundException, IOException {
    // set the analyzer used to tokenize text
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
    // IndexWriter configuration
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer);
    // open the existing index, or create a new one if none exists
    indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    Directory directory = null;
    IndexWriter indexWrite = null;
    try {
        // open the index directory
        directory = FSDirectory.open(new File(Path.IndexDir));
        // if the directory is locked, unlock it
        if (IndexWriter.isLocked(directory)) {
            IndexWriter.unlock(directory);
        }
        // create the IndexWriter
        indexWrite = new IndexWriter(directory, indexWriterConfig);
    } catch (Exception e) {
        e.printStackTrace();
        return; // cannot continue without an open IndexWriter
    }

    PreProcessDoc getDoc = new PreProcessDoc();
    WebDocument tempDoc = null;
    while ((tempDoc = getDoc.nextDocument()) != null) {
        Document doc = new Document();
        doc.add(new TextField("link", tempDoc.getDocLink(), Store.YES));
        doc.add(new TextField("content", tempDoc.getDocContent(), Store.YES));
        try {
            // write the document into the index
            indexWrite.addDocument(doc);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // commit the changes; without a commit they would not be persisted
    try {
        indexWrite.commit();
        //close the resource
        indexWrite.close();
        directory.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file: mw.wikidump.MakeLuceneIndex.java

License: Open Source License
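
Builds a Lucene index from a Wikipedia dump, committing every 250,000 articles so that progress is persisted periodically during a long run.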

/**
 * @param args
 * @throws IOException
 * @throws ParseException
 */
public static void main(String[] args) throws IOException, ParseException {
    String baseDir = "";
    String wikiDumpFile = "enwiki-20110405-pages-articles.xml";
    String luceneIndexName = "enwiki-20110405-lucene";
    String logFile = luceneIndexName + ".log";
    boolean bIgnoreStubs = false;
    String writeToTextFilesDir = "";

    for (int i = 0; i < args.length; ++i) {
        if (args[i].equals("-luceneindex"))
            luceneIndexName = args[++i];

        if (args[i].equals("-basedir"))
            baseDir = args[++i];

        if (args[i].equals("-logfile"))
            logFile = args[++i];

        if (args[i].equals("-dumpfile"))
            wikiDumpFile = args[++i];

        if (args[i].equals("-ignorestubs"))
            bIgnoreStubs = true;

        if (args[i].equals("-writetotextfilesdir")) {
            writeToTextFilesDir = args[++i];
        }
    }

    Map<String, Analyzer> analyzerPerField = new HashMap<>();
    analyzerPerField.put("tokenized_title", new StandardAnalyzer());
    analyzerPerField.put("contents", new StandardAnalyzer());

    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), analyzerPerField);

    File basePath = new File(baseDir);
    File luceneIndex = new File(basePath.getCanonicalPath() + File.separator + luceneIndexName);

    logFile = basePath.getCanonicalPath() + File.separator + logFile;

    // log to file and console:
    // PlainLogger logger = new PlainLogger( logFile );
    // log only to console:
    PlainLogger logger = new PlainLogger();

    logger.log("Work directory:     " + basePath.getCanonicalPath());
    logger.log("Lucene index:       " + luceneIndexName);
    logger.log("Wikipedia dumpfile: " + wikiDumpFile);
    logger.log("");
    if (bIgnoreStubs)
        logger.log("Ignoring stubs");
    else
        logger.log("Including stubs");
    logger.log("");

    // create the index
    Directory indexDirectory = FSDirectory.open(luceneIndex.toPath());
    IndexWriter indexWriter = new IndexWriter(indexDirectory, new IndexWriterConfig(analyzer));

    Extractor wikidumpExtractor = new Extractor(basePath.getCanonicalPath() + File.separator + wikiDumpFile);
    wikidumpExtractor.setLinkSeparator("_");
    wikidumpExtractor.setCategorySeparator("_");

    int iStubs = 0;
    int iArticleCount = 0;
    int iSkippedPageCount = 0;
    long iStartTime = java.lang.System.nanoTime();
    long iTime = iStartTime;

    FieldType fieldType = new FieldType();
    fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    fieldType.setTokenized(true);
    fieldType.setStoreTermVectors(true);
    fieldType.setStoreTermVectorPositions(true);

    while (wikidumpExtractor.nextPage()) {
        if (wikidumpExtractor.getPageType() != Extractor.PageType.ARTICLE) {
            ++iSkippedPageCount;
            continue;
        }

        if (bIgnoreStubs && wikidumpExtractor.getStub()) {
            ++iStubs;
            continue;
        }

        Document doc = new Document();
        ++iArticleCount;

        doc.add(new StoredField("path", String.format("%d", iArticleCount)));

        wikidumpExtractor.setTitleSeparator("_");
        String title = wikidumpExtractor.getPageTitle(false).toLowerCase();
        doc.add(new Field("title", title, fieldType));

        wikidumpExtractor.setTitleSeparator(" ");
        doc.add(new Field("tokenized_title", wikidumpExtractor.getPageTitle(false).toLowerCase(), fieldType));

        doc.add(new Field("categories", wikidumpExtractor.getPageCategories().toLowerCase(), fieldType));
        doc.add(new Field("links", wikidumpExtractor.getPageLinks().toLowerCase(), fieldType));
        doc.add(new Field("contents", wikidumpExtractor.getPageAbstract().toLowerCase(), fieldType));

        indexWriter.addDocument(doc);

        if (!writeToTextFilesDir.isEmpty()) {
            String fileName = doc.get("title");
            fileName = fileName.replace('/', '_');
            writeToTextFile(writeToTextFilesDir, fileName, doc.get("contents"));
        }

        if (iArticleCount % 50000 == 0) {
            logger.add(iArticleCount + " (" + NanoTimeFormatter.getS(System.nanoTime() - iTime) + "s) ");
            iTime = System.nanoTime();

            if (iArticleCount % 250000 == 0) {
                try {
                    indexWriter.commit();
                    logger.add(
                            "-- commit. Skipped page count " + iSkippedPageCount + " (+ " + iStubs + " stubs)");
                    logger.log(String.format(", time %sm",
                            NanoTimeFormatter.getM(System.nanoTime() - iStartTime)));
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    logger.log("");
    logger.log(
            String.format("Overall time %s minutes, ", NanoTimeFormatter.getM(System.nanoTime() - iStartTime)));
    logger.add("collected " + iArticleCount + " articles, ");
    logger.add("skipped " + iSkippedPageCount + " nonarticle pages,");
    logger.log("skipped " + iStubs + " stubs.");
    logger.log("");

    iTime = System.nanoTime();
    logger.add(" closing...");
    indexWriter.close();
    logger.log(" done in " + NanoTimeFormatter.getS(System.nanoTime() - iTime) + "s.");

    logger.close();
    System.exit(0);
}

From source file: net.bobah.mail.Indexer.java

License: Apache License
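
Indexes .eml mail files on a thread pool, commits the additions, runs a deletion pass that removes entries for files that no longer exist, commits again, and closes the writer with rollback() so the close itself performs no implicit commit.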

private void runEx() throws Exception {
    final File dir = new File(config.getProperty("net.bobah.mail.local.folder"));
    if (!dir.exists() || !dir.isDirectory()) {
        throw new IllegalArgumentException(String.format("\"%s\" does not exist or is not a directory", dir));
    }

    Collection<File> files = findFiles(dir, new FileFilter() {
        @Override
        public boolean accept(File file) {
            return file.getName().endsWith(".eml");
        }
    }, new Comparator<File>() {
        @Override
        public int compare(File l, File r) {
            return Long.compare(l.lastModified(), r.lastModified());
        }
    });

    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer);
    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    final File indexDir = new File(dir, "index");

    final boolean indexExisted = indexDir.exists();
    if (!indexExisted)
        indexDir.mkdirs();

    final Directory idx = FSDirectory.open(indexDir);
    final IndexWriter writer = new IndexWriter(idx, iwc);

    final IndexReader reader = indexExisted ? DirectoryReader.open(idx) : null;
    final IndexSearcher searcher = indexExisted ? new IndexSearcher(reader) : null;

    //final AtomicLong counter = new AtomicLong(0l);
    try {
        for (final File file : files) {
            executor.submit(new Runnable() {
                @Override
                public void run() {
                    try {
                        index(file, writer, searcher);
                        //if (counter.incrementAndGet() % 100 == 0) writer.commit(); // TODO: VL: make batch size configurable
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                }
            });
        }

        shutdownExecutor(executor, log);

        // TODO: VL: delete stale documents from the index

        writer.commit();
        log.info("committed index updates");

        if (searcher != null) { // skip the deletion pass when the index was just created
            searcher.search(new MatchAllDocsQuery(), new Collector() {
                @Override
                public void setScorer(Scorer scorer) throws IOException {
                }

                @Override
                public void setNextReader(AtomicReaderContext unused) throws IOException {
                }

                @Override
                public void collect(int docID) throws IOException {
                    Document doc = reader.document(docID);
                    final String path = doc.get("path");
                    if (path != null) {
                        try {
                            final File file = new File(path);
                            if (!file.exists()) {
                                log.info("deleting index for {}", doc.get("id"));
                                writer.deleteDocuments(new Term("id", doc.get("id")));
                            }
                        } catch (SecurityException e) {
                            log.error("exception", e);
                        }
                    }
                }

                @Override
                public boolean acceptsDocsOutOfOrder() {
                    return true;
                }
            });
        }

        writer.commit();
        log.info("committed index deletions");

    } finally {
        try {
            // close writer without commit (see explicit commits above)
            writer.rollback();
        } catch (IOException e) {
            log.error("exception while closing writer", e);
        }
    }
}

From source file: net.ion.craken.node.problem.distribute.DemoActions.java

License: Open Source License
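
Adds a single one-field document and commits it, closing the writer in a finally block.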

/**
 * Creates a new document having just one field containing a string.
 *
 * @param line The text snippet to add
 * @throws IOException
 */
public void addNewDocument(String line) throws IOException {
    IndexWriterConfig iwconfig = new IndexWriterConfig(SearchConstant.LuceneVersion, analyzer);
    IndexWriter iw = new IndexWriter(idir, iwconfig);
    try {
        Document doc = new Document();
        Field field = new Field(MAIN_FIELD, line, Store.YES, Index.ANALYZED);
        doc.add(field);
        iw.addDocument(doc);
        iw.commit();
    } finally {
        iw.close();
    }
}

From source file: net.mad.ads.services.geo.lucene.GeoIpIndex.java

License: Open Source License
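
Imports GeoLite CSV data, committing every 100 documents so the import is persisted incrementally, then optimizes and closes the index.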

public void importIPs(String path) {

    try {

        if (!path.endsWith("/")) {
            path += "/";
        }

        Directory directory = FSDirectory.open(new File(db, "geo"));
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31,
                new StandardAnalyzer(Version.LUCENE_31));
        IndexWriter writer = new IndexWriter(directory, config);

        List<String> cnames = new ArrayList<String>();

        BufferedReader br = new BufferedReader(new FileReader(path + "GeoLiteCity-Blocks.csv"));
        CSVReader reader = new CSVReader(br, ',', '\"', 2);

        //         Scanner scanner = new Scanner(new FileReader(filename));
        //         boolean firstLine = true;
        int count = 0;
        String[] values;
        Map<String, Map<String, String>> locations = getLocations(path);
        while ((values = reader.readNext()) != null) {
            String ipfrom = values[0];
            String ipto = values[1];
            String locid = values[2];

            Map<String, String> location = locations.get(locid);

            Document doc = new Document();
            doc.add(new Field("city", location.get("city"), Store.YES, Index.ANALYZED));
            doc.add(new Field("postalcode", location.get("postalcode"), Store.YES, Index.ANALYZED));
            doc.add(new Field("country", location.get("country"), Store.YES, Index.ANALYZED));
            doc.add(new Field("region", location.get("region"), Store.YES, Index.ANALYZED));
            doc.add(new Field("latitude", location.get("latitude"), Store.YES, Index.ANALYZED));
            doc.add(new Field("longitude", location.get("longitude"), Store.YES, Index.ANALYZED));

            NumericField ipfromField = new NumericField("ipfrom", 8, Store.YES, true);
            ipfromField.setLongValue(Long.parseLong(ipfrom.trim()));
            doc.add(ipfromField);
            NumericField iptoField = new NumericField("ipto", 8, Store.YES, true);
            iptoField.setLongValue(Long.parseLong(ipto.trim()));
            doc.add(iptoField);
            //            doc.add(new NumericField("ipto", ipto, Store.YES, Index.ANALYZED));
            writer.addDocument(doc);

            count++;

            if (count % 100 == 0) {
                writer.commit();
            }
        }

        System.out.println(count + " entries imported");

        writer.optimize();
        writer.close();

    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file: net.semanticmetadata.lire.benchmarking.CombinationTest.java

License: Open Source License
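
Indexes image files through a chained document builder and performs a single commit() before closing; the local features are then clustered.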

public void testIndexing() throws IOException {
    ChainedDocumentBuilder cb = new ChainedDocumentBuilder();
    cb.addBuilder(new SurfDocumentBuilder());
    cb.addBuilder(DocumentBuilderFactory.getColorLayoutBuilder());

    System.out.println("-< Getting files to index >--------------");
    ArrayList<String> images = FileUtils.getAllImages(new File(dataPath), true);
    System.out.println("-< Indexing " + images.size() + " files >--------------");

    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath, true);
    int count = 0;
    long time = System.currentTimeMillis();
    for (String identifier : images) {
        Document doc = cb.createDocument(new FileInputStream(identifier), identifier);
        iw.addDocument(doc);
        count++;
        if (count % 100 == 0)
            System.out.println(count + " files indexed.");
        //            if (count == 200) break;
    }
    long timeTaken = (System.currentTimeMillis() - time);
    float sec = ((float) timeTaken) / 1000f;

    System.out.println(sec + " seconds taken, " + (timeTaken / count) + " ms per image.");
    iw.commit();
    iw.close();

    System.out.println("-< Local features are getting clustered >--------------");

    BOVWBuilder sh = new BOVWBuilder(IndexReader.open(FSDirectory.open(new File(indexPath))), new SurfFeature(),
            200, 8000);
    sh.index();

    System.out.println("-< Indexing finished >--------------");

}