Example usage for org.apache.lucene.index IndexWriterConfig setOpenMode

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriterConfig setOpenMode.

Prototype

public IndexWriterConfig setOpenMode(OpenMode openMode)

Source Link

Document

Specifies OpenMode of the index.

Usage

From source file:crawler.WebCrawler.java

License:Open Source License

/**
 * Constructor.//from w  w w . j a v a2  s. c o m
 */
public WebCrawler() {
    threadNumber = configurator.propertyInteger("threadNumber");
    //        System.out.println("threadNumber " + threadNumber);

    verbose = configurator.propertyBoolean("verbose");
    //        System.out.println("verbose " + verbose);

    logFilePath = configurator.property("logFilePath");
    redirect = (logFilePath.trim().isEmpty() ? false : true);
    //        System.out.println("logFilePath " + logFilePath);

    storagePath = configurator.property("storagePath");
    //        System.out.println("storagePath " + storagePath);

    indexPath = configurator.property("indexPath");
    //        System.out.println("indexPath " + indexPath);

    maximumFileNumber = configurator.propertyInteger("maximumFileNumber");
    //        System.out.println("maximumFileNumber " + maximumFileNumber);

    timeout = configurator.propertyInteger("timeout");
    //        System.out.println("timeout " + timeout);

    depth = configurator.propertyInteger("depth");
    //        System.out.println("depth " + depth);

    followImgLinks = configurator.propertyBoolean("followImgLinks");
    //        System.out.println("followImgLinks " + followImgLinks);

    sitemapAssisted = configurator.propertyBoolean("sitemapAssisted");
    //        System.out.println("sitemapAssisted " + sitemapAssisted);

    agent = configurator.property("agent");
    //        System.out.println("agent " + agent);

    executor = Executors.newFixedThreadPool(this.threadNumber);
    futures = new LinkedList<Future<?>>();
    visited = new ConcurrentHashMap<String, URL>();

    try {
        Directory directory = new NIOFSDirectory(new File(this.indexPath));
        Version lv = Version.LUCENE_41;
        Analyzer a = new EnglishAnalyzer(lv);
        IndexWriterConfig iwc = new IndexWriterConfig(lv, a);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setWriteLockTimeout(20000);
        luceneIndexWriter = new IndexWriter(directory, iwc);
    } catch (IOException ioe) {
        ioe.printStackTrace();
    }
}

From source file:cs412.project.search.IndexFiles.java

License:Apache License

public IndexFiles(String docsPath, String indexPath) {

    boolean create = true;

    if (docsPath == null) {
        System.exit(1);//from  www.jav  a2 s.c om
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath).toPath());
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }

}

From source file:cs412.project.search.IndexFiles.java

License:Apache License

/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    // String docsPath = "H:\\data set 4";
    //CHANGE BELOW TO YOUR PATH
    String docsPath = "Split Files/";
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];/*from   w  ww.j a v a  2 s .c o  m*/
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath).toPath());
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:cs571.proj1.IndexFiles.java

License:Apache License

/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;/*from  w w w.  j  a v  a  2 s  . c o  m*/
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        } else if ("-tfidf".equals(args[i])) {
            tfidf = true;
        } else if ("-bm25".equals(args[i])) {
            bm25 = true;
        }

    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        if (tfidf)
            iwc.setSimilarity(new TFIDF());
        if (bm25)
            iwc.setSimilarity(new BM25());
        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
        System.out.println("Total # of Docs Indexed: " + numOfDocuments);

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:dbn.db.FullTextTrigger.java

/**
 * Get the Lucene index access//from   w w w . j  ava2 s.  co m
 *
 * @param   conn                SQL connection
 * @throws  SQLException        Unable to access the Lucene index
 */
private static void getIndexAccess(Connection conn) throws SQLException {
    if (!isActive) {
        throw new SQLException("NRS is no longer active");
    }
    boolean obtainedUpdateLock = false;
    if (!indexLock.writeLock().hasLock()) {
        indexLock.updateLock().lock();
        obtainedUpdateLock = true;
    }
    try {
        if (indexPath == null || indexWriter == null) {
            indexLock.writeLock().lock();
            try {
                if (indexPath == null) {
                    getIndexPath(conn);
                }
                if (directory == null) {
                    directory = FSDirectory.open(indexPath);
                }
                if (indexWriter == null) {
                    IndexWriterConfig config = new IndexWriterConfig(analyzer);
                    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
                    indexWriter = new IndexWriter(directory, config);
                    Document document = new Document();
                    document.add(new StringField("_QUERY", "_CONTROL_DOCUMENT_", Field.Store.YES));
                    indexWriter.updateDocument(new Term("_QUERY", "_CONTROL_DOCUMENT_"), document);
                    indexWriter.commit();
                    indexReader = DirectoryReader.open(directory);
                    indexSearcher = new IndexSearcher(indexReader);
                }
            } finally {
                indexLock.writeLock().unlock();
            }
        }
    } catch (IOException | SQLException exc) {
        Logger.logErrorMessage("Unable to access the Lucene index", exc);
        throw new SQLException("Unable to access the Lucene index", exc);
    } finally {
        if (obtainedUpdateLock) {
            indexLock.updateLock().unlock();
        }
    }
}

From source file:de.blizzy.documentr.search.AllDocIdsCollectorTest.java

License:Open Source License

@Before
public void setUp() throws IOException {
    directory = new RAMDirectory();

    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(directory, writerConfig);
    writer.addDocument(createDocument());
    writer.addDocument(createDocument());
    writer.addDocument(createDocument());
    writer.commit();/*from w ww . j av a  2 s.c  o m*/
    writer.close(true);

    reader = DirectoryReader.open(directory);
}

From source file:de.blizzy.documentr.search.GetSearchHitTaskTest.java

License:Open Source License

@Before
public void setUp() throws IOException {
    directory = new RAMDirectory();

    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(directory, writerConfig);
    writer.addDocument(createDocument("project", "branch", "home", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
            new String[] { "tag1", "tag2" }, //$NON-NLS-1$ //$NON-NLS-2$
            "title", "some text")); //$NON-NLS-1$ //$NON-NLS-2$
    writer.commit();//from www  . java2  s . c  o  m
    writer.close(true);

    reader = DirectoryReader.open(directory);

    Query query = new TermQuery(new Term("text", "some")); //$NON-NLS-1$ //$NON-NLS-2$
    task = new GetSearchHitTask(query, reader, 0, analyzer);
}

From source file:de.blizzy.documentr.search.InaccessibleDocIdsCollectorTest.java

License:Open Source License

@Before
public void setUp() throws IOException {
    directory = new RAMDirectory();

    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(directory, writerConfig);
    writer.addDocument(createDocument("project", "branch1", "home")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
    writer.addDocument(createDocument("project", "branch2", "home")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
    writer.commit();/*from   w w w .  j  a v  a2 s  . co  m*/
    writer.close(true);

    reader = DirectoryReader.open(directory);

    collector = new InaccessibleDocIdsCollector(Permission.VIEW, authentication, permissionEvaluator);
}

From source file:de.blizzy.documentr.search.PageIndex.java

License:Open Source License

@PostConstruct
public void init() throws IOException {
    File indexDir = new File(settings.getDocumentrDataDir(), "index"); //$NON-NLS-1$
    File pageIndexDir = new File(indexDir, "page"); //$NON-NLS-1$
    FileUtils.forceMkdir(pageIndexDir);//w w  w  .  j av a  2s.  co m

    directory = FSDirectory.open(pageIndexDir);

    Analyzer defaultAnalyzer = new EnglishAnalyzer(Version.LUCENE_40);
    Map<String, Analyzer> fieldAnalyzers = Maps.newHashMap();
    fieldAnalyzers.put(ALL_TEXT_SUGGESTIONS, new StandardAnalyzer(Version.LUCENE_40));
    analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer, fieldAnalyzers);

    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    config.setOpenMode(OpenMode.CREATE_OR_APPEND);
    writer = new IndexWriter(directory, config);
    writer.commit();

    readerManager = new ReaderManager(directory);
    searcherManager = new SearcherManager(directory, null);

    log.info("checking if index is empty"); //$NON-NLS-1$
    if (getNumDocuments() == 0) {
        reindexEverything();
    }
}

From source file:de.blizzy.documentr.search.PagePermissionFilterTest.java

License:Open Source License

@Before
public void setUp() throws IOException {
    directory = new RAMDirectory();

    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(directory, writerConfig);
    writer.addDocument(createDocument("project", "branch1", "home")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
    writer.addDocument(createDocument("project", "branch2", "home")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
    writer.commit();//  ww  w  . j  av  a 2s. co m
    writer.close(true);

    reader = DirectoryReader.open(directory);

    BitSet docs = new BitSet();
    docs.set(1);
    Bits docIds = new DocIdBitSet(docs);
    filter = new PagePermissionFilter(docIds);
}