Example usage for org.apache.lucene.index IndexWriterConfig IndexWriterConfig

List of usage examples for org.apache.lucene.index IndexWriterConfig IndexWriterConfig

Introduction

On this page you can find example usages of the org.apache.lucene.index IndexWriterConfig constructor IndexWriterConfig(Analyzer).

Prototype

public IndexWriterConfig(Analyzer analyzer) 

Source Link

Document

Creates a new config with the provided Analyzer.

Usage

From source file:cs412.project.search.IndexFiles.java

License:Apache License

public IndexFiles(String docsPath, String indexPath) {

    boolean create = true;

    if (docsPath == null) {
        System.exit(1);// w  w w . j a v a  2  s .  co  m
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath).toPath());
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }

}

From source file:cs412.project.search.IndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>Recognised arguments:
 * <ul>
 *   <li>{@code -index INDEX_PATH} &ndash; where the index is written (default {@code "index"})</li>
 *   <li>{@code -docs DOCS_PATH} &ndash; directory to index (default {@code "Split Files/"})</li>
 *   <li>{@code -update} &ndash; append to an existing index instead of recreating it</li>
 * </ul>
 *
 * <p>The JVM exits with status 1 when an option is missing its value or the
 * document directory does not exist or is not readable. An {@link IOException}
 * raised while indexing is reported on standard output.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + " in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    // String docsPath = "H:\\data set 4";
    //CHANGE BELOW TO YOUR PATH
    String docsPath = "Split Files/";
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        String arg = args[i];
        if ("-index".equals(arg) || "-docs".equals(arg)) {
            // Both options consume the following argument; refuse to read
            // past the end of the array when it is missing.
            if (i + 1 >= args.length) {
                System.err.println("Missing value for " + arg);
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            String value = args[++i];
            if ("-index".equals(arg)) {
                indexPath = value;
            } else {
                docsPath = value;
            }
        } else if ("-update".equals(arg)) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    final long startMillis = System.currentTimeMillis();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        // try-with-resources guarantees the writer (and with it the index
        // write lock) and the directory are released even when indexDocs
        // fails part-way through.
        try (Directory dir = FSDirectory.open(new File(indexPath).toPath());
                IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        }

        // Reported only after the writer has been closed, as before.
        System.out.println(System.currentTimeMillis() - startMillis + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:cs571.proj1.IndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>Recognised arguments:
 * <ul>
 *   <li>{@code -index INDEX_PATH} &ndash; where the index is written (default {@code "index"})</li>
 *   <li>{@code -docs DOCS_PATH} &ndash; directory to index (required)</li>
 *   <li>{@code -update} &ndash; append to an existing index instead of recreating it</li>
 *   <li>{@code -tfidf} &ndash; configure the writer with the {@code TFIDF} similarity</li>
 *   <li>{@code -bm25} &ndash; configure the writer with the {@code BM25} similarity</li>
 * </ul>
 *
 * <p>The JVM exits with status 1 when {@code -docs} is absent, when an option
 * is missing its value, or when the document directory is not readable. An
 * {@link IOException} raised while indexing is reported on standard output.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update] [-tfidf] [-bm25]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + " in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        String arg = args[i];
        if ("-index".equals(arg) || "-docs".equals(arg)) {
            // Both options consume the following argument; refuse to read
            // past the end of the array when it is missing.
            if (i + 1 >= args.length) {
                System.err.println("Missing value for " + arg);
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            String value = args[++i];
            if ("-index".equals(arg)) {
                indexPath = value;
            } else {
                docsPath = value;
            }
        } else if ("-update".equals(arg)) {
            create = false;
        } else if ("-tfidf".equals(arg)) {
            tfidf = true;
        } else if ("-bm25".equals(arg)) {
            bm25 = true;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    final long startMillis = System.currentTimeMillis();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        if (tfidf) {
            iwc.setSimilarity(new TFIDF());
        }
        // When both flags are given, BM25 is applied last and therefore wins.
        if (bm25) {
            iwc.setSimilarity(new BM25());
        }
        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        // try-with-resources guarantees the writer (and with it the index
        // write lock) and the directory are released even when indexDocs
        // fails part-way through.
        try (Directory dir = FSDirectory.open(Paths.get(indexPath));
                IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        }

        // Reported only after the writer has been closed, as before.
        System.out.println(System.currentTimeMillis() - startMillis + " total milliseconds");
        System.out.println("Total # of Docs Indexed: " + numOfDocuments);

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:csdn.lucene.first.version.Indexer.java

License:Apache License

/**
 * Opens an {@link IndexWriter} on the index stored in {@code indexDir},
 * analysing text with an {@code IKAnalyzer}.
 *
 * @param indexDir file-system location of the index
 * @throws IOException if the directory or the writer cannot be opened
 */
public Indexer(String indexDir) throws IOException {
    // Resolve the index location and open it as a Lucene directory.
    Directory indexDirectory = FSDirectory.open(Paths.get(indexDir));

    // The writer is kept in a field for later use by this instance.
    IndexWriterConfig writerConfig = new IndexWriterConfig(new IKAnalyzer(true));
    writer = new IndexWriter(indexDirectory, writerConfig);
}

From source file:dbn.db.FullTextTrigger.java

/**
 * Get the Lucene index access/*from   ww  w  .  ja  v a  2  s.  c  o m*/
 *
 * @param   conn                SQL connection
 * @throws  SQLException        Unable to access the Lucene index
 */
private static void getIndexAccess(Connection conn) throws SQLException {
    if (!isActive) {
        throw new SQLException("NRS is no longer active");
    }
    boolean obtainedUpdateLock = false;
    if (!indexLock.writeLock().hasLock()) {
        indexLock.updateLock().lock();
        obtainedUpdateLock = true;
    }
    try {
        if (indexPath == null || indexWriter == null) {
            indexLock.writeLock().lock();
            try {
                if (indexPath == null) {
                    getIndexPath(conn);
                }
                if (directory == null) {
                    directory = FSDirectory.open(indexPath);
                }
                if (indexWriter == null) {
                    IndexWriterConfig config = new IndexWriterConfig(analyzer);
                    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
                    indexWriter = new IndexWriter(directory, config);
                    Document document = new Document();
                    document.add(new StringField("_QUERY", "_CONTROL_DOCUMENT_", Field.Store.YES));
                    indexWriter.updateDocument(new Term("_QUERY", "_CONTROL_DOCUMENT_"), document);
                    indexWriter.commit();
                    indexReader = DirectoryReader.open(directory);
                    indexSearcher = new IndexSearcher(indexReader);
                }
            } finally {
                indexLock.writeLock().unlock();
            }
        }
    } catch (IOException | SQLException exc) {
        Logger.logErrorMessage("Unable to access the Lucene index", exc);
        throw new SQLException("Unable to access the Lucene index", exc);
    } finally {
        if (obtainedUpdateLock) {
            indexLock.updateLock().unlock();
        }
    }
}

From source file:de.anycook.db.lucene.FulltextIndex.java

License:Open Source License

/**
 * Builds a fresh writer configuration around this index's analyzer.
 *
 * @return a new {@link IndexWriterConfig} using the {@code analyzer} field
 */
private IndexWriterConfig createIndexWriterConfig() {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
    return writerConfig;
}

From source file:de.citec.lucene.CreateIndex.java

/**
 * Rebuilds the Lucene index at a hard-coded location from a hard-coded list
 * of corpus files, using the analyzer that matches the configured language,
 * then prints the {@code counter} value.
 *
 * <p>A failure while indexing an individual file is logged at
 * {@link Level#SEVERE} and does not stop the remaining files.
 *
 * @param args ignored
 * @throws IOException if the index directory or writer cannot be opened or closed
 * @throws IllegalStateException if no analyzer exists for the configured language
 */
public static void main(String[] args) throws IOException {
    List<String> files = new ArrayList<>();
    files.add("/Users/swalter/Documents/EsaDeutsch/new_copus_german.txt");
    String indexPath = "/Users/swalter/Documents/EsaDeutsch/Index/";
    Language language = Language.DE;

    Analyzer analyzer = null;
    if (language.equals(Language.DE)) {
        analyzer = new GermanAnalyzer();
    } else if (language.equals(Language.ES)) {
        analyzer = new SpanishAnalyzer();
    } else if (language.equals(Language.EN)) {
        analyzer = new EnglishAnalyzer();
    }
    // Fail with a clear message instead of handing a null analyzer to Lucene.
    if (analyzer == null) {
        throw new IllegalStateException("No analyzer available for language " + language);
    }

    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    // NOTE(review): a 12000 MB RAM buffer presumably needs a correspondingly
    // large JVM heap (-Xmx) — confirm this value is intended.
    iwc.setRAMBufferSizeMB(12000);

    // Both the directory and the writer are closed on exit, writer first.
    try (Directory dir = FSDirectory.open(Paths.get(indexPath));
            IndexWriter writer = new IndexWriter(dir, iwc)) {
        for (String f : files) {
            try {
                indexDocs(writer, Paths.get(f));
            } catch (IOException ex) {
                Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }
    System.out.println(counter);

}

From source file:de.citec.sc.sentence.preprocessing.lucene.CreateIndex.java

/**
 * Rebuilds the Lucene index at a hard-coded location from a hard-coded list
 * of sentence files, using the analyzer that matches the configured language.
 *
 * <p>A failure while indexing an individual file is logged at
 * {@link Level#SEVERE} and does not stop the remaining files.
 *
 * @param args ignored
 * @throws IOException if the index directory or writer cannot be opened or closed
 * @throws IllegalStateException if no analyzer exists for the configured language
 */
public static void main(String[] args) throws IOException {
    List<String> files = new ArrayList<>();
    files.add("/Users/swalter/Downloads/german_sentences_reduced.txt");
    String indexPath = "/Users/swalter/Index/GermanIndexReduced/";
    Language language = Language.DE;

    //files.add("/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/idealSents_mecab_jdepp_rmvPunct_CoNLLU");
    //String indexPath = "/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/index";
    //Language language = Language.JA;

    Analyzer analyzer = null;
    if (language.equals(Language.DE)) {
        analyzer = new GermanAnalyzer();
    } else if (language.equals(Language.ES)) {
        analyzer = new SpanishAnalyzer();
    } else if (language.equals(Language.EN)) {
        analyzer = new EnglishAnalyzer();
    } else if (language.equals(Language.JA)) {
        analyzer = new JapaneseAnalyzer();
    }
    // Fail with a clear message instead of handing a null analyzer to Lucene.
    if (analyzer == null) {
        throw new IllegalStateException("No analyzer available for language " + language);
    }

    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    // NOTE(review): a 12000 MB RAM buffer presumably needs a correspondingly
    // large JVM heap (-Xmx) — confirm this value is intended.
    iwc.setRAMBufferSizeMB(12000);

    // Both the directory and the writer are closed on exit, writer first.
    try (Directory dir = FSDirectory.open(Paths.get(indexPath));
            IndexWriter writer = new IndexWriter(dir, iwc)) {
        for (String f : files) {
            try {
                indexDocs(writer, Paths.get(f), language);
            } catch (IOException ex) {
                Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }

}

From source file:de.dfki.km.leech.lucene.ToLuceneContentHandler.java

License:Open Source License

/**
 * Returns the writer that new documents should currently be added to.
 *
 * <p>When the split threshold ({@code getSplitAndMergeIndex()}) is not
 * positive, the initial writer is always returned. Otherwise the current
 * writer is returned until its {@code maxDoc()} reaches the threshold; at
 * that point a new temporary index is created, registered in
 * {@code m_hsTmpLuceneWriterPaths2Merge}, and its writer becomes the current
 * one.
 *
 * <p>If the initial index lives in an {@link FSDirectory}, the temporary
 * index is placed next to it with a numeric {@code _n} suffix and the
 * replaced writer (unless it is the initial one) is queued in
 * {@code m_llIndexWriter2Close}. Otherwise a uniquely named directory below
 * {@code java.io.tmpdir/leechTmp/} is used.
 *
 * @return the writer to use for the next document
 * @throws CorruptIndexException declared by the original signature
 * @throws LockObtainFailedException declared by the original signature
 * @throws IOException if the temporary index cannot be opened
 */
synchronized protected IndexWriter getCurrentWriter()
        throws CorruptIndexException, LockObtainFailedException, IOException {

    // Splitting disabled: everything goes into the initial index.
    if (getSplitAndMergeIndex() <= 0) {
        return m_initialLuceneWriter;
    }

    // Current index still has room.
    if (m_luceneWriter.maxDoc() < getSplitAndMergeIndex()) {
        return m_luceneWriter;
    }

    final Directory initialDir = m_initialLuceneWriter.getDirectory();

    final Path tmpIndexDir;
    if (!(initialDir instanceof FSDirectory)) {
        // We need something temporary.
        String systemTmp = new File(System.getProperty("java.io.tmpdir")).getAbsolutePath();
        String uniqueName = UUID.randomUUID().toString().replaceAll("\\W", "_");
        tmpIndexDir = Paths.get(systemTmp + "/leechTmp/" + uniqueName);
    } else {
        // The writer being replaced is remembered for closing later,
        // except for the initial writer.
        if (m_luceneWriter != m_initialLuceneWriter) {
            m_llIndexWriter2Close.add(m_luceneWriter);
        }

        String siblingBase = ((FSDirectory) initialDir).getDirectory().toAbsolutePath().toString();
        tmpIndexDir = Paths.get(siblingBase + "_" + (m_hsTmpLuceneWriterPaths2Merge.size() + 1));
    }

    Logger.getLogger(ToLuceneContentHandler.class.getName()).info("Current index exceeds "
            + m_iSplitIndexDocumentCount + " documents. Will create another temporary one under " + tmpIndexDir);

    // The temporary index reuses the analyzer of the initial writer and always starts empty.
    @SuppressWarnings("deprecation")
    IndexWriterConfig tmpConfig = new IndexWriterConfig(m_initialLuceneWriter.getConfig().getAnalyzer());
    tmpConfig.setOpenMode(OpenMode.CREATE);

    Directory tmpDirectory = new SimpleFSDirectory(tmpIndexDir);
    m_luceneWriter = new IndexWriter(tmpDirectory, tmpConfig);
    m_hsTmpLuceneWriterPaths2Merge.add(tmpIndexDir.toAbsolutePath().toString());

    return m_luceneWriter;
}

From source file:de.dfki.km.leech.parser.incremental.IncrementalCrawlingHistory.java

License:Open Source License

/**
 * Creates all writer, reader, and searcher objects if necessary
 * /*w ww .j av a 2  s .  c  om*/
 * @throws CorruptIndexException
 * @throws LockObtainFailedException
 * @throws IOException
 */
public void openLuceneStuff() throws CorruptIndexException, LockObtainFailedException, IOException {
    if (m_indexWriter == null) {
        IndexWriterConfig config = new IndexWriterConfig(new KeywordAnalyzer());
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);

        m_indexWriter = new IndexWriter(new SimpleFSDirectory(Paths.get(m_strHistoryPath)), config);
    }

    if (m_indexReader == null)
        m_indexReader = DirectoryReader.open(m_indexWriter, true, true);

    if (m_indexSearcher == null)
        m_indexSearcher = new IndexSearcher(m_indexReader);
}