List of usage examples for the org.apache.lucene.index.IndexWriterConfig constructor
public IndexWriterConfig(Analyzer analyzer)
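Before the project-specific examples below, here is a minimal, self-contained sketch of this constructor in use (the class name, index path, and field name are placeholder choices for illustration, not taken from the projects below): it builds an IndexWriterConfig from a StandardAnalyzer, opens an IndexWriter on a temporary FSDirectory, and adds one document.

import java.nio.file.Files;
import java.nio.file.Path;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class IndexWriterConfigBasicUsage {
    public static void main(String[] args) throws Exception {
        // temporary index directory; a real application would use a fixed path
        Path indexPath = Files.createTempDirectory("iwc-demo");

        Analyzer analyzer = new StandardAnalyzer();

        // the constructor documented above: one Analyzer, defaults for everything else
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        try (Directory dir = FSDirectory.open(indexPath);
             IndexWriter writer = new IndexWriter(dir, config)) {
            Document doc = new Document();
            doc.add(new TextField("text", "hello lucene", Field.Store.YES));
            writer.addDocument(doc);
            writer.commit();
        }
    }
}

Note that an IndexWriterConfig instance should be passed to only one IndexWriter; create a fresh config for each writer you open.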
From source file:com.lucene.index.test.IKAnalyzerdemoMutilField.java
License:Apache License
/**
 * IKAnalyzer indexing and multi-field search demo.
 * @param args
 */
public static void main(String[] args) {
    // Lucene Document field name
    String fieldName = "text";
    // sample texts to index
    String text1 = "oracle?";
    String text2 = "?";
    String text3 = "?";
    // IKAnalyzer instance
    Analyzer analyzer = new IKAnalyzer();

    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        // in-memory index
        directory = new RAMDirectory();

        // configure the IndexWriter
        IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);

        // index three documents
        Document doc1 = new Document();
        doc1.add(new StringField("ID", "10000", Field.Store.YES));
        doc1.add(new TextField(fieldName, text1, Field.Store.YES));
        iwriter.addDocument(doc1);

        Document doc2 = new Document();
        doc2.add(new StringField("ID", "10000", Field.Store.YES));
        doc2.add(new TextField(fieldName, text2, Field.Store.YES));
        iwriter.addDocument(doc2);

        Document doc3 = new Document();
        doc3.add(new StringField("ID", "10000", Field.Store.YES));
        doc3.add(new TextField(fieldName, text3, Field.Store.YES));
        iwriter.addDocument(doc3);

        iwriter.close();

        // ********************** search **********************
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        // build a Query with QueryParser
        QueryParser qp = new QueryParser(fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        // fetch the top 5 hits
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("Total hits: " + topDocs.totalHits);

        // print the returned documents (iterate scoreDocs, not totalHits,
        // since at most 5 hits were requested)
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < scoreDocs.length; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println(targetDoc.toString());
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:com.lucene.index.test.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    long Cbegintime = System.nanoTime(); // start time in nanoseconds
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        long Cendtime = System.nanoTime(); // end time in nanoseconds
        Date end = new Date();
        // elapsed time from System.nanoTime(), computed but not printed
        BigDecimal diff = BigDecimal.valueOf(Cendtime - Cbegintime, 10);
        double time = diff.setScale(4, BigDecimal.ROUND_HALF_UP).doubleValue();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
From source file:com.main.Indexer.java
public void indexing() throws TikaException, SAXException {
    // Input folder
    String docsPath = "C:\\Users\\piyush\\Documents\\NetBeansProjects\\luceneFinal\\indexing\\doc";
    // Output folder
    String indexPath = "C:\\Users\\piyush\\Documents\\NetBeansProjects\\luceneFinal\\indexing\\index";

    // Input Path Variable
    final Path docDir = Paths.get(docsPath);

    try {
        // org.apache.lucene.store.Directory instance
        Directory dir = FSDirectory.open(Paths.get(indexPath));
        // analyzer with the default stop words
        Analyzer analyzer = new StandardAnalyzer();

        // IndexWriter Configuration
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);

        // IndexWriter writes new index files to the directory
        IndexWriter writer = new IndexWriter(dir, iwc);

        // Recursive method that iterates all files and directories
        indexDocs(writer, docDir);

        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.meizu.nlp.classification.utils.DatasetSplitter.java
License:Apache License
/**
 * Split a given index into 3 indexes for training, test and cross validation tasks respectively
 *
 * @param originalIndex        an {@link org.apache.lucene.index.LeafReader} on the source index
 * @param trainingIndex        a {@link Directory} used to write the training index
 * @param testIndex            a {@link Directory} used to write the test index
 * @param crossValidationIndex a {@link Directory} used to write the cross validation index
 * @param analyzer             {@link Analyzer} used to create the new docs
 * @param fieldNames           names of fields that need to be put in the new indexes or <code>null</code> if all should be used
 * @throws IOException if any writing operation fails on any of the indexes
 */
public void split(LeafReader originalIndex, Directory trainingIndex, Directory testIndex,
        Directory crossValidationIndex, Analyzer analyzer, String... fieldNames) throws IOException {

    // create IWs for train / test / cv IDXs
    IndexWriter testWriter = new IndexWriter(testIndex, new IndexWriterConfig(analyzer));
    IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(analyzer));
    IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(analyzer));

    try {
        int size = originalIndex.maxDoc();

        IndexSearcher indexSearcher = new IndexSearcher(originalIndex);
        TopDocs topDocs = indexSearcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE);

        // set the type to be indexed, stored, with term vectors
        FieldType ft = new FieldType(TextField.TYPE_STORED);
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorOffsets(true);
        ft.setStoreTermVectorPositions(true);

        int b = 0;

        // iterate over existing documents
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {

            // create a new document for indexing
            Document doc = new Document();
            if (fieldNames != null && fieldNames.length > 0) {
                for (String fieldName : fieldNames) {
                    doc.add(new Field(fieldName,
                            originalIndex.document(scoreDoc.doc).getField(fieldName).stringValue(), ft));
                }
            } else {
                for (IndexableField storableField : originalIndex.document(scoreDoc.doc).getFields()) {
                    if (storableField.readerValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.readerValue(), ft));
                    } else if (storableField.binaryValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.binaryValue(), ft));
                    } else if (storableField.stringValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.stringValue(), ft));
                    } else if (storableField.numericValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.numericValue().toString(), ft));
                    }
                }
            }

            // add it to one of the IDXs
            if (b % 2 == 0 && testWriter.maxDoc() < size * testRatio) {
                testWriter.addDocument(doc);
            } else if (cvWriter.maxDoc() < size * crossValidationRatio) {
                cvWriter.addDocument(doc);
            } else {
                trainingWriter.addDocument(doc);
            }
            b++;
        }
    } catch (Exception e) {
        throw new IOException(e);
    } finally {
        testWriter.commit();
        cvWriter.commit();
        trainingWriter.commit();
        // close IWs
        testWriter.close();
        cvWriter.close();
        trainingWriter.close();
    }
}
From source file:com.mycompany.mavenproject1.Main.java
public static void main(String[] args) throws IOException, ParseException {
    StandardAnalyzer analyzer = new StandardAnalyzer();

    // Directory index = new RAMDirectory();
    Directory index = new SimpleFSDirectory(Paths.get(
            "C:\\Users\\slete\\Documents\\NetBeansProjects\\mavenproject1\\src\\main\\java\\com\\mycompany\\mavenproject1\\data"));
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    // config.setOpenMode(OpenMode.CREATE);
    IndexWriter w = new IndexWriter(index, config);

    try (ItemProvider provider = new ItemProvider(
            "C:\\Users\\slete\\Documents\\NetBeansProjects\\mavenproject1\\src\\main\\java\\com\\mycompany\\mavenproject1\\items.xml")) {
        while (provider.hasNext()) {
            Item item = provider.next();
            addItem(w, item);
        }
    } catch (XMLStreamException | IOException ex) {
        System.err.println(ex.getMessage());
    }
    // w.commit();
    w.close();

    // sample query strings
    // String queryStr = "id:1* NOT id:19*";
    String a = "id:1* NOT id:19*";
    String b = "name:Dekielek AND description:(ty AND obiektywu)";
    String c = "category:Dek*";
    String ds = "id:1232~2";
    String e = "price:[0.0 TO 100.0]";

    Query q = new QueryParser("name", analyzer).parse(ds);

    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs docs = searcher.search(q, hitsPerPage);
    ScoreDoc[] hits = docs.scoreDocs;

    System.out.println("Found " + hits.length + " hits.");
    for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        Document d = searcher.doc(docId);
        System.out.println(d.get("id") + "\t" + d.get("price") + "\t" + d.get("name") + "\t" + d.get("category"));
        // + "\t" + d.get("description"));
    }
}
From source file:com.mycompany.restlet.search.sample.indexer.java
License:Apache License
/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        // MorphemeAnalyzer ma = new MorphemeAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
From source file:com.nuvolect.deepdive.lucene.Index.java
public static JSONObject index(final String volumeId, final String searchPath, final boolean forceIndex) {

    if (m_interrupt[0]) {
        LogUtil.log(LogUtil.LogType.INDEX, "Index canceled post interrupt");
        m_interrupt[0] = false;
        return responseInterruptIndexing();
    }

    OmniFile cacheDir = IndexUtil.getCacheDir(volumeId, searchPath);
    boolean cacheDirCreated = false;
    try {
        cacheDirCreated = OmniUtil.forceMkdir(cacheDir);
    } catch (IOException e) {
        return responseFolderCreateError(searchPath);
    }

    final String luceneDirPath = cacheDir.getAbsolutePath();

    boolean cacheDirExists = !cacheDirCreated;
    boolean indexingOngoing = m_indexThread != null && m_indexThread.isAlive();
    boolean indexingRequired = !cacheDirExists || forceIndex;

    synchronized (m_lock) {
        if (indexingOngoing) {
            if (m_fileTreeActive)
                m_index_state = INDEX_STATE.filetree;
            else
                m_index_state = INDEX_STATE.indexing;
        } else {
            if (indexingRequired)
                m_index_state = INDEX_STATE.indexing;
            else
                m_index_state = INDEX_STATE.complete;
        }
    }

    if (indexingRequired || indexingOngoing) {

        if (indexingOngoing) {
            // Nothing to do, let the background process run. Monitor m_indexedDocs for progress.
        } else {
            synchronized (m_lock) {
                m_index_state = INDEX_STATE.filetree;
                m_totalDocs[0] = 0;
                m_indexedDocs[0] = 0;
                m_error[0] = "";
            }
            m_threadGroup = new ThreadGroup(INDEX_THREAD_GROUP);
            m_indexThread = new Thread(m_threadGroup, new Runnable() {
                @Override
                public void run() {

                    // Analyzer analyzer = new org.apache.lucene.analysis.core.WhitespaceAnalyzer();
                    // Analyzer analyzer = new org.apache.lucene.analysis.core.KeywordAnalyzer();
                    // Analyzer analyzer = new org.apache.lucene.analysis.standard.StandardAnalyzer();
                    Analyzer analyzer = new org.apache.lucene.analysis.core.SimpleAnalyzer();
                    IndexWriterConfig config = new IndexWriterConfig(analyzer);
                    IndexWriter iwriter = null;
                    try {
                        Directory m_directory = FSDirectory.open(Paths.get(luceneDirPath));
                        iwriter = new IndexWriter(m_directory, config);
                        iwriter.deleteAll();
                        iwriter.commit();
                    } catch (IOException e) {
                        LogUtil.logException(LogUtil.LogType.INDEX, e);
                        m_error[0] = "IndexWriter constructor exception";
                    }

                    synchronized (m_lock) {
                        m_fileTreeActive = true;
                        m_index_state = INDEX_STATE.filetree;
                    }
                    Collection<OmniFile> files = IndexUtil.getFilePaths(volumeId, searchPath);

                    synchronized (m_lock) {
                        m_index_state = INDEX_STATE.indexing;
                        m_fileTreeActive = false;
                        m_totalDocs[0] = files.size();
                        m_indexedDocs[0] = 0;
                    }

                    try {
                        for (OmniFile file : files) {

                            if (m_interrupt[0]) {
                                LogUtil.log(LogUtil.LogType.INDEX, "Iterator loop canceled");
                                break;
                            }

                            String path = file.getPath();
                            // LogUtil.log(LogUtil.LogType.INDEX, "indexing: " + path); // this is a bit excessive

                            iwriter.addDocument(makeDoc(volumeId, path));
                            synchronized (m_lock) {
                                ++m_indexedDocs[0];
                            }
                        }

                        iwriter.commit();
                        iwriter.close();

                        synchronized (m_lock) {
                            m_index_state = m_interrupt[0] ? INDEX_STATE.interrupted : INDEX_STATE.complete;
                            m_totalDocs[0] = m_indexedDocs[0];
                        }
                    } catch (Exception e) {
                        LogUtil.logException(LogUtil.LogType.INDEX, e);
                        m_error[0] = "IndexWriter addDocument exception";
                    }
                }
            }, INDEX_THREAD, STACK_SIZE);
            m_indexThread.setPriority(Thread.MAX_PRIORITY);
            m_indexThread.start();
        }
    } else {
        // Indexing is complete
        // Get number of documents indexed
        try {
            Directory directory = FSDirectory.open(Paths.get(luceneDirPath));
            DirectoryReader ireader = DirectoryReader.open(directory);
            synchronized (m_lock) {
                m_indexedDocs[0] = ireader.numDocs();
                m_totalDocs[0] = m_indexedDocs[0];
                m_index_state = INDEX_STATE.complete;
            }
            ireader.close();
            directory.close();
        } catch (IOException e) {
            LogUtil.logException(LogUtil.LogType.INDEX, e);
        }
    }

    JSONObject result = new JSONObject();
    try {
        synchronized (m_lock) {
            result.put("index_state", m_index_state.toString());
            result.put("error", m_error[0]);
            result.put("indexed_docs", m_indexedDocs[0]);
            result.put("total_docs", m_totalDocs[0]);
            // result.put("full_path", cacheDir.getAbsolutePath());
            result.put("search_path", searchPath);
        }
    } catch (JSONException e) {
        e.printStackTrace();
    }
    return result;
}
From source file:com.o19s.es.explore.ExplorerQueryTests.java
License:Apache License
@Before
public void setupIndex() throws Exception {
    dir = new RAMDirectory();

    try (IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
        for (int i = 0; i < docs.length; i++) {
            Document doc = new Document();
            doc.add(new Field("_id", Integer.toString(i + 1), StoredField.TYPE));
            doc.add(newTextField("text", docs[i], Field.Store.YES));

            indexWriter.addDocument(doc);
        }
    }

    reader = DirectoryReader.open(dir);
    searcher = new IndexSearcher(reader);
}
From source file:com.orientechnologies.spatial.engine.OLuceneSpatialIndexEngineAbstract.java
License:Apache License
@Override
public IndexWriter openIndexWriter(Directory directory) throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(indexAnalyzer());
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    return new IndexWriter(directory, iwc);
}
From source file:com.orientechnologies.spatial.engine.OLuceneSpatialIndexEngineAbstract.java
License:Apache License
@Override
public IndexWriter createIndexWriter(Directory directory) throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(indexAnalyzer());
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    return new IndexWriter(directory, iwc);
}