Example usage for org.apache.lucene.index IndexWriter IndexWriter

List of usage examples for org.apache.lucene.index IndexWriter IndexWriter

Introduction

On this page you can find usage examples for the org.apache.lucene.index.IndexWriter constructor.

Prototype

public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException 

Source Link

Document

Constructs a new IndexWriter per the settings given in conf.

Usage

From source file:com.mathworks.xzheng.tools.SpatialLuceneExample.java

License:Apache License

/**
 * Creates the example with a fresh {@link RAMDirectory} and an {@link IndexWriter} opened on it.
 * The writer is configured for {@code Version.LUCENE_46} with a {@link WhitespaceAnalyzer}.
 *
 * @throws IOException if the index writer cannot be opened on the directory
 */
SpatialLuceneExample() throws IOException {
    directory = new RAMDirectory();
    writer = new IndexWriter(directory,
            new IndexWriterConfig(Version.LUCENE_46, new WhitespaceAnalyzer(Version.LUCENE_46)));
}

From source file:com.meizu.nlp.classification.utils.DatasetSplitter.java

License:Apache License

/**
 * Split a given index into 3 indexes for training, test and cross validation tasks respectively.
 * <p>
 * Every document returned by a match-all search on the source index is copied into a new document
 * whose fields are stored and indexed with term vectors (including offsets and positions).
 * Documents seen on even iterations go to the test index while it holds fewer than
 * {@code size * testRatio} documents; otherwise they go to the cross validation index while it
 * holds fewer than {@code size * crossValidationRatio} documents; all remaining documents go to
 * the training index. {@code testRatio} and {@code crossValidationRatio} are defined outside this
 * method.
 * <p>
 * A requested field that is absent from a document (or has no string value) is skipped for that
 * document instead of aborting the whole split.
 *
 * @param originalIndex        an {@link org.apache.lucene.index.LeafReader} on the source index
 * @param trainingIndex        a {@link Directory} used to write the training index
 * @param testIndex            a {@link Directory} used to write the test index
 * @param crossValidationIndex a {@link Directory} used to write the cross validation index
 * @param analyzer             {@link Analyzer} used to create the new docs
 * @param fieldNames           names of fields that need to be put in the new indexes or <code>null</code> if all should be used
 * @throws IOException if any writing operation fails on any of the indexes; unchecked failures
 *                     raised while copying documents are wrapped in an {@code IOException}
 */
public void split(LeafReader originalIndex, Directory trainingIndex, Directory testIndex,
        Directory crossValidationIndex, Analyzer analyzer, String... fieldNames) throws IOException {

    // try-with-resources closes every writer that was successfully opened, even when opening a
    // later writer, copying a document, or committing/closing another writer fails
    try (IndexWriter testWriter = new IndexWriter(testIndex, new IndexWriterConfig(analyzer));
            IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(analyzer));
            IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(analyzer))) {

        int size = originalIndex.maxDoc();

        IndexSearcher indexSearcher = new IndexSearcher(originalIndex);
        TopDocs topDocs = indexSearcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE);

        // set the type to be indexed, stored, with term vectors
        FieldType ft = new FieldType(TextField.TYPE_STORED);
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorOffsets(true);
        ft.setStoreTermVectorPositions(true);

        int b = 0;

        // iterate over existing documents
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {

            // load the stored document once instead of once per requested field
            Document original = originalIndex.document(scoreDoc.doc);

            // create a new document for indexing
            Document doc = new Document();
            if (fieldNames != null && fieldNames.length > 0) {
                for (String fieldName : fieldNames) {
                    IndexableField sourceField = original.getField(fieldName);
                    // not every document necessarily carries every requested field
                    if (sourceField != null && sourceField.stringValue() != null) {
                        doc.add(new Field(fieldName, sourceField.stringValue(), ft));
                    }
                }
            } else {
                for (IndexableField storableField : original.getFields()) {
                    if (storableField.readerValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.readerValue(), ft));
                    } else if (storableField.binaryValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.binaryValue(), ft));
                    } else if (storableField.stringValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.stringValue(), ft));
                    } else if (storableField.numericValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.numericValue().toString(), ft));
                    }
                }
            }

            // add it to one of the IDXs
            if (b % 2 == 0 && testWriter.maxDoc() < size * testRatio) {
                testWriter.addDocument(doc);
            } else if (cvWriter.maxDoc() < size * crossValidationRatio) {
                cvWriter.addDocument(doc);
            } else {
                trainingWriter.addDocument(doc);
            }
            b++;
        }

        testWriter.commit();
        cvWriter.commit();
        trainingWriter.commit();
    } catch (RuntimeException e) {
        // keep the original contract: callers only ever see IOException from this method
        throw new IOException(e);
    }
}

From source file:com.meltmedia.cadmium.search.SearchContentPreprocessor.java

License:Apache License

/**
 * Rebuilds the Lucene index stored under {@code metaDir} and stages it as the new searcher.
 * <p>
 * The index lives in the {@code lucene-index} sub-directory of {@code metaDir}; the content to be
 * indexed is read from the parent directory of {@code metaDir}. Any existing index content is
 * deleted before {@code writeIndex} repopulates it. Once the writer has been closed, a reader is
 * opened on the new index, the previously staged holder (if any) is closed, and the search
 * preprocessors are run against the new reader.
 *
 * @param metaDir path of the meta directory that contains (or will contain) the index
 * @throws Exception if the index cannot be written or opened
 */
@Override
public synchronized void processFromDirectory(String metaDir) throws Exception {
    SearchHolder freshHolder = new SearchHolder();
    indexDir = new File(metaDir, "lucene-index");
    dataDir = new File(metaDir).getParentFile();
    freshHolder.directory = new NIOFSDirectory(indexDir);

    IndexWriter indexWriter = null;
    try {
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer)
                .setRAMBufferSizeMB(5);
        indexWriter = new IndexWriter(freshHolder.directory, writerConfig);
        // start from an empty index, then re-index everything under dataDir
        indexWriter.deleteAll();
        writeIndex(indexWriter, dataDir);
    } finally {
        IOUtils.closeQuietly(indexWriter);
    }

    freshHolder.indexReader = DirectoryReader.open(freshHolder.directory);

    // swap in the new holder before releasing the one it replaces
    SearchHolder previousHolder = stagedSearch;
    stagedSearch = freshHolder;
    if (previousHolder != null) {
        previousHolder.close();
    }

    log.info("About to call processSearchPreprocessors()");
    processSearchPreprocessors(freshHolder.indexReader, analyzer, "content");
}

From source file:com.meltwater.elasticsearch.index.RamDirectoryPercolatorIndex.java

License:Apache License

/**
 * Indexes all Lucene documents of the given parsed documents into a new in-memory directory.
 * Each document is added with the analyzer supplied by its owning {@code ParsedDocument}.
 *
 * @param parsedDocuments the parsed documents whose Lucene documents should be indexed
 * @return the {@link RAMDirectory} holding the newly written index
 * @throws ElasticsearchException if writing to the in-memory directory fails
 */
public Directory indexDocuments(List<ParsedDocument> parsedDocuments) {
    try {
        Directory directory = new RAMDirectory();
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_4_10_4,
                mapperService.analysisService().defaultIndexAnalyzer());
        // try-with-resources guarantees the writer is closed even if addDocument fails
        try (IndexWriter iwriter = new IndexWriter(directory, conf)) {
            for (ParsedDocument document : parsedDocuments) {
                for (ParseContext.Document doc : document.docs()) {
                    iwriter.addDocument(doc, document.analyzer());
                }
            }
        }
        return directory;
    } catch (IOException e) {
        throw new ElasticsearchException("Failed to write documents to RAMDirectory", e);
    }
}

From source file:com.miliworks.virgo.test.LuceneIndexAndSearchDemo.java

License:Apache License

/**
 * Demo: indexes a single document into an in-memory index using {@code IKAnalyzer}, then parses
 * a keyword query against the text field and prints the query, the total hit count and each
 * returned document.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    // name of the Lucene document field that holds the text
    String fieldName = "text";
    // content to index
    String text = "IK Analyzer???????";

    // analyzer instance shared by indexing and query parsing
    Analyzer analyzer = new IKAnalyzer(true);

    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        // in-memory index
        directory = new RAMDirectory();

        // configure and open the writer
        IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);

        // write the single demo document
        Document doc = new Document();
        doc.add(new StringField("ID", "10000", Field.Store.YES));
        doc.add(new TextField(fieldName, text, Field.Store.YES));
        iwriter.addDocument(doc);
        // the writer must be closed before the reader is opened on the directory
        iwriter.close();
        iwriter = null;

        // search phase: open a reader and searcher on the index just written
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        // build the Query with QueryParser, requiring all terms to match
        QueryParser qp = new QueryParser(Version.LUCENE_40, fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        // fetch at most the 5 best hits
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("" + topDocs.totalHits);

        // scoreDocs holds at most 5 entries while totalHits counts every match,
        // so the loop must be bounded by the array length
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < scoreDocs.length; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println("" + targetDoc.toString());
        }

    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        // still non-null only when indexing failed before the regular close above
        if (iwriter != null) {
            try {
                iwriter.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:com.mycompany.mavenproject1.Main.java

/**
 * Indexes the items read from a hard-coded XML file into a hard-coded on-disk index, then runs a
 * sample query against the index and prints the id, price, name and category of up to 10 hits.
 * <p>
 * A failure while reading or indexing items is reported on standard error and the search still
 * runs against whatever was indexed.
 *
 * @param args command-line arguments (not used)
 * @throws IOException    if the index cannot be opened, written, or read
 * @throws ParseException if the sample query cannot be parsed
 */
public static void main(String[] args) throws IOException, ParseException {
    StandardAnalyzer analyzer = new StandardAnalyzer();

    try (Directory index = new SimpleFSDirectory(Paths.get(
            "C:\\Users\\slete\\Documents\\NetBeansProjects\\mavenproject1\\src\\main\\java\\com\\mycompany\\mavenproject1\\data"))) {

        IndexWriterConfig config = new IndexWriterConfig(analyzer);

        // the writer is closed (and its changes committed) even if indexing fails part-way
        try (IndexWriter w = new IndexWriter(index, config)) {
            try (ItemProvider provider = new ItemProvider(
                    "C:\\Users\\slete\\Documents\\NetBeansProjects\\mavenproject1\\src\\main\\java\\com\\mycompany\\mavenproject1\\items.xml")) {
                while (provider.hasNext()) {
                    Item item = provider.next();
                    addItem(w, item);
                }
            } catch (XMLStreamException | IOException ex) {
                // report instead of silently discarding; the search below still runs
                System.err.println("Failed to index items: " + ex);
            }
        }

        // Other query strings that can be tried here:
        //   id:1* NOT id:19*
        //   name:Dekielek AND description:(ty AND obiektywu)
        //   category:Dek*
        //   price:[0.0 TO 100.0]
        String ds = "id:1232~2";

        Query q = new QueryParser("name", analyzer).parse(ds);

        int hitsPerPage = 10;
        try (IndexReader reader = DirectoryReader.open(index)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs docs = searcher.search(q, hitsPerPage);
            ScoreDoc[] hits = docs.scoreDocs;

            System.out.println("Found " + hits.length + " hits.");
            for (ScoreDoc hit : hits) {
                Document d = searcher.doc(hit.doc);
                System.out.println(
                        d.get("id") + "\t" + d.get("price") + "\t" + d.get("name") + "\t" + d.get("category"));
            }
        }
    }
}

From source file:com.mycompany.restlet.search.sample.indexer.java

License:Apache License

/**
 * Index all text files under a directory.
 * <p>
 * Recognised arguments: {@code -index INDEX_PATH} (defaults to {@code "index"}),
 * {@code -docs DOCS_PATH} (required) and {@code -update} (append to an existing index instead of
 * recreating it). The process exits with status 1 when {@code -docs} is missing, when an option
 * is given without its value, or when the documents directory is not readable.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        String arg = args[i];
        if ("-index".equals(arg) || "-docs".equals(arg)) {
            // both options take a value; fail with the usage text instead of an
            // ArrayIndexOutOfBoundsException when it is missing
            if (i + 1 >= args.length) {
                System.err.println("Missing value for " + arg);
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            String value = args[++i];
            if ("-index".equals(arg)) {
                indexPath = value;
            } else {
                docsPath = value;
            }
        } else if ("-update".equals(arg)) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    long startMillis = System.currentTimeMillis();
    System.out.println("Indexing to directory '" + indexPath + "'...");

    try (Directory dir = FSDirectory.open(Paths.get(indexPath))) {
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        // CREATE removes any previously indexed documents;
        // CREATE_OR_APPEND adds new documents to an existing index
        iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        // the writer is closed even when indexDocs fails part-way through
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        }

        long elapsedMillis = System.currentTimeMillis() - startMillis;
        System.out.println(elapsedMillis + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}