Usage examples for the org.apache.lucene.index.IndexWriterConfig constructor:
public IndexWriterConfig(Analyzer analyzer)
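Before the project-specific examples below, a minimal self-contained sketch of the constructor in use. Class and field names here (MinimalIndexWriterConfigExample, the "index" path, the "body" field) are illustrative placeholders, not taken from any of the sources that follow.

import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

// Minimal sketch: wrap an Analyzer in an IndexWriterConfig and hand it to an IndexWriter.
public class MinimalIndexWriterConfigExample {
    public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(Paths.get("index")); // placeholder index path
        Analyzer analyzer = new StandardAnalyzer();

        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); // create if missing, otherwise append

        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            Document doc = new Document();
            doc.add(new TextField("body", "hello lucene", Field.Store.YES));
            writer.addDocument(doc);
        }
        dir.close();
    }
}

The pattern in every example below is the same: construct the config from an Analyzer, optionally set the open mode and other options, then pass it to the IndexWriter constructor.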
From source file:com.czw.search.lucene.example.facet.SimpleSortedSetFacetsExample.java
License:Apache License
/** Build the example index. */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

    // 'config' is assumed to be the FacetsConfig field declared elsewhere in this example class.
    Document doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Bob"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "2010"));
    indexWriter.addDocument(config.build(doc));

    doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Lisa"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "2010"));
    indexWriter.addDocument(config.build(doc));

    doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Lisa"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "2012"));
    indexWriter.addDocument(config.build(doc));

    doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Susan"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "2012"));
    indexWriter.addDocument(config.build(doc));

    doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Frank"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "1999"));
    indexWriter.addDocument(config.build(doc));

    indexWriter.close();
}
From source file:com.czw.search.lucene.example.xmlparser.FormBasedXmlQueryDemo.java
License:Apache License
private void openExampleIndex() throws IOException {
    // Create a RAM-based index from our test data file
    RAMDirectory rd = new RAMDirectory();
    IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
    IndexWriter writer = new IndexWriter(rd, iwConfig);
    InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv");
    BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, StandardCharsets.UTF_8));
    String line = br.readLine();
    final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED);
    textNoNorms.setOmitNorms(true);
    while (line != null) {
        line = line.trim();
        if (line.length() > 0) {
            // parse row and create a document
            StringTokenizer st = new StringTokenizer(line, "\t");
            Document doc = new Document();
            doc.add(new Field("location", st.nextToken(), textNoNorms));
            doc.add(new Field("salary", st.nextToken(), textNoNorms));
            doc.add(new Field("type", st.nextToken(), textNoNorms));
            doc.add(new Field("description", st.nextToken(), textNoNorms));
            writer.addDocument(doc);
        }
        line = br.readLine();
    }
    writer.close();

    // Open the searcher (note: this example never closes its reader).
    IndexReader reader = DirectoryReader.open(rd);
    searcher = new IndexSearcher(reader);
}
From source file:com.difference.historybook.index.lucene.LuceneIndex.java
License:Apache License
/**
 * Constructor for LuceneIndex
 *
 * @param dataDirectory Path to the directory to create an index directory within.
 * @throws IndexException
 */
public LuceneIndex(Path dataDirectory) throws IndexException {
    //TODO: Check to make sure directory is read/writable
    path = dataDirectory.resolve(INDEXDIR);
    try {
        dir = FSDirectory.open(path);
        analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(dir, iwc);
        reader = DirectoryReader.open(writer, false);
        searcher = new IndexSearcher(reader);
        parser = new QueryParser(IndexDocumentAdapter.FIELD_SEARCH, analyzer);
    } catch (IOException e) {
        LOG.error(e.getLocalizedMessage());
        throw new IndexException(e);
    }
}
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/**
 * Stores features from a specified feature file to the specified project's Lucene index
 * Sample query: featureId:rs44022* AND (variationType:del OR variationType:ins)
 *
 * @param featureFileId a FeatureFile, for which features to save
 * @param projectId a project, for which to write an index
 * @param entries a list of FeatureIndexEntry to write to index
 * @throws IOException
 */
public void writeLuceneIndexForProject(final Long featureFileId, final long projectId,
        final List<? extends FeatureIndexEntry> entries) throws IOException {
    try (StandardAnalyzer analyzer = new StandardAnalyzer();
            Directory index = fileManager.createIndexForProject(projectId);
            IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
        FacetsConfig facetsConfig = new FacetsConfig();
        facetsConfig.setIndexFieldName(FeatureIndexFields.CHR_ID.getFieldName(),
                FeatureIndexFields.FACET_CHR_ID.getFieldName());

        for (FeatureIndexEntry entry : entries) {
            Document document = new Document();
            addCommonDocumentFields(document, entry, featureFileId);

            if (entry instanceof VcfIndexEntry) {
                addVcfDocumentFields(document, entry);
            }

            writer.addDocument(facetsConfig.build(document));
        }
    }
}
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/**
 * Stores features from a specified feature file to its Lucene index
 * Sample query: featureId:rs44022* AND (variationType:del OR variationType:ins)
 *
 * @param featureFile a FeatureFile, for which features to save
 * @param entries a list of FeatureIndexEntry to write to index
 * @throws IOException
 */
public void writeLuceneIndexForFile(final FeatureFile featureFile,
        final List<? extends FeatureIndexEntry> entries) throws IOException {
    try (StandardAnalyzer analyzer = new StandardAnalyzer();
            Directory index = fileManager.createIndexForFile(featureFile);
            IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
        FacetsConfig facetsConfig = new FacetsConfig();
        facetsConfig.setIndexFieldName(FeatureIndexFields.CHR_ID.getFieldName(),
                FeatureIndexFields.FACET_CHR_ID.getFieldName());
        facetsConfig.setIndexFieldName(FeatureIndexFields.F_UID.getFieldName(),
                FeatureIndexFields.FACET_UID.getFieldName());

        for (FeatureIndexEntry entry : entries) {
            Document document = new Document();
            addCommonDocumentFields(document, entry, featureFile.getId());

            if (entry instanceof VcfIndexEntry) {
                addVcfDocumentFields(document, entry);
            }

            writer.addDocument(facetsConfig.build(document));
        }
    }
}
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/**
 * Deletes features from specified feature files from a project's index
 *
 * @param projectId a project to delete index entries from
 * @param fileIds a list of Pairs of feature type to file ID whose entries to delete. To delete gene file
 *                entries, pass FeatureType.GENE
 */
public void deleteFromIndexByFileId(final long projectId, List<Pair<FeatureType, Long>> fileIds) {
    if (fileIds == null || fileIds.isEmpty() || !fileManager.indexForProjectExists(projectId)) {
        return;
    }

    try (StandardAnalyzer analyzer = new StandardAnalyzer();
            Directory index = fileManager.getIndexForProject(projectId);
            IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
        if (fileManager.indexForProjectExists(projectId)) {
            for (Pair<FeatureType, Long> id : fileIds) {
                deleteDocumentByTypeAndId(id.getKey(), id.getValue(), writer);
            }
        }
    } catch (IOException e) {
        LOGGER.error("Exception while deleting from index:", e);
    }
}
From source file:com.flycode.CRIBSearch.SearchEngine.Demo.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java com.flycode.CRIBSearch.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + " in INDEX_PATH that can be searched with SearchFiles class";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size given to the JVM (e.g. add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (i.e.
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
From source file:com.fun.sb.demo.lucene.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + " in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "/Users/baidu/temp/index/";
    String docsPath = "/Users/baidu/temp/";
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size given to the JVM (e.g. add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (i.e.
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
From source file:com.gemstone.gemfire.cache.lucene.internal.distributed.DistributedScoringJUnitTest.java
License:Apache License
private IndexRepositoryImpl createIndexRepo() throws IOException {
    ConcurrentHashMap<String, File> fileRegion = new ConcurrentHashMap<String, File>();
    ConcurrentHashMap<ChunkKey, byte[]> chunkRegion = new ConcurrentHashMap<ChunkKey, byte[]>();
    RegionDirectory dir = new RegionDirectory(fileRegion, chunkRegion);

    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    IndexWriter writer = new IndexWriter(dir, config);

    return new IndexRepositoryImpl(region, writer, mapper);
}
From source file:com.gemstone.gemfire.cache.lucene.internal.IndexRepositoryFactory.java
License:Apache License
public IndexRepository createIndexRepository(final Integer bucketId, PartitionedRegion userRegion,
        PartitionedRegion fileRegion, PartitionedRegion chunkRegion, LuceneSerializer serializer,
        Analyzer analyzer, LuceneIndexStats indexStats, FileSystemStats fileSystemStats) throws IOException {
    final IndexRepository repo;
    BucketRegion fileBucket = getMatchingBucket(fileRegion, bucketId);
    BucketRegion chunkBucket = getMatchingBucket(chunkRegion, bucketId);
    if (fileBucket == null || chunkBucket == null) {
        return null;
    }

    RegionDirectory dir = new RegionDirectory(fileBucket, chunkBucket, fileSystemStats);
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    IndexWriter writer = new IndexWriter(dir, config);

    repo = new IndexRepositoryImpl(fileBucket, writer, serializer, indexStats);

    return repo;
}