Example usage for org.apache.lucene.index IndexWriterConfig IndexWriterConfig

List of usage examples for org.apache.lucene.index IndexWriterConfig IndexWriterConfig

Introduction

On this page you can find example usages of the org.apache.lucene.index.IndexWriterConfig constructor IndexWriterConfig(Analyzer).

Prototype

public IndexWriterConfig(Analyzer analyzer) 

Source Link

Document

Creates a new config with the provided Analyzer.

Usage

From source file:com.codenvy.test.lucene.DeleteFilesWithSameName.java

License:Open Source License

/**
 * Indexes the files {@code files/File1} and {@code files/File1A}, prints the search
 * results, deletes every document matched by a prefix query on
 * {@code filesDirPath + "/File1"}, and prints the search results again.
 *
 * <p>The index is written to the relative directory {@code index} and is opened with
 * {@code OpenMode.CREATE}. If the {@code files} directory is not readable the method
 * prints a message and returns without touching the index.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if indexing, searching or deleting fails
 */
public static void main(String[] args) throws Exception {
    String docDirName = "files";
    filesDirPath = Paths.get(docDirName).toAbsolutePath().toString();

    Path indexPath = Paths.get("index");
    Path docDir = Paths.get(docDirName);

    // Bail out before any index resource is opened.
    if (!Files.isReadable(docDir)) {
        System.out.println("document folder not found");
        return;
    }

    // NOTE(review): the two names share the prefix "File1", so the prefix delete
    // below presumably removes both -- confirm against what indexDocs stores in PATH.
    Path file1 = Paths.get(docDirName, "File1");
    Path file2 = Paths.get(docDirName, "File1A");

    Analyzer analyzer = new SimpleAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // try-with-resources releases the writer and the directory even when
    // indexing, searching or deleting throws.
    try (Directory index = FSDirectory.open(indexPath);
         IndexWriter writer = new IndexWriter(index, iwc)) {

        // add files to index
        indexDocs(writer, file1);
        indexDocs(writer, file2);
        writer.commit();

        searchAndPrintResult(indexPath);

        // delete files
        System.out.println();
        System.out.println("==================================================================");
        System.out.println("delete by prefix \"" + filesDirPath + "/File1\"");
        Query query = new PrefixQuery(new Term(PATH, filesDirPath + "/File1"));

        writer.deleteDocuments(query);
    }

    // Runs only after the writer has been closed, as in the original flow,
    // so the deletion has been flushed before the second search.
    searchAndPrintResult(indexPath);
}

From source file:com.codenvy.test.lucene.IndexFiles.java

License:Open Source License

/**
 * Index all text files under a directory.
 *
 * <p>Recognized arguments:
 * <ul>
 *   <li>{@code -index INDEX_PATH} - where the index is written (default {@code "index"})</li>
 *   <li>{@code -docs DOCS_PATH} - directory to index (required)</li>
 *   <li>{@code -update} - add to an existing index instead of recreating it</li>
 * </ul>
 * Prints the usage text and exits with status 1 when {@code -docs} is missing, when an
 * option that needs a value is the last argument, or when the documents directory is
 * not readable. An {@link IOException} during indexing is reported on standard output.
 *
 * @param args command-line arguments
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        String arg = args[i];
        if ("-index".equals(arg) || "-docs".equals(arg)) {
            // Both options consume the following argument; without this check a
            // trailing option fails with ArrayIndexOutOfBoundsException.
            if (i + 1 >= args.length) {
                System.err.println("Missing value for option " + arg);
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            String value = args[++i];
            if ("-index".equals(arg)) {
                indexPath = value;
            } else {
                docsPath = value;
            }
        } else if ("-update".equals(arg)) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    long startMillis = System.currentTimeMillis();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        // try-with-resources closes the writer and the directory even when
        // indexDocs throws.
        try (Directory dir = FSDirectory.open(Paths.get(indexPath));
             IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        }

        // Measured after the writer is closed, so the closing work is included.
        long elapsedMillis = System.currentTimeMillis() - startMillis;
        System.out.println(elapsedMillis + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:com.company.Indexer.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>Recognized arguments: {@code -index INDEX_PATH} (default {@code "index"}) and
 * {@code -docs DOCS_PATH} (required). This variant has no update option, so the index
 * is always opened with {@code OpenMode.CREATE}. Before indexing, the documents path
 * is written to the file {@code pathToDocs.txt} in the working directory.
 *
 * <p>Prints the usage text and exits with status 1 when {@code -docs} is missing, when
 * an option is the last argument and so has no value, or when the documents directory
 * is not readable. An {@link IOException} during indexing is reported on standard output.
 *
 * @param args command-line arguments
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    for (int i = 0; i < args.length; i++) {
        String arg = args[i];
        if ("-index".equals(arg) || "-docs".equals(arg)) {
            // Both options consume the following argument; without this check a
            // trailing option fails with ArrayIndexOutOfBoundsException.
            if (i + 1 >= args.length) {
                System.err.println("Missing value for option " + arg);
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            String value = args[++i];
            if ("-index".equals(arg)) {
                indexPath = value;
            } else {
                docsPath = value;
            }
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    long startMillis = System.currentTimeMillis();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        // Record the documents path in a file next to the working directory.
        try (PrintWriter pathRecord = new PrintWriter("pathToDocs.txt", "UTF-8")) {
            pathRecord.print(docsPath);
        }

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        // Create a new index in the directory, removing any previously indexed
        // documents. The former CREATE_OR_APPEND branch was unreachable because
        // no argument ever switched it on.
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        // try-with-resources closes the writer and the directory even when
        // indexDocs throws.
        try (Directory dir = FSDirectory.open(Paths.get(indexPath));
             IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        }

        // Measured after the writer is closed, so the closing work is included.
        long elapsedMillis = System.currentTimeMillis() - startMillis;
        System.out.println(elapsedMillis + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:com.company.IndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>Recognized arguments:
 * <ul>
 *   <li>{@code -index INDEX_PATH} - where the index is written (default {@code "index"})</li>
 *   <li>{@code -docs DOCS_PATH} - directory to index (required)</li>
 *   <li>{@code -update} - add to an existing index instead of recreating it</li>
 * </ul>
 * Prints the usage text and exits with status 1 when {@code -docs} is missing, when an
 * option that needs a value is the last argument, or when the documents directory is
 * not readable. An {@link IOException} during indexing is reported on standard output.
 *
 * @param args command-line arguments
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        String option = args[i];
        boolean isIndexOption = "-index".equals(option);
        if (isIndexOption || "-docs".equals(option)) {
            // The option's value is the next argument; guard against the option
            // being last, which used to raise ArrayIndexOutOfBoundsException.
            if (i + 1 >= args.length) {
                System.err.println("Missing value for option " + option);
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            i++;
            if (isIndexOption) {
                indexPath = args[i];
            } else {
                docsPath = args[i];
            }
        } else if ("-update".equals(option)) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    long startMillis = System.currentTimeMillis();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        // try-with-resources closes the writer and the directory even when
        // indexDocs throws.
        try (Directory dir = FSDirectory.open(Paths.get(indexPath));
             IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        }

        // Measured after the writer is closed, so the closing work is included.
        long elapsedMillis = System.currentTimeMillis() - startMillis;
        System.out.println(elapsedMillis + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:com.czw.search.lucene.example.facet.AssociationsFacetsExample.java

License:Apache License

/**
 * Builds the example index: two documents carrying int associations under "tags"
 * and float associations under "genre".
 *
 * @throws IOException if writing to the index or the taxonomy fails
 */
private void index() throws IOException {
    IndexWriterConfig writerConfig = new IndexWriterConfig(new WhitespaceAnalyzer());
    writerConfig.setOpenMode(OpenMode.CREATE);
    IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig);

    // Facet ordinals go to their own directory, separate from the main index.
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    // First document: 'lucene' tagged 3 times, 'computing' genre at 87% confidence.
    Document firstDoc = new Document();
    firstDoc.add(new IntAssociationFacetField(3, "tags", "lucene"));
    firstDoc.add(new FloatAssociationFacetField(0.87f, "genre", "computing"));
    indexWriter.addDocument(config.build(taxoWriter, firstDoc));

    // Second document: 'lucene' tagged once and 'solr' twice; genres
    // 'computing' at 75% and 'software' at 34% confidence.
    Document secondDoc = new Document();
    secondDoc.add(new IntAssociationFacetField(1, "tags", "lucene"));
    secondDoc.add(new IntAssociationFacetField(2, "tags", "solr"));
    secondDoc.add(new FloatAssociationFacetField(0.75f, "genre", "computing"));
    secondDoc.add(new FloatAssociationFacetField(0.34f, "genre", "software"));
    indexWriter.addDocument(config.build(taxoWriter, secondDoc));

    indexWriter.close();
    taxoWriter.close();
}

From source file:com.czw.search.lucene.example.facet.DistanceFacetsExample.java

License:Apache License

/**
 * Builds the example index: three documents, each with a latitude and a longitude,
 * then opens a near-real-time searcher on the writer and stores it in
 * {@code searcher}.
 *
 * <p>The writer is managed by try-with-resources so it is closed even when adding a
 * document or opening the reader fails.
 *
 * @throws IOException if writing to the index or opening the reader fails
 */
public void index() throws IOException {
    // {latitude, longitude} in degrees, one row per document.
    // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter
    final double[][] locations = {
            {40.759011, -73.9844722},
            {40.718266, -74.007819},
            {40.7051157, -74.0088305},
    };

    IndexWriterConfig writerConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(OpenMode.CREATE);
    try (IndexWriter writer = new IndexWriter(indexDir, writerConfig)) {
        // Add documents with latitude/longitude location:
        // we index these both as DoublePoints (for bounding box/ranges) and as
        // NumericDocValuesFields (for scoring)
        for (double[] location : locations) {
            double latitude = location[0];
            double longitude = location[1];

            Document doc = new Document();
            doc.add(new DoublePoint("latitude", latitude));
            doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(latitude)));
            doc.add(new DoublePoint("longitude", longitude));
            doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(longitude)));
            writer.addDocument(doc);
        }

        // Open near-real-time searcher; this must happen before the writer closes.
        searcher = new IndexSearcher(DirectoryReader.open(writer));
    }
}

From source file:com.czw.search.lucene.example.facet.ExpressionAggregationFacetsExample.java

License:Apache License

/**
 * Builds the example index: two documents, each with a text field "c", a numeric
 * doc-values field "popularity" and one facet under dimension "A".
 *
 * @throws IOException if writing to the index or the taxonomy fails
 */
private void index() throws IOException {
    IndexWriterConfig writerConfig = new IndexWriterConfig(new WhitespaceAnalyzer());
    writerConfig.setOpenMode(OpenMode.CREATE);
    IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig);

    // Facet ordinals go to their own directory, separate from the main index.
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document firstDoc = new Document();
    firstDoc.add(new TextField("c", "foo bar", Store.NO));
    firstDoc.add(new NumericDocValuesField("popularity", 5L));
    firstDoc.add(new FacetField("A", "B"));
    indexWriter.addDocument(config.build(taxoWriter, firstDoc));

    Document secondDoc = new Document();
    secondDoc.add(new TextField("c", "foo foo bar", Store.NO));
    secondDoc.add(new NumericDocValuesField("popularity", 3L));
    secondDoc.add(new FacetField("A", "C"));
    indexWriter.addDocument(config.build(taxoWriter, secondDoc));

    indexWriter.close();
    taxoWriter.close();
}

From source file:com.czw.search.lucene.example.facet.MultiCategoryListsFacetsExample.java

License:Apache License

/**
 * Builds the example index: five documents, each with an "Author" facet and a
 * three-level "Publish Date" facet (year, month, day).
 *
 * @throws IOException if writing to the index or the taxonomy fails
 */
private void index() throws IOException {
    // One row per document: author, then publish year / month / day.
    final String[][] entries = {
            {"Bob", "2010", "10", "15"},
            {"Lisa", "2010", "10", "20"},
            {"Lisa", "2012", "1", "1"},
            {"Susan", "2012", "1", "7"},
            {"Frank", "1999", "5", "5"},
    };

    IndexWriterConfig writerConfig = new IndexWriterConfig(new WhitespaceAnalyzer());
    writerConfig.setOpenMode(OpenMode.CREATE);
    IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig);

    // Facet ordinals go to their own directory, separate from the main index.
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    for (String[] entry : entries) {
        Document doc = new Document();
        doc.add(new FacetField("Author", entry[0]));
        doc.add(new FacetField("Publish Date", entry[1], entry[2], entry[3]));
        indexWriter.addDocument(config.build(taxoWriter, doc));
    }

    indexWriter.close();
    taxoWriter.close();
}

From source file:com.czw.search.lucene.example.facet.RangeFacetsExample.java

License:Apache License

/**
 * Builds the example index: 100 documents whose "timestamp" values step back from
 * {@code nowSec} in increments of 1000, then opens a near-real-time searcher on the
 * writer and stores it in {@code searcher}.
 *
 * <p>The writer is managed by try-with-resources so it is closed even when adding a
 * document or opening the reader fails.
 *
 * @throws IOException if writing to the index or opening the reader fails
 */
public void index() throws IOException {
    IndexWriterConfig writerConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(OpenMode.CREATE);
    try (IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig)) {
        // Add documents with a fake timestamp: "now" itself (i == 0), then
        // 1000 sec before "now", 2000 sec before "now", ...:
        for (int i = 0; i < 100; i++) {
            Document doc = new Document();
            long then = nowSec - i * 1000;
            // Add as doc values field, so we can compute range facets:
            doc.add(new NumericDocValuesField("timestamp", then));
            // Add as numeric field so we can drill-down:
            doc.add(new LongPoint("timestamp", then));
            indexWriter.addDocument(doc);
        }

        // Open near-real-time searcher; this must happen before the writer closes.
        searcher = new IndexSearcher(DirectoryReader.open(indexWriter));
    }
}

From source file:com.czw.search.lucene.example.facet.SimpleFacetsExample.java

License:Apache License

/**
 * Builds the example index: eight documents, each with an "Author" facet, a
 * three-level "Publish Date" facet (year, month, day) and a "Category" facet.
 *
 * @throws IOException if writing to the index or the taxonomy fails
 */
private void index() throws IOException {
    // One row per document: author, publish year / month / day, category.
    final String[][] entries = {
            {"Bob", "2010", "10", "15", "art"},
            {"Bob", "2010", "10", "15", "biography"},
            {"Lisa", "2010", "10", "20", "fiction"},
            {"Lisa", "2012", "1", "1", "food"},
            {"Susan", "2012", "1", "7", "science"},
            {"Frank", "1999", "5", "5", "crime"},
            {"Frank", "2011", "5", "15", "fiction"},
            {"Frank", "2017", "6", "5", "science"},
    };

    IndexWriterConfig writerConfig = new IndexWriterConfig(new WhitespaceAnalyzer());
    writerConfig.setOpenMode(OpenMode.CREATE);
    IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig);

    // Facet ordinals go to their own directory, separate from the main index.
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    for (String[] entry : entries) {
        Document doc = new Document();
        doc.add(new FacetField("Author", entry[0]));
        doc.add(new FacetField("Publish Date", entry[1], entry[2], entry[3]));
        doc.add(new FacetField("Category", entry[4]));
        indexWriter.addDocument(config.build(taxoWriter, doc));
    }

    indexWriter.close();
    taxoWriter.close();
}