Example usage for org.apache.lucene.index IndexWriter IndexWriter

List of usage examples for org.apache.lucene.index IndexWriter IndexWriter

Introduction

On this page you can find example usage for the org.apache.lucene.index IndexWriter constructor IndexWriter(Directory, IndexWriterConfig).

Prototype

public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException 

Document

Constructs a new IndexWriter per the settings given in conf.
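
A minimal sketch of this constructor in use is shown below. It assumes Lucene 5.x or later, where IndexWriterConfig takes only an Analyzer and FSDirectory.open accepts a java.nio.file.Path; the index path and field name are illustrative only.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class IndexWriterConstructorExample {
    public static void main(String[] args) throws Exception {
        // Open (or create) an index directory; the path is only an example.
        Directory dir = FSDirectory.open(Paths.get("example-index"));

        // The config carries the analyzer and the open mode.
        IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer())
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        // IndexWriter(Directory d, IndexWriterConfig conf) throws IOException
        try (IndexWriter writer = new IndexWriter(dir, conf)) {
            Document doc = new Document();
            doc.add(new TextField("contents", "hello lucene", Field.Store.YES));
            writer.addDocument(doc);
        }
    }
}

The OpenMode on the config decides whether an existing index is overwritten (CREATE) or extended (CREATE_OR_APPEND); most of the examples below set it explicitly.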

Usage

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Stores features from a specified feature file in the specified project's Lucene index.
 * Sample query: featureId:rs44022* AND (variationType:del OR variationType:ins)
 *
 * @param featureFileId the ID of the FeatureFile whose features to save
 * @param projectId the ID of the project for which to write the index
 * @param entries a list of FeatureIndexEntry objects to write to the index
 * @throws IOException if writing to the index fails
 */
public void writeLuceneIndexForProject(final Long featureFileId, final long projectId,
        final List<? extends FeatureIndexEntry> entries) throws IOException {
    try (StandardAnalyzer analyzer = new StandardAnalyzer();
            Directory index = fileManager.createIndexForProject(projectId);
            IndexWriter writer = new IndexWriter(index,
                    new IndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
        FacetsConfig facetsConfig = new FacetsConfig();
        facetsConfig.setIndexFieldName(FeatureIndexFields.CHR_ID.getFieldName(),
                FeatureIndexFields.FACET_CHR_ID.getFieldName());

        for (FeatureIndexEntry entry : entries) {
            Document document = new Document();
            addCommonDocumentFields(document, entry, featureFileId);

            if (entry instanceof VcfIndexEntry) {
                addVcfDocumentFields(document, entry);
            }

            writer.addDocument(facetsConfig.build(document));
        }
    }
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Stores features from a specified feature file in its Lucene index.
 * Sample query: featureId:rs44022* AND (variationType:del OR variationType:ins)
 *
 * @param featureFile the FeatureFile whose features to save
 * @param entries a list of FeatureIndexEntry objects to write to the index
 * @throws IOException if writing to the index fails
 */
public void writeLuceneIndexForFile(final FeatureFile featureFile,
        final List<? extends FeatureIndexEntry> entries) throws IOException {
    try (StandardAnalyzer analyzer = new StandardAnalyzer();
            Directory index = fileManager.createIndexForFile(featureFile);
            IndexWriter writer = new IndexWriter(index,
                    new IndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
        FacetsConfig facetsConfig = new FacetsConfig();
        facetsConfig.setIndexFieldName(FeatureIndexFields.CHR_ID.getFieldName(),
                FeatureIndexFields.FACET_CHR_ID.getFieldName());
        facetsConfig.setIndexFieldName(FeatureIndexFields.F_UID.getFieldName(),
                FeatureIndexFields.FACET_UID.getFieldName());

        for (FeatureIndexEntry entry : entries) {
            Document document = new Document();
            addCommonDocumentFields(document, entry, featureFile.getId());

            if (entry instanceof VcfIndexEntry) {
                addVcfDocumentFields(document, entry);
            }

            writer.addDocument(facetsConfig.build(document));
        }
    }
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Deletes features of the specified feature files from a project's index.
 *
 * @param projectId the project from which to delete index entries
 * @param fileIds a list of Pairs of feature type to file ID, whose entries should be deleted. To delete gene file
 *                entries, pass FeatureType.GENE
 */
public void deleteFromIndexByFileId(final long projectId, List<Pair<FeatureType, Long>> fileIds) {
    if (fileIds == null || fileIds.isEmpty() || !fileManager.indexForProjectExists(projectId)) {
        return;
    }

    try (StandardAnalyzer analyzer = new StandardAnalyzer();
            Directory index = fileManager.getIndexForProject(projectId);
            IndexWriter writer = new IndexWriter(index,
                    new IndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
        if (fileManager.indexForProjectExists(projectId)) {
            for (Pair<FeatureType, Long> id : fileIds) {
                deleteDocumentByTypeAndId(id.getKey(), id.getValue(), writer);
            }
        }
    } catch (IOException e) {
        LOGGER.error("Exception while deleting from index:", e);
    }
}

From source file:com.epimorphics.server.indexers.LuceneIndex.java

License:Apache License

protected synchronized IndexWriter getIndexWriter() {
    if (writer == null) {
        try {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);
            // Set the open mode before constructing the writer; changes made to the
            // config after the writer has been created are not applied.
            config.setOpenMode(OpenMode.CREATE_OR_APPEND);
            writer = new IndexWriter(indexDir, config);
        } catch (Exception e) {
            throw new EpiException(e);
        }
    }
    return writer;
}

From source file:com.evoapps.indexPubData.IndexFiles.java

License:Apache License

void indextitle(String id, String title) {

    iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    if (create) {
        // Create a new index in the directory, removing any
        // previously indexed documents:
        iwc.setOpenMode(OpenMode.CREATE);
    } else {
        // Add new documents to an existing index:
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    }

    try {
        writer = new IndexWriter(dir, iwc);
    } catch (IOException e1) {
        // If the writer cannot be opened there is nothing to index into, so bail out.
        e1.printStackTrace();
        return;
    }

    Document doc = new Document();

    Field pathField = new StringField("path", id, Field.Store.YES);
    doc.add(pathField);
    //      doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
    //doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));
    //check the role of UTF-8 string in the doc.add() method
    doc.add(new TextField("contents", title, Field.Store.YES));

    try {
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed), so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            writer.updateDocument(new Term("path", id), doc);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            // Close the writer in both branches, not only after an update.
            writer.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}

From source file:com.evoapps.indexPubData.IndexFiles.java

License:Apache License

public void indexDocumentList(ArrayList<com.evoapps.indexPubData.Document> list) {
    Iterator<com.evoapps.indexPubData.Document> it = list.iterator();
    com.evoapps.indexPubData.Document document;

    iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    if (create) {
        // Create a new index in the directory, removing any
        // previously indexed documents:
        iwc.setOpenMode(OpenMode.CREATE);
    } else {
        // Add new documents to an existing index:
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    }

    try {
        writer = new IndexWriter(dir, iwc);
    } catch (IOException e1) {
        // If the writer cannot be opened there is nothing to index into, so bail out.
        e1.printStackTrace();
        return;
    }

    while (it.hasNext()) {
        document = it.next();
        Document doc = new Document();

        Field pathField = new StringField("path", document.getTitle(), Field.Store.YES);
        doc.add(pathField);
        //              doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
        //doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));
        //check the role of UTF-8 string in the doc.add() method
        System.out.println("path>>" + document.getTitle() + "content>>" + document.getContent());
        doc.add(new TextField("contents", document.getContent(), Field.Store.YES));

        System.out.println("doc" + doc.getFields());
        try {
            if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                // New index, so we just add the document (no old document can be there):
                System.out.println("adding " + document.getContent());
                writer.addDocument(doc);
            } else {
                // Existing index (an old copy of this document may have been indexed), so
                // we use updateDocument instead to replace the old one matching the exact
                // path, if present. Do not close the writer here: closing it inside the
                // loop would break indexing of the remaining documents.
                writer.updateDocument(new Term("path", document.getTitle()), doc);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    try {
        // Close the writer once, after all documents have been processed.
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }

}

From source file:com.example.search.IndexFiles.java

License:Apache License

/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    // String docsPath = "E:\\work\\webExp\\exp\\sina_ansi";
    String docsPath = ".\\doc";
    boolean create = true;
    /*    for(int i=0;i<args.length;i++) {
          if ("-index".equals(args[i])) {
            indexPath = args[i+1];
            i++;
          } else if ("-docs".equals(args[i])) {
            docsPath = args[i+1];
            i++;
          } else if ("-update".equals(args[i])) {
            create = false;
          }
        }*/

    /* if (docsPath == null) {
       System.err.println("Usage: " + usage);
       System.exit(1);
     }*/

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        //      Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        Analyzer analyzer = new ICTCLASAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        // Avoid the deprecated Date.getMinutes(); derive the elapsed minutes from milliseconds.
        System.out.println((end.getTime() - start.getTime()) / 60000 + " total minutes");
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:com.flycode.CRIBSearch.SearchEngine.Demo.IndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 */
public static void main(String[] args) {
    String usage = "java com.flycode.CRIBSearch.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + "in INDEX_PATH that can be searched with SearchFiles class";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:com.FormBasedXmlQueryDemo.java

License:Apache License

private void openExampleIndex() throws IOException {
    //Create a RAM-based index from our test data file
    RAMDirectory rd = new RAMDirectory();
    IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    IndexWriter writer = new IndexWriter(rd, iwConfig);
    InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv");
    BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, IOUtils.CHARSET_UTF_8));
    String line = br.readLine();
    final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED);
    textNoNorms.setOmitNorms(true);
    while (line != null) {
        line = line.trim();
        if (line.length() > 0) {
            //parse row and create a document
            StringTokenizer st = new StringTokenizer(line, "\t");
            Document doc = new Document();
            doc.add(new Field("location", st.nextToken(), textNoNorms));
            doc.add(new Field("salary", st.nextToken(), textNoNorms));
            doc.add(new Field("type", st.nextToken(), textNoNorms));
            doc.add(new Field("description", st.nextToken(), textNoNorms));
            writer.addDocument(doc);
        }
        line = br.readLine();
    }
    writer.close();

    //open searcher
    // this example never closes its reader!
    IndexReader reader = DirectoryReader.open(rd);
    searcher = new IndexSearcher(reader);
}

From source file:com.foundationdb.lucene.SimpleTest.java

License:Open Source License

@Test
public void indexBasic() throws Exception {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44, analyzer);
    // recreate the index on each execution
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    config.setCodec(new FDBCodec());
    FDBDirectory dir = createDirectoryForMethod();
    IndexWriter writer = new IndexWriter(dir, config);
    try {
        writer.addDocument(Arrays.asList(new TextField("title", "The title of my first document", Store.YES),
                new TextField("content", "The content of the first document", Store.NO)));

        writer.addDocument(Arrays.asList(new TextField("title", "The title of the second document", Store.YES),
                new TextField("content", "And this is the content", Store.NO)));
    } finally {
        writer.close();
    }
    assertDocumentsAreThere(dir, 2);
}