Example usage for org.apache.lucene.index IndexWriter IndexWriter

List of usage examples for org.apache.lucene.index IndexWriter IndexWriter

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriter IndexWriter.

Prototype

public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException 

Source Link

Document

Constructs a new IndexWriter per the settings given in conf.

Usage

From source file: at.ac.univie.mminf.luceneSKOS.test.termexpansion.URIbasedTermExpansionTest.java

License: Apache License

/**
 * This test indexes a sample metadata record (=lucene document) having a
 * "title", "description", and "subject" field, which is semantically
 * enriched by a URI pointing to a SKOS concept "weapons".
 * <p/>/* ww  w . ja  v a2  s.c o m*/
 * A search for "arms" returns that record as a result because "arms" is
 * defined as an alternative label (altLabel) for the concept "weapons".
 *
 * @throws IOException
 */
@Test
public void uriBasedTermExpansion() throws IOException {

    /* defining the document to be indexed */
    Document doc = new Document();
    doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED));
    doc.add(new Field("description",
            "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
                    + "The spear was mainly a thrusting weapon, but could also be thrown. "
                    + "It was the principal weapon of the auxiliary soldier... "
                    + "(second - fourth century, Arbeia Roman Fort).",
            TextField.TYPE_NOT_STORED));
    doc.add(new Field("subject", "http://www.ukat.org.uk/thesaurus/concept/859", TextField.TYPE_NOT_STORED));

    /* setting up the SKOS analyzer */
    String skosFile = "src/test/resources/skos_samples/ukat_examples.n3";
    String indexPath = "build/";

    /* ExpansionType.URI->the field to be analyzed (expanded) contains URIs */
    Analyzer skosAnalyzer = new SKOSAnalyzer(indexPath, skosFile, ExpansionType.URI);

    /* Define different analyzers for different fields */
    Map<String, Analyzer> analyzerPerField = new HashMap<>();
    analyzerPerField.put("subject", skosAnalyzer);
    PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(), analyzerPerField);

    /* setting up a writer with a default (simple) analyzer */
    writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(indexAnalyzer));

    /* adding the document to the index */
    writer.addDocument(doc);

    /* defining a query that searches over all fields */
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD);

    /* creating a new searcher */
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));

    TopDocs results = searcher.search(builder.build(), 10);

    /* the document matches because "arms" is among the expanded terms */
    assertEquals(1, results.totalHits);

    /* defining a query that searches for a broader concept */
    Query query = new TermQuery(new Term("subject", "military equipment"));

    results = searcher.search(query, 10);

    /* ... also returns the document as result */
    assertEquals(1, results.totalHits);

}

From source file: at.ac.univie.mminf.luceneSKOS.URIbasedTermExpansionTest.java

License: Apache License

/**
 * Same URI-based expansion scenario against the pre-4.x Lucene API
 * (Field.Store/Field.Index flags, mutable BooleanQuery, IndexReader.open):
 * a record tagged with the URI of the SKOS concept "weapons" must match a
 * search for its altLabel "arms" and for the label "military equipment".
 *
 * @throws IOException if the index or the SKOS model cannot be accessed
 */
@Test
public void uriBasedTermExpansion() throws IOException {

    /* analyzer setup: SKOS expansion for the URI-valued "subject" field,
     * SimpleAnalyzer for everything else */
    String thesaurusFile = "src/test/resources/skos_samples/ukat_examples.n3";
    Analyzer conceptAnalyzer = new SKOSAnalyzer(matchVersion, thesaurusFile, ExpansionType.URI);

    Map<String, Analyzer> fieldAnalyzers = new HashMap<String, Analyzer>();
    fieldAnalyzers.put("subject", conceptAnalyzer);
    PerFieldAnalyzerWrapper wrappedAnalyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(matchVersion),
            fieldAnalyzers);

    /* the sample metadata record to be indexed */
    Document record = new Document();
    record.add(new Field("title", "Spearhead", Field.Store.YES, Field.Index.ANALYZED));
    record.add(new Field("description",
            "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
                    + "The spear was mainly a thrusting weapon, but could also be thrown. "
                    + "It was the principal weapon of the auxiliary soldier... "
                    + "(second - fourth century, Arbeia Roman Fort).",
            Field.Store.NO, Field.Index.ANALYZED));
    record.add(new Field("subject", "http://www.ukat.org.uk/thesaurus/concept/859", Field.Store.NO,
            Field.Index.ANALYZED));

    /* index the record with the per-field analyzer */
    writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(matchVersion, wrappedAnalyzer));
    writer.addDocument(record);

    /* near-real-time reader over the open writer */
    searcher = new IndexSearcher(IndexReader.open(writer, false));

    /* "arms" over all fields: matches via the expanded altLabel terms */
    BooleanQuery armsQuery = new BooleanQuery();
    armsQuery.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD);
    armsQuery.add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD);
    armsQuery.add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD);
    TopDocs hits = searcher.search(armsQuery, 10);
    Assert.assertEquals(1, hits.totalHits);

    /* the broader concept label also matches the record */
    Query broaderQuery = new TermQuery(new Term("subject", "military equipment"));
    hits = searcher.search(broaderQuery, 10);
    Assert.assertEquals(1, hits.totalHits);

}

From source file: au.org.ala.names.search.ALANameIndexer.java

License: Open Source License

/**
 * Creates an index writer over the given directory, creating or
 * recreating the target directory as needed.
 *
 * @param directory the filesystem location of the index
 * @param analyzer  the analyzer the writer will use
 * @param replace   when true, any existing index is deleted and rebuilt
 * @return an open {@link IndexWriter} on the directory
 * @throws Exception if the directory cannot be prepared or opened
 */
protected IndexWriter createIndexWriter(File directory, Analyzer analyzer, boolean replace) throws Exception {
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_34, analyzer);
    // Recreate from scratch when replacing, otherwise append to any existing index.
    conf.setOpenMode(replace ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    // Wipe the target directory first when replacing an existing index.
    if (replace && directory.exists()) {
        FileUtils.forceDelete(directory);
    }
    FileUtils.forceMkdir(directory);
    return new IndexWriter(FSDirectory.open(directory), conf);
}

From source file: au.org.ala.names.search.ALANameSearcher.java

License: Open Source License

/**
 * Ensures an index directory exists at the given path, creating an empty
 * Lucene index inside it when the directory is missing.
 *
 * @param indexDirectory path of the index directory
 * @return the (possibly newly created) directory as a {@link File}
 * @throws IOException if the directory or the empty index cannot be created
 */
private File createIfNotExist(String indexDirectory) throws IOException {

    File idxFile = new File(indexDirectory);
    if (!idxFile.exists()) {
        FileUtils.forceMkdir(idxFile);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_34, analyzer);
        IndexWriter iw = new IndexWriter(FSDirectory.open(idxFile), conf);
        try {
            // An empty commit materialises the index structure on disk.
            iw.commit();
        } finally {
            // FIX: previously the writer (and its write lock) leaked when
            // commit() threw; always close it.
            iw.close();
        }
    }
    return idxFile;
}

From source file: back.Indexer.java

License: Apache License

/**
 * Indexes all text files under a directory (Lucene demo-style entry point).
 *
 * Recognised arguments: {@code -index INDEX_PATH}, {@code -docs DOCS_PATH},
 * and {@code -update} (append to an existing index instead of recreating it).
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = ".\\indexed";
    String docsPath = ".//artigos";
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    // NOTE(review): docsPath is initialised non-null and never reassigned to
    // null, so this guard is currently unreachable; kept for safety should the
    // default ever be removed.
    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        try {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT,
                    new CharArraySet(Version.LUCENE_CURRENT, 0, false));
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);

            // CREATE wipes any previous index; CREATE_OR_APPEND adds to it.
            iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

            // Optional: iwc.setRAMBufferSizeMB(256.0) speeds up bulk indexing
            // (raise the JVM heap accordingly, e.g. -Xmx512m or -Xmx1g).

            IndexWriter writer = new IndexWriter(dir, iwc);
            try {
                indexDocs(writer, docDir);

                // Optional: writer.forceMerge(1) maximizes search performance
                // but is costly; only worthwhile for a mostly-static index.
            } finally {
                // FIX: the writer previously leaked when indexDocs() threw.
                writer.close();
            }
        } finally {
            // FIX: the Directory itself was never closed before.
            dir.close();
        }

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file: bajavista.IndiceInvertido.java

/**
 * Builds the inverted index by reading every row of the {@code Tweet}
 * table and adding one Lucene document per row via {@code agregarDoc}.
 *
 * @throws IOException if the index directory or writer cannot be opened/closed
 */
public void crearIndiceInvertido() throws IOException {
    // 0. The same analyzer must be used for indexing and searching.
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);

    // 1. Create the index.
    File indexDirES = new File(dirIndexES);
    Directory indexES = FSDirectory.open(indexDirES);

    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer);

    IndexWriter wES = new IndexWriter(indexES, config);

    ConexionBD db = new ConexionBD();
    try {
        try (PreparedStatement consulta = db.getConnection().prepareStatement("SELECT * FROM Tweet");
                ResultSet res = consulta.executeQuery()) {
            // One Lucene document per tweet row.
            while (res.next()) {
                agregarDoc(wES, res.getString("idUser"), res.getString("timestamp"), res.getString("text"),
                        res.getString("objective"), res.getString("subjective"), res.getString("positive"),
                        res.getString("negative"), res.getString("need"));
            }

        }

    } catch (Exception e) {
        // Best-effort: report the failure but still close the resources below.
        System.out.print("No se pudo consultar a la base de datos\n" + e);
    } finally {
        // FIX: disconnect and close even when an error escapes the query
        // block; previously a failure here leaked the open index writer.
        db.desconectar();
        wES.close();
    }
}

From source file: bbejeck.nosql.lucene.LuceneSqlFileSystemSearchBase.java

License: Apache License

/**
 * Opens the on-disk index at {@code path} and attaches an {@link IndexWriter}
 * to it using the pre-built {@code config}.
 *
 * @param path filesystem location of the Lucene index directory
 * @throws Exception if the directory or writer cannot be opened
 */
public void init(String path) throws Exception {
    fsDirectory = FSDirectory.open(Paths.get(path));
    iwriter = new IndexWriter(fsDirectory, config);

}

From source file: bbejeck.nosql.lucene.LuceneSqlSearchBase.java

License: Apache License

/**
 * Attaches an {@link IndexWriter} to the in-memory {@code ramDirectory}
 * using the pre-built {@code config}.
 *
 * @throws Exception if the writer cannot be opened
 */
public void init() throws Exception {
    iwriter = new IndexWriter(ramDirectory, config);
}

From source file: be.ugent.tiwi.sleroux.newsrec.newsreclib.newsFetch.storm.bolts.LuceneIndexBolt.java

License: Apache License

@Override
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
    this.collector = collector;
    try {
        logger.info("Opening index");
        // Open the on-disk index and build the English term extractor and
        // writer around the same analyzer.
        Directory indexDir = FSDirectory.open(new File(indexLocation));
        NewsRecLuceneAnalyzer newsAnalyzer = LanguageAnalyzerHelper.getInstance().getAnalyzer(Locale.ENGLISH);
        this.termExtract = new LuceneTopTermExtract(newsAnalyzer);
        IndexWriterConfig writerConfig = new IndexWriterConfig(Config.LUCENE_VERSION, newsAnalyzer);
        writer = new IndexWriter(indexDir, writerConfig);
    } catch (IOException ex) {
        // NOTE(review): on failure the bolt continues with a null writer —
        // confirm later tuples are expected to fail rather than the topology.
        logger.error(ex);
    }
}

From source file: be.ugent.tiwi.sleroux.newsrec.stormNewsFetch.storm.bolts.LuceneIndexBolt.java

License: Apache License

@Override
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
    this.collector = collector;
    try {
        logger.info("Opening index");
        // Open the on-disk index and configure the English analyzer with the
        // stop-word list loaded from stopwordsLocation.
        Directory indexDir = FSDirectory.open(new File(indexLocation));
        EnAnalyzer englishAnalyzer = new EnAnalyzer();
        englishAnalyzer.setStopwords(getStopwords(stopwordsLocation));
        IndexWriterConfig writerConfig = new IndexWriterConfig(Config.LUCENE_VERSION, englishAnalyzer);
        writer = new IndexWriter(indexDir, writerConfig);
    } catch (IOException ex) {
        // NOTE(review): on failure the bolt continues with a null writer —
        // confirm later tuples are expected to fail rather than the topology.
        logger.error(ex);
    }
}