Example usage for org.apache.lucene.index IndexWriterConfig setCodec

List of usage examples for org.apache.lucene.index IndexWriterConfig setCodec

Introduction

On this page you can find example usages of org.apache.lucene.index IndexWriterConfig setCodec.

Prototype

public IndexWriterConfig setCodec(Codec codec) 

Document

Set the Codec. Only takes effect when the IndexWriter is first created.
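
Before the collected examples, here is a minimal, self-contained sketch of calling setCodec on an IndexWriterConfig before opening an IndexWriter. It assumes a Lucene 5.x setup to match the examples below; the index path is illustrative, and the codec class and package (org.apache.lucene.codecs.lucene54.Lucene54Codec) depend on your Lucene version.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.lucene54.Lucene54Codec;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SetCodecExample {
    public static void main(String[] args) throws Exception {
        // Open (or create) an index directory; the path is illustrative.
        Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));

        // Choose the codec on the config; this only takes effect when the
        // IndexWriter is first created, so set it before constructing the writer.
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        config.setCodec(new Lucene54Codec()); // default stored fields mode (BEST_SPEED)

        try (IndexWriter writer = new IndexWriter(dir, config)) {
            // ... add documents here ...
            writer.commit();
        }
    }
}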

Usage

From source file:gov.nist.basekb.FreebaseIndexer.java

License:LGPL

public void initializeIndexBuilder() throws Exception {
    // Create a new index directory and writer to index a triples file.
    // Raise an error if an index already exists, so we don't accidentally overwrite it.
    String indexDir = getIndexDirectoryName();
    if ((new File(indexDir)).isDirectory())
        throw new IOException("Index directory already exists, remove it before indexing");

    indexDirectory = FSDirectory.open(Paths.get(indexDir));
    IndexWriterConfig iwc = new IndexWriterConfig(getIndexAnalyzer());

    // we always create a new index from scratch:
    iwc.setOpenMode(OpenMode.CREATE);
    iwc.setCodec(new Lucene54Codec(Mode.BEST_SPEED)); // the default
    //iwc.setCodec(new Lucene54Codec(Mode.BEST_COMPRESSION));  // slower, but better compression

    indexWriter = new IndexWriter(indexDirectory, iwc);
    indexAnalyzer = getIndexAnalyzer();

    if (INDEX_PREDICATES)
        printlnProg("Indexing individual predicates");
    if (INDEX_TEXT)
        printlnProg("Indexing combined predicate text values");
    if (INDEX_LANGUAGE)
        printlnProg("Indexing predicates for language(s): " + supportedLanguages);
}

From source file:gov.nist.basekb.FreebaseTools.java

License:LGPL

public void initializeIndexBuilder() throws Exception {
    // Create a new index directory and writer to index a triples file.
    // Raise an error if an index already exists, so we don't accidentally overwrite it.
    String indexDir = getIndexDirectoryName();
    if ((new java.io.File(indexDir)).isDirectory())
        throw new IOException("Index directory already exists, remove it before indexing");

    indexDirectory = FSDirectory.open(Paths.get(indexDir));
    IndexWriterConfig iwc = new IndexWriterConfig(getIndexAnalyzer());

    // we always create a new index from scratch:
    iwc.setOpenMode(OpenMode.CREATE);
    iwc.setCodec(new Lucene54Codec(Mode.BEST_SPEED)); // the default
    //iwc.setCodec(new Lucene54Codec(Mode.BEST_COMPRESSION));  // slower, but better compression

    indexWriter = new IndexWriter(indexDirectory, iwc);
    indexAnalyzer = getIndexAnalyzer();

    if (INDEX_PREDICATES)
        printlnProg("Indexing individual predicates");
    if (INDEX_TEXT)
        printlnProg("Indexing combined predicate text values");
    if (INDEX_LANGUAGE)
        printlnProg("Indexing predicates for language(s): " + supportedLanguages);
}

From source file:lsre.utils.LuceneUtils.java

License:Open Source License

/**
 * Creates an IndexWriter for the given index directory, with the given analyzer.
 *
 * @param directory the index directory
 * @param create    set to true if you want to create a new index
 * @param analyzer  the analyzer used for the IndexWriter
 * @return an IndexWriter
 * @throws IOException
 */
public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer)
        throws IOException {
    // set the analyzer according to the method params
    Analyzer tmpAnalyzer = null;
    if (analyzer == AnalyzerType.SimpleAnalyzer)
        tmpAnalyzer = new SimpleAnalyzer(); // LetterTokenizer with LowerCaseFilter
    else if (analyzer == AnalyzerType.WhitespaceAnalyzer)
        tmpAnalyzer = new WhitespaceAnalyzer(); // WhitespaceTokenizer
    else if (analyzer == AnalyzerType.KeywordAnalyzer)
        tmpAnalyzer = new KeywordAnalyzer(); // entire string as one token.
    else if (analyzer == AnalyzerType.StandardAnalyzer)
        tmpAnalyzer = new StandardAnalyzer();

    // The config
    IndexWriterConfig config = new IndexWriterConfig(tmpAnalyzer);
    config.setRAMBufferSizeMB(512);
    config.setCommitOnClose(true);
    if (create)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists.
    else
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise.

    config.setCodec(new LsreCustomCodec());
    return new IndexWriter(directory, config);
}

From source file:lsre.utils.LuceneUtils.java

License:Open Source License

public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer,
        double RAMBufferSize) throws IOException {
    // set the analyzer according to the method params
    Analyzer tmpAnalyzer = null;
    if (analyzer == AnalyzerType.SimpleAnalyzer)
        tmpAnalyzer = new SimpleAnalyzer();
    else if (analyzer == AnalyzerType.WhitespaceAnalyzer)
        tmpAnalyzer = new WhitespaceAnalyzer();

    // The config
    IndexWriterConfig config = new IndexWriterConfig(tmpAnalyzer);
    if (create)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists.
    else
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise.
    config.setRAMBufferSizeMB(RAMBufferSize);
    config.setCodec(new LsreCustomCodec());
    return new IndexWriter(directory, config);
}

From source file:net.semanticmetadata.lire.indexing.parallel.ParallelIndexer.java

License:Open Source License

public void run() {
    IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new StandardAnalyzer());
    config.setOpenMode(openMode);
    config.setCodec(new LireCustomCodec());
    try {
        if (imageDirectory != null)
            System.out.println("Getting all images in " + imageDirectory + ".");
        writer = new IndexWriter(FSDirectory.open(new File(indexPath)), config);
        if (imageList == null) {
            files = FileUtils.getAllImages(new File(imageDirectory), true);
        } else {
            files = new LinkedList<String>();
            BufferedReader br = new BufferedReader(new FileReader(imageList));
            String line = null;
            while ((line = br.readLine()) != null) {
                if (line.trim().length() > 3)
                    files.add(line.trim());
            }
        }
        numImages = files.size();
        System.out.printf("Indexing %d images\n", files.size());
        Thread p = new Thread(new Producer());
        p.start();
        LinkedList<Thread> threads = new LinkedList<Thread>();
        long l = System.currentTimeMillis();
        for (int i = 0; i < numberOfThreads; i++) {
            Thread c = new Thread(new Consumer());
            c.start();
            threads.add(c);
        }
        Thread m = new Thread(new Monitoring());
        m.start();
        for (Iterator<Thread> iterator = threads.iterator(); iterator.hasNext();) {
            iterator.next().join();
        }
        long l1 = System.currentTimeMillis() - l;
        int seconds = (int) (l1 / 1000);
        int minutes = seconds / 60;
        seconds = seconds % 60;
        // System.out.println("Analyzed " + overallCount + " images in " + seconds + " seconds, ~" + ((overallCount>0)?(l1 / overallCount):"n.a.") + " ms each.");
        System.out.printf("Analyzed %d images in %03d:%02d ~ %3.2f ms each.\n", overallCount, minutes, seconds,
                ((overallCount > 0) ? ((float) l1 / (float) overallCount) : -1f));
        writer.commit();
        writer.forceMerge(1);
        writer.close();
        threadFinished = true;
        // add local feature hist here
    } catch (IOException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}

From source file:net.semanticmetadata.lire.indexing.tools.Indexor.java

License:Open Source License

public void run() {
    // do it ...
    try {
        //            IndexWriter indexWriter = LuceneUtils.createIndexWriter(indexPath, overwriteIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
        IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION,
                new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        config.setCodec(new LireCustomCodec());
        IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexPath)), config);
        for (Iterator<File> iterator = inputFiles.iterator(); iterator.hasNext();) {
            File inputFile = iterator.next();
            if (verbose)
                System.out.println("Processing " + inputFile.getPath() + ".");
            readFile(indexWriter, inputFile);
            if (verbose)
                System.out.println("Indexing finished.");
        }
        indexWriter.commit();
        indexWriter.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:net.semanticmetadata.lire.utils.LuceneUtils.java

License:Open Source License

/**
 * Creates an IndexWriter for the given index directory, with the given analyzer.
 *
 * @param directory the index directory
 * @param create    set to true if you want to create a new index
 * @param analyzer  the analyzer used for the IndexWriter
 * @return an IndexWriter
 * @throws IOException
 */
public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer)
        throws IOException {
    // set the analyzer according to the method params
    Analyzer tmpAnalyzer = null;
    if (analyzer == AnalyzerType.SimpleAnalyzer)
        tmpAnalyzer = new SimpleAnalyzer(); // LetterTokenizer with LowerCaseFilter
    else if (analyzer == AnalyzerType.WhitespaceAnalyzer)
        tmpAnalyzer = new WhitespaceAnalyzer(); // WhitespaceTokenizer
    else if (analyzer == AnalyzerType.KeywordAnalyzer)
        tmpAnalyzer = new KeywordAnalyzer(); // entire string as one token.
    else if (analyzer == AnalyzerType.StandardAnalyzer)
        tmpAnalyzer = new StandardAnalyzer();

    // The config
    IndexWriterConfig config = new IndexWriterConfig(tmpAnalyzer);
    config.setRAMBufferSizeMB(512);
    config.setCommitOnClose(true);
    if (create)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists.
    else
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise.

    config.setCodec(new LireCustomCodec());
    return new IndexWriter(directory, config);
}

From source file:net.semanticmetadata.lire.utils.LuceneUtils.java

License:Open Source License

public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer,
        double RAMBufferSize) throws IOException {
    // set the analyzer according to the method params
    Analyzer tmpAnalyzer = null;
    if (analyzer == AnalyzerType.SimpleAnalyzer)
        tmpAnalyzer = new SimpleAnalyzer();
    else if (analyzer == AnalyzerType.WhitespaceAnalyzer)
        tmpAnalyzer = new WhitespaceAnalyzer();

    // The config
    IndexWriterConfig config = new IndexWriterConfig(tmpAnalyzer);
    if (create)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists.
    else
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise.
    config.setRAMBufferSizeMB(RAMBufferSize);
    config.setCodec(new LireCustomCodec());
    return new IndexWriter(directory, config);
}

From source file:nicta.com.au.patent.pac.index.PACIndexer.java

public PACIndexer(String indexDir) throws IOException {
    File indexDirFile = new File(indexDir);

    Map<String, Analyzer> analyzerPerField = new HashMap<>();
    analyzerPerField.put(PatentDocument.Title,
            new EnglishAnalyzer(Version.LUCENE_48, PatentsStopWords.TITLE_ENGLISH_STOP_WORDS_SET));
    analyzerPerField.put(PatentDocument.Abstract,
            new EnglishAnalyzer(Version.LUCENE_48, PatentsStopWords.ABSTRACT_ENGLISH_STOP_WORDS_SET));
    analyzerPerField.put(PatentDocument.Description,
            new EnglishAnalyzer(Version.LUCENE_48, PatentsStopWords.DESCRIPTION_ENGLISH_STOP_WORDS_SET));
    analyzerPerField.put(PatentDocument.Claims,
            new EnglishAnalyzer(Version.LUCENE_48, PatentsStopWords.CLAIMS_ENGLISH_STOP_WORDS_SET));
    aWrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_48), analyzerPerField);

    analyzer = new EnglishAnalyzer(Version.LUCENE_48, PatentsStopWords.ENGLISH_STOP_WORDS_SET);
    //        analyzer = new StandardAnalyzer(Version.LUCENE_48);
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_48, aWrapper);
    conf.setUseCompoundFile(false);
    conf.setCodec(new SimpleTextCodec());
    writer = new IndexWriter(FSDirectory.open(indexDirFile), conf);
}

From source file:org.apache.blur.filter.FilterCacheTest.java

License:Apache License

private void writeDocs(FilterCache filterCache, RAMDirectory directory) throws IOException {
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
    conf.setCodec(new Blur024Codec());
    IndexWriter indexWriter = new IndexWriter(directory, conf);
    int count = 10000;
    addDocs(indexWriter, count);
    indexWriter.close();
}