Example usage for org.apache.lucene.index IndexWriterConfig setSimilarity

List of usage examples for org.apache.lucene.index IndexWriterConfig setSimilarity

Introduction

On this page you can find example usages of org.apache.lucene.index IndexWriterConfig setSimilarity.

Prototype

public IndexWriterConfig setSimilarity(Similarity similarity) 

Source Link

Documentation

Expert: set the Similarity implementation used by this IndexWriter.

Usage

From source file:com.github.alvanson.xltsearch.IndexTask.java

License:Apache License

/**
 * Consumes dockets from {@code inQueue} and applies each one to the index
 * until the {@code Docket.DONE} sentinel is taken.
 *
 * <p>Per docket status:
 * <ul>
 *   <li>{@code PARSED} - builds a document (path, content, metadata, hash sum)
 *       and adds or replaces it, keyed by the relative path;</li>
 *   <li>{@code PASS} - nothing is written;</li>
 *   <li>{@code DELETE} - removes the documents matching the relative path;</li>
 *   <li>anything else - logs an error and cancels this task.</li>
 * </ul>
 *
 * <p>The index writer is closed in a {@code finally} block so that it is
 * released even when an unchecked exception escapes the processing loop.
 *
 * @return {@code true} if the queue was drained up to {@code Docket.DONE}
 *         without an I/O exception or interruption, {@code false} otherwise
 */
@Override
protected Boolean call() {
    IndexWriter iwriter = null;
    boolean result = false;

    updateMessage("started");
    try {
        int count = 0;
        Docket docket;

        IndexWriterConfig iwconfig = new IndexWriterConfig(config.getVersion(), config.getAnalyzer());
        iwconfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        iwconfig.setSimilarity(config.getSimilarity());
        iwriter = new IndexWriter(config.getDirectory(), iwconfig);

        while ((docket = inQueue.take()) != Docket.DONE) {
            count++;
            updateMessage(docket.relPath);
            switch (docket.status) {
            case PARSED:
                // index parsed file
                Document doc = new Document();
                // store relative path  ** must be indexed for updateDocument
                doc.add(new StringField(config.pathField, docket.relPath, Field.Store.YES));
                // index content
                doc.add(new TextField(config.contentField, docket.content.toString(), Field.Store.NO));
                // index standard metadata
                for (Map.Entry<String, Property> e : config.metadataFields.entrySet()) {
                    for (String value : docket.metadata.getValues(e.getValue())) {
                        doc.add(new TextField(e.getKey(), value, Field.Store.YES));
                    }
                }
                // store hashsum
                doc.add(new StringField(config.hashSumField, docket.hashSum, Field.Store.YES));
                // add/update document
                iwriter.updateDocument(new Term(config.pathField, docket.relPath), doc);
                // fall through
            case PASS:
                break;
            case DELETE:
                iwriter.deleteDocuments(new Term(config.pathField, docket.relPath));
                break;
            default:
                logger.error("Unexpected docket state while processing {}: {}", docket.relPath,
                        docket.status.toString());
                cancel(true); // cancel task
            }
            updateProgress(count, count + docket.workLeft);
        }
        // end of queue: docket is now the DONE sentinel
        updateMessage("complete");
        updateProgress(count, count + docket.workLeft);
        result = true;
    } catch (IOException ex) {
        updateMessage("I/O exception");
        logger.error("I/O exception while writing to index", ex);
    } catch (InterruptedException ex) {
        if (isCancelled()) {
            updateMessage("cancelled");
        } else {
            updateMessage("interrupted");
            logger.error("Interrupted", ex);
        }
    } finally {
        // Always release the writer, including when an unchecked exception
        // propagates out of the loop above.
        if (iwriter != null) {
            try {
                iwriter.close();
            } catch (IOException ex) {
                logger.warn("I/O exception while closing index writer", ex);
            }
        }
    }
    return result;
}

From source file:com.jaeksoft.searchlib.index.WriterLocal.java

License:Open Source License

/**
 * Opens an {@link IndexWriter} on the directory exposed by {@code indexDirectory}.
 *
 * <p>The writer uses a {@link SerialMergeScheduler}, the write-lock timeout
 * from {@code indexConfig}, and the similarity supplied by {@code indexConfig}
 * when one is provided.
 *
 * @param create {@code true} to open in {@code CREATE_OR_APPEND} mode,
 *               {@code false} to open in {@code APPEND} mode
 * @return the newly opened writer
 */
private final IndexWriter open(boolean create)
        throws CorruptIndexException, LockObtainFailedException, IOException, SearchLibException {
    final OpenMode openMode;
    if (create) {
        openMode = OpenMode.CREATE_OR_APPEND;
    } else {
        openMode = OpenMode.APPEND;
    }

    // No analyzer is handed to the writer configuration.
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36, null);
    writerConfig.setOpenMode(openMode);
    writerConfig.setMergeScheduler(new SerialMergeScheduler());
    writerConfig.setWriteLockTimeout(indexConfig.getWriteLockTimeout());

    // Only override the default similarity when the index configuration supplies one.
    Similarity customSimilarity = indexConfig.getNewSimilarityInstance();
    if (customSimilarity != null) {
        writerConfig.setSimilarity(customSimilarity);
    }

    Logging.debug("WriteLocal open " + indexDirectory.getDirectory());
    return new IndexWriter(indexDirectory.getDirectory(), writerConfig);
}

From source file:com.jaeksoft.searchlib.index.WriterLucene.java

License:Open Source License

/**
 * Opens an {@link IndexWriter} on the directory exposed by {@code indexDirectory},
 * using a {@link SerialMergeScheduler} and, when available, the similarity
 * supplied by {@code indexConfig}.
 *
 * @param create {@code true} to open in {@code CREATE_OR_APPEND} mode,
 *               {@code false} to open in {@code APPEND} mode
 * @return the newly opened writer
 */
private final IndexWriter open(boolean create)
        throws CorruptIndexException, LockObtainFailedException, IOException, SearchLibException {
    OpenMode requestedMode = create ? OpenMode.CREATE_OR_APPEND : OpenMode.APPEND;

    // No analyzer is handed to the writer configuration.
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36, null);
    writerConfig.setOpenMode(requestedMode);
    writerConfig.setMergeScheduler(new SerialMergeScheduler());

    // Keep the default similarity unless the index configuration provides one.
    Similarity customSimilarity = indexConfig.getNewSimilarityInstance();
    if (customSimilarity != null) {
        writerConfig.setSimilarity(customSimilarity);
    }

    // NOTE(review): the message says "WriteLocal" although this snippet comes
    // from WriterLucene - confirm whether that is intended.
    Logging.debug("WriteLocal open " + indexDirectory.getDirectory());
    return new IndexWriter(indexDirectory.getDirectory(), writerConfig);
}

From source file:com.nearinfinity.blur.mapreduce.BlurReducer.java

License:Apache License

/**
 * Creates {@code _writer} over {@code _directory} using {@code _analyzer},
 * a {@code FairSimilarity}, and the RAM buffer size taken from {@code _blurTask}.
 * Compound files are switched off on the merge policy.
 *
 * @param context the reducer context (not read by this method)
 * @throws IOException if the index writer cannot be opened
 */
protected void setupWriter(Context context) throws IOException {
    nullCheck(_directory);
    nullCheck(_analyzer);

    IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION, _analyzer);
    writerConfig.setSimilarity(new FairSimilarity());
    writerConfig.setRAMBufferSizeMB(_blurTask.getRamBufferSizeMB());

    // The cast fails with a ClassCastException if the configured merge policy
    // is not a TieredMergePolicy.
    ((TieredMergePolicy) writerConfig.getMergePolicy()).setUseCompoundFile(false);

    _writer = new IndexWriter(_directory, writerConfig);
}

From source file:com.qwazr.search.index.IndexInstance.java

License:Apache License

/**
 * @param schema/*  w w w.  ja v a  2  s .  c  om*/
 * @param indexDirectory
 * @return
 */
final static IndexInstance newInstance(SchemaInstance schema, File indexDirectory,
        IndexSettingsDefinition settings)
        throws ServerException, IOException, ReflectiveOperationException, InterruptedException {
    UpdatableAnalyzer indexAnalyzer = null;
    UpdatableAnalyzer queryAnalyzer = null;
    IndexWriter indexWriter = null;
    Directory dataDirectory = null;
    try {

        if (!indexDirectory.exists())
            indexDirectory.mkdir();
        if (!indexDirectory.isDirectory())
            throw new IOException(
                    "This name is not valid. No directory exists for this location: " + indexDirectory);

        FileSet fileSet = new FileSet(indexDirectory);

        //Loading the settings
        if (settings == null) {
            settings = fileSet.settingsFile.exists()
                    ? JsonMapper.MAPPER.readValue(fileSet.settingsFile, IndexSettingsDefinition.class)
                    : IndexSettingsDefinition.EMPTY;
        } else {
            JsonMapper.MAPPER.writeValue(fileSet.settingsFile, settings);
        }

        //Loading the fields
        File fieldMapFile = new File(indexDirectory, FIELDS_FILE);
        LinkedHashMap<String, FieldDefinition> fieldMap = fieldMapFile.exists()
                ? JsonMapper.MAPPER.readValue(fieldMapFile, FieldDefinition.MapStringFieldTypeRef)
                : new LinkedHashMap<>();

        //Loading the fields
        File analyzerMapFile = new File(indexDirectory, ANALYZERS_FILE);
        LinkedHashMap<String, AnalyzerDefinition> analyzerMap = analyzerMapFile.exists()
                ? JsonMapper.MAPPER.readValue(analyzerMapFile, AnalyzerDefinition.MapStringAnalyzerTypeRef)
                : new LinkedHashMap<>();

        AnalyzerContext context = new AnalyzerContext(analyzerMap, fieldMap);
        indexAnalyzer = new UpdatableAnalyzer(context, context.indexAnalyzerMap);
        queryAnalyzer = new UpdatableAnalyzer(context, context.queryAnalyzerMap);

        // Open and lock the data directory
        dataDirectory = FSDirectory.open(fileSet.dataDirectory.toPath());

        // Set
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(indexAnalyzer);
        if (settings != null && settings.similarity_class != null)
            indexWriterConfig.setSimilarity(IndexUtils.findSimilarity(settings.similarity_class));
        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        SnapshotDeletionPolicy snapshotDeletionPolicy = new SnapshotDeletionPolicy(
                indexWriterConfig.getIndexDeletionPolicy());
        indexWriterConfig.setIndexDeletionPolicy(snapshotDeletionPolicy);
        indexWriter = new IndexWriter(dataDirectory, indexWriterConfig);
        if (indexWriter.hasUncommittedChanges())
            indexWriter.commit();

        // Finally we build the SearchSearcherManger
        SearcherManager searcherManager = new SearcherManager(indexWriter, true, null);

        return new IndexInstance(schema, dataDirectory, settings, analyzerMap, fieldMap, fileSet, indexWriter,
                searcherManager, queryAnalyzer);
    } catch (IOException | ServerException | ReflectiveOperationException | InterruptedException e) {
        // We failed in opening the index. We close everything we can
        if (queryAnalyzer != null)
            IOUtils.closeQuietly(queryAnalyzer);
        if (indexAnalyzer != null)
            IOUtils.closeQuietly(indexAnalyzer);
        if (indexWriter != null)
            IOUtils.closeQuietly(indexWriter);
        if (dataDirectory != null)
            IOUtils.closeQuietly(dataDirectory);
        throw e;
    }
}

From source file:cs571.proj1.IndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>Recognised arguments: {@code -index INDEX_PATH} (defaults to
 * {@code "index"}), {@code -docs DOCS_PATH} (required), {@code -update}
 * (append to an existing index instead of recreating it), {@code -tfidf} and
 * {@code -bm25} (select the similarity; when both are given the BM25
 * similarity is set last and therefore wins).
 *
 * <p>The process exits with status 1 when {@code -docs} is missing, when an
 * option that needs a value is the last argument, or when the document
 * directory is not readable.
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + " in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        String arg = args[i];
        if ("-index".equals(arg) || "-docs".equals(arg)) {
            // These options consume the following argument; fail with the
            // usage text instead of an ArrayIndexOutOfBoundsException.
            if (i + 1 >= args.length) {
                System.err.println("Missing value for " + arg);
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            if ("-index".equals(arg)) {
                indexPath = args[i + 1];
            } else {
                docsPath = args[i + 1];
            }
            i++;
        } else if ("-update".equals(arg)) {
            create = false;
        } else if ("-tfidf".equals(arg)) {
            tfidf = true;
        } else if ("-bm25".equals(arg)) {
            bm25 = true;
        }

    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        if (tfidf)
            iwc.setSimilarity(new TFIDF());
        if (bm25)
            iwc.setSimilarity(new BM25());
        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        // try-with-resources closes the writer even if indexDocs fails.
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        }

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
        System.out.println("Total # of Docs Indexed: " + numOfDocuments);

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java

License:Open Source License

/**
 * Creates the handler: prepares the content field type, opens the index
 * writer on the {@code index} sub-directory of the configuration directory,
 * builds the searcher manager, and starts a background thread that commits
 * pending changes roughly every two seconds.
 *
 * <p>The commit thread stops once it is interrupted.
 *
 * @param aConfiguration    supplies the configuration directory
 * @param aAnalyzerCache    supplies the analyzer used by the writer
 * @param aExecutorPool     stored for later use by this handler
 * @param aPreviewProcessor stored for later use by this handler
 * @throws IOException if the index directory or writer cannot be opened
 */
public LuceneIndexHandler(Configuration aConfiguration, AnalyzerCache aAnalyzerCache,
        ExecutorPool aExecutorPool, PreviewProcessor aPreviewProcessor) throws IOException {
    previewProcessor = aPreviewProcessor;
    configuration = aConfiguration;
    analyzerCache = aAnalyzerCache;
    executorPool = aExecutorPool;

    // Content is stored, tokenized, and indexed with positions and offsets,
    // and term vectors are kept with offsets, payloads and positions.
    contentFieldType = new FieldType();
    contentFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    contentFieldType.setStored(true);
    contentFieldType.setTokenized(true);
    contentFieldType.setStoreTermVectorOffsets(true);
    contentFieldType.setStoreTermVectorPayloads(true);
    contentFieldType.setStoreTermVectorPositions(true);
    contentFieldType.setStoreTermVectors(true);

    analyzer = analyzerCache.getAnalyzer();

    File theIndexDirectory = new File(aConfiguration.getConfigDirectory(), "index");
    theIndexDirectory.mkdirs();

    Directory theIndexFSDirectory = new NRTCachingDirectory(FSDirectory.open(theIndexDirectory.toPath()), 100,
            100);

    IndexWriterConfig theConfig = new IndexWriterConfig(analyzer);
    theConfig.setSimilarity(new CustomSimilarity());
    indexWriter = new IndexWriter(theIndexFSDirectory, theConfig);

    searcherManager = new SearcherManager(indexWriter, true, new SearcherFactory());

    commitThread = new Thread("Lucene Commit Thread") {
        @Override
        public void run() {
            while (!isInterrupted()) {

                if (indexWriter.hasUncommittedChanges()) {
                    try {
                        indexWriter.commit();
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }

                try {
                    Thread.sleep(2000);
                } catch (InterruptedException e) {
                    // Thread.sleep clears the interrupted status when it
                    // throws; restore it so the loop condition sees the
                    // interrupt and the thread terminates.
                    Thread.currentThread().interrupt();
                }
            }
        }
    };
    commitThread.start();

    facetsConfig = new FacetsConfig();
}

From source file:de.walware.statet.r.internal.core.rhelp.index.REnvIndexWriter.java

License:Open Source License

/**
 * Builds the writer configuration: {@code WRITE_ANALYZER}, {@code SIMILARITY},
 * a per-thread RAM hard limit of 512 MB, and a thread-state count of
 * (available processors - 3) clamped to the range 2..8.
 *
 * @return a freshly created writer configuration
 */
private IndexWriterConfig createWriterConfig() {
    // Leave three processors free, but always use between 2 and 8 thread states.
    final int spareProcessors = Runtime.getRuntime().availableProcessors() - 3;
    final int threadStates = Math.min(Math.max(2, spareProcessors), 8);

    final IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LATEST, WRITE_ANALYZER);
    writerConfig.setSimilarity(SIMILARITY);
    writerConfig.setMaxThreadStates(threadStates);
    writerConfig.setRAMPerThreadHardLimitMB(512);
    return writerConfig;
}

From source file:edu.rpi.tw.linkipedia.search.indexing.EntityIndexer.java

License:Open Source License

/**
 * Builds a fresh index in {@code indexDirectory} from the contents of
 * {@code sourceDirectory}.
 *
 * <p>The fields {@code related_object}, {@code label} and
 * {@code defaultLabel} are analyzed with an {@code EntropyAnalyzer};
 * {@code analyzedLabel} and every other field use the default analyzer.
 * The index is opened in {@code CREATE} mode.
 *
 * <p>The writer is closed in a {@code finally} block so that it is released
 * even when indexing fails. Any exception is printed to standard error and
 * not propagated.
 */
public void createIndex() {
    try {

        Analyzer stdAnalyzer = DefaultAnalyzer.getAnalyzer();
        PayloadEncoder encoder = new FloatEncoder();
        EntropyAnalyzer entropyAnalyzer = new EntropyAnalyzer(encoder);
        Map<String, Analyzer> myAnalyzerMap = new HashMap<String, Analyzer>();
        myAnalyzerMap.put("related_object", entropyAnalyzer);
        myAnalyzerMap.put("label", entropyAnalyzer);
        myAnalyzerMap.put("defaultLabel", entropyAnalyzer);
        myAnalyzerMap.put("analyzedLabel", stdAnalyzer);
        PerFieldAnalyzerWrapper perFieldAnalyzer = new PerFieldAnalyzerWrapper(stdAnalyzer, myAnalyzerMap);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, perFieldAnalyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        iwc.setRAMBufferSizeMB(4096);
        iwc.setMaxThreadStates(36);
        iwc.setSimilarity(new MySimilarity());
        Directory dir = FSDirectory.open(new File(indexDirectory));
        IndexWriter writer = new IndexWriter(dir, iwc);
        try {
            System.out.println("Indexing to directory '" + indexDirectory + "'...");
            indexDocs(writer, new File(sourceDirectory));
            System.out.println("Optimizing...");
        } finally {
            // Close the writer on both the success and the failure path.
            writer.close();
        }
        System.out.println("Finished Indexing");

    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:edu.rpi.tw.linkipedia.search.indexing.EntityIndexUpdater.java

License:Open Source License

/**
 * Updates the index in {@code indexDirectory} from the contents of
 * {@code sourceDirectory}, creating the index when it does not exist yet
 * ({@code CREATE_OR_APPEND} mode).
 *
 * <p>The fields {@code related_object}, {@code label} and
 * {@code defaultLabel} are analyzed with an {@code EntropyAnalyzer};
 * {@code analyzedLabel} and every other field use the default analyzer.
 *
 * <p>The writer is closed in a {@code finally} block so that it is released
 * even when indexing fails. Any exception is printed to standard error and
 * not propagated.
 */
public void updateIndex() {
    try {

        Analyzer stdAnalyzer = DefaultAnalyzer.getAnalyzer();
        PayloadEncoder encoder = new FloatEncoder();
        EntropyAnalyzer entropyAnalyzer = new EntropyAnalyzer(encoder);
        Map<String, Analyzer> myAnalyzerMap = new HashMap<String, Analyzer>();
        myAnalyzerMap.put("related_object", entropyAnalyzer);
        myAnalyzerMap.put("label", entropyAnalyzer);
        myAnalyzerMap.put("defaultLabel", entropyAnalyzer);
        myAnalyzerMap.put("analyzedLabel", stdAnalyzer);
        PerFieldAnalyzerWrapper perFieldAnalyzer = new PerFieldAnalyzerWrapper(stdAnalyzer, myAnalyzerMap);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, perFieldAnalyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(4096);
        iwc.setMaxThreadStates(36);
        iwc.setSimilarity(new MySimilarity());
        Directory dir = FSDirectory.open(new File(indexDirectory));
        IndexWriter writer = new IndexWriter(dir, iwc);
        try {
            System.out.println("Update directory '" + indexDirectory + "'...");
            indexDocs(writer, new File(sourceDirectory));
            System.out.println("Optimizing...");
        } finally {
            // Close the writer on both the success and the failure path.
            writer.close();
        }
        System.out.println("Finished Updating");

    } catch (Exception e) {
        e.printStackTrace();
    }
}