Example usage for org.apache.lucene.util Version LATEST

List of usage examples for org.apache.lucene.util Version LATEST

Introduction

On this page you can find example usages of org.apache.lucene.util Version LATEST.

Prototype

Version LATEST

To view the source code for org.apache.lucene.util Version LATEST, click the Source Link.

Document

WARNING: if you use this setting, and then upgrade to a newer release of Lucene, sizable changes may happen.

Usage

From source file:GUIFrame.java

/**
 * Builds an in-memory index from the collection file {@code npl\doc-text}.
 *
 * <p>The file is scanned line by line. A line containing at least one digit is taken as a
 * document id; the lines that follow it, up to (but not including) the first line containing
 * {@code "/"}, are joined with single spaces to form the summary. Each id/summary pair is
 * passed to {@code AddDocument}.
 *
 * @param Index    value passed in is not used; it is replaced by a new {@link RAMDirectory}
 * @param Analyzer analyzer used to configure the {@link IndexWriter}
 * @return the newly created directory
 * @throws IOException if the input file cannot be read or the index cannot be written
 */
public Directory InitializeIndex(Directory Index, PorterStemAnalyzer Analyzer) throws IOException {
    // Create the index
    Index = new RAMDirectory();
    IndexWriterConfig myConfig = new IndexWriterConfig(Version.LATEST, Analyzer);

    // Both the writer and the file reader are closed on every exit path.
    try (IndexWriter indoWriter = new IndexWriter(Index, myConfig);
            BufferedReader buffR = new BufferedReader(new FileReader("npl\\doc-text"))) {
        StringBuilder idDoc = new StringBuilder();
        StringBuilder sumDoc = new StringBuilder();
        String currLine = buffR.readLine();

        while (currLine != null) {
            if (currLine.matches(".*\\d+.*")) { // Current line contains a document's id
                idDoc.append(currLine);
                currLine = buffR.readLine();

                // Collect summary lines; the null check covers a file that ends
                // before the terminating "/" line.
                while (currLine != null && !currLine.contains("/")) {
                    sumDoc.append(currLine).append(" ");
                    currLine = buffR.readLine();
                }

                // Add the new document to the index
                AddDocument(indoWriter, idDoc.toString(), sumDoc.toString());

                // Clear the string builders for the next document
                idDoc.setLength(0);
                sumDoc.setLength(0);
            }

            // Step past the terminator line (or a non-id line) unless the end was already reached.
            if (currLine != null) {
                currLine = buffR.readLine();
            }
        }
    }

    return Index;
}

From source file:GUIFrame.java

/**
 * Runs the user's query against the {@code "summary"} field and copies the top hits into the
 * list models.
 *
 * <p>Double quotes are stripped from the input before parsing. For each hit, the stored
 * {@code "docID"} value is appended to {@code DocList} and the stored {@code "summary"} value
 * to {@code SumListModel}. Afterwards {@code GetTerms} is called with the cleaned input.
 *
 * @param Analyzer  analyzer used to parse the query
 * @param Index     directory holding the index to search
 * @param userInput raw query text typed by the user
 * @param DocList   list model receiving the document ids of the hits
 * @return the number of hits returned (at most 20)
 * @throws ParseException if the query text cannot be parsed
 * @throws IOException    if the index cannot be read
 */
public int SearchResults(PorterStemAnalyzer Analyzer, Directory Index, String userInput,
        DefaultListModel DocList) throws ParseException, IOException {
    // The query
    userInput = userInput.replace("\"", "");
    Query q = new QueryParser(Version.LATEST, "summary", Analyzer).parse(userInput);

    // The search
    int hitsPerPage = 20; // return 20 top documents
    ScoreDoc[] hits;

    // The reader is opened per search, so it must be closed here or it leaks on every call.
    try (IndexReader indoReader = DirectoryReader.open(Index)) {
        IndexSearcher indoSearcher = new IndexSearcher(indoReader);
        TopScoreDocCollector docCollector = TopScoreDocCollector.create(hitsPerPage, true);
        indoSearcher.search(q, docCollector);
        hits = docCollector.topDocs().scoreDocs;

        // Copy results to list models while the reader is still open
        for (ScoreDoc hit : hits) {
            Document d = indoSearcher.doc(hit.doc);
            DocList.addElement(d.get("docID"));
            SumListModel.addElement(d.get("summary"));
        }
    }

    GetTerms(Index, Analyzer, "summary", userInput);

    return hits.length;
}

From source file:analyzers.FormalAnalyzer.java

License:Apache License

/**
 * Builds the tokenizer and the token-filter pipeline for this analyzer.
 *
 * @param    fieldName    name of the field being analyzed (not consulted here)
 * @param    reader       source of the text to tokenize
 * @return   the tokenizer paired with the last stage of the filter pipeline
 */
@Override
protected Analyzer.TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final Tokenizer source = new StandardTokenizer(reader);

    // With every filter disabled, the bare tokenizer is the whole pipeline.
    if (tokenOpts.disableAllFilters) {
        return new Analyzer.TokenStreamComponents(source, source);
    }

    TokenStream pipeline = new StandardFilter(source);

    // drop tokens according to their type attribute
    pipeline = new StandardTagFilter(pipeline, tokenOpts);

    // lowercase everything
    pipeline = new LowerCaseFilter(pipeline);

    // fold accented characters to their plain ASCII equivalents
    pipeline = new ASCIIFoldingFilter(pipeline);

    // stop-word removal has to follow lowercasing
    pipeline = new StopFilter(pipeline, stopWordSet);

    // strip trailing 's
    pipeline = new EnglishPossessiveFilter(Version.LATEST, pipeline);

    // spelling correction, only when corrections are configured
    final boolean haveCorrections = !spellingHashtable.isEmpty();
    if (haveCorrections) {
        pipeline = new SpellingCorrectionFilter(pipeline, spellingHashtable);
    }

    // Krovetz (KStem) stemming, unless switched off
    if (tokenOpts.disableStemming) {
        return new Analyzer.TokenStreamComponents(source, pipeline);
    }
    return new Analyzer.TokenStreamComponents(source, new KStemFilter(pipeline));
}

From source file:buscador.SearchFiles.java

License:Apache License

/**
 * Simple command-line based search demo.
 *
 * <p>Recognized options: {@code -index dir}, {@code -queries file}, {@code -query string},
 * {@code -repeat n}, {@code -raw} and {@code -paging hitsPerPage}. The usage text also
 * advertises {@code -field}, but this method does not parse it; the searched fields are fixed.
 * Queries are read from the {@code -query} string, the {@code -queries} file, or standard
 * input, in that order of preference. Reading stops at end of input or at a blank line.
 *
 * @param args command-line arguments
 * @throws Exception if the index cannot be opened, input cannot be read, or a query fails to parse
 */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "Zaguan1";
    String[] fields = { "title", "description", "identifier", "date", "creator" };
    BooleanClause.Occur[] flags = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD,
            BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };

    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }

    // try-with-resources releases the index reader even when a query fails to parse or I/O fails.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new SpanishAnalyzer(Version.LATEST);

        BufferedReader in;
        if (queries != null) {
            in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
        } else {
            in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
        }

        try {
            while (true) {
                if (queries == null && queryString == null) { // prompt the user
                    System.out.println("Enter query: ");
                }

                String line = queryString != null ? queryString : in.readLine();

                // End of input. (The former "length() == -1" test could never be true.)
                if (line == null) {
                    break;
                }

                line = line.trim();
                if (line.length() == 0) {
                    break;
                }

                Query query = MultiFieldQueryParser.parse(line, fields, flags, analyzer);

                if (repeat > 0) { // repeat & time as benchmark
                    long start = System.currentTimeMillis();
                    for (int i = 0; i < repeat; i++) {
                        searcher.search(query, 100);
                    }
                    long end = System.currentTimeMillis();
                    System.out.println("Time: " + (end - start) + "ms");
                }

                doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);

                if (queryString != null) {
                    break;
                }
            }
        } finally {
            // Close only the file-backed reader opened here; System.in is left open.
            if (queries != null) {
                in.close();
            }
        }
    }
}

From source file:com.github.msarhan.lucene.ArabicRootExtractorAnalyzer.java

License:Open Source License

/**
 * Creates the
 * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 * used to tokenize text for this analyzer.
 *
 * @param fieldName name of the field being analyzed (not consulted here)
 * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 * built from a {@link StandardTokenizer} filtered with
 * {@link LowerCaseFilter}, {@link DecimalDigitFilter} (version 5.4.0 and later),
 * {@link StopFilter}, {@link SetKeywordMarkerFilter} (only when a stem exclusion set
 * is configured) and {@link ArabicRootExtractorStemFilter}
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source;
    // NOTE(review): onOrAfter(Version.LATEST) only holds when the analyzer's version is the
    // latest one; confirm this is the intended cut-over for StandardTokenizer40.
    if (getVersion().onOrAfter(Version.LATEST)) {
        source = new StandardTokenizer();
    } else {
        source = new StandardTokenizer40();
    }
    TokenStream result = new LowerCaseFilter(source);
    if (getVersion().onOrAfter(Version.LUCENE_5_4_0)) {
        result = new DecimalDigitFilter(result);
    }
    // the order here is important: the stopword list is not normalized!
    result = new StopFilter(result, stopwords);
    // The keyword marker has to run BEFORE the stemmer: it flags excluded terms via
    // KeywordAttribute so that a keyword-aware stemmer can leave them untouched. Placed
    // after the stemmer, the exclusion set could never influence stemming.
    if (!stemExclusionSet.isEmpty()) {
        result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    }
    result = new ArabicRootExtractorStemFilter(result);

    return new TokenStreamComponents(source, result);
}

From source file:com.grantingersoll.opengrok.analysis.BaseTokenStreamFactoryTestCase.java

License:Apache License

/**
 * Returns a fully initialized TokenizerFactory with the specified name and key-value arguments.
 * {@link ClasspathResourceLoader} is used for loading resources, so any required ones should
 * be on the test classpath./*from  w  ww. j  a v a 2 s  .c  om*/
 */
protected TokenizerFactory tokenizerFactory(String name, String... keysAndValues) throws Exception {
    return tokenizerFactory(name, Version.LATEST, keysAndValues);
}

From source file:com.grantingersoll.opengrok.analysis.BaseTokenStreamFactoryTestCase.java

License:Apache License

/**
 * Returns a fully initialized TokenFilterFactory with the specified name and key-value arguments.
 * {@link ClasspathResourceLoader} is used for loading resources, so any required ones should
 * be on the test classpath./* w  ww.  ja va2s. c  o  m*/
 */
protected TokenFilterFactory tokenFilterFactory(String name, String... keysAndValues) throws Exception {
    return tokenFilterFactory(name, Version.LATEST, keysAndValues);
}

From source file:com.grantingersoll.opengrok.analysis.BaseTokenStreamFactoryTestCase.java

License:Apache License

/**
 * Returns a fully initialized CharFilterFactory with the specified name and key-value arguments.
 * {@link ClasspathResourceLoader} is used for loading resources, so any required ones should
 * be on the test classpath./*from w  w w  .  ja  v a  2  s  .com*/
 */
protected CharFilterFactory charFilterFactory(String name, String... keysAndValues) throws Exception {
    return charFilterFactory(name, Version.LATEST, new ClasspathResourceLoader(getClass()), keysAndValues);
}

From source file:com.justinleegrant.myluceneplayground.SimpleFacetsExample.java

License:Apache License

/**
 * Build the example index: five documents, each carrying an "Author" facet and a
 * three-level "Publish Date" facet path (presumably year, month, day).
 *
 * @throws IOException if writing to the index or the taxonomy fails
 */
private void index() throws IOException {
    // try-with-resources closes both writers even if indexing fails part-way. Resources are
    // closed in reverse declaration order, so the index writer is still closed before the
    // taxonomy writer, as in the original explicit close() calls.
    // The taxonomy writer stores facet ords in a separate directory from the main index.
    try (DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
            IndexWriter indexWriter = new IndexWriter(indexDir,
                    new IndexWriterConfig(Version.LATEST, new WhitespaceAnalyzer())
                            .setOpenMode(OpenMode.CREATE))) {
        addFacetedDocument(indexWriter, taxoWriter, "Bob", "2010", "10", "15");
        addFacetedDocument(indexWriter, taxoWriter, "Lisa", "2010", "10", "20");
        addFacetedDocument(indexWriter, taxoWriter, "Lisa", "2012", "1", "1");
        addFacetedDocument(indexWriter, taxoWriter, "Susan", "2012", "1", "7");
        addFacetedDocument(indexWriter, taxoWriter, "Frank", "1999", "5", "5");
    }
}

/** Adds one document with an "Author" facet and a "Publish Date" facet path to the index. */
private void addFacetedDocument(IndexWriter indexWriter, DirectoryTaxonomyWriter taxoWriter, String author,
        String... publishDate) throws IOException {
    Document doc = new Document();
    doc.add(new FacetField("Author", author));
    doc.add(new FacetField("Publish Date", publishDate));
    indexWriter.addDocument(config.build(taxoWriter, doc));
}

From source file:com.lithium.flow.filer.lucene.LuceneFiler.java

License:Apache License

/**
 * Creates a filer that wraps {@code delegate} and opens (or creates) a Lucene index at the
 * path configured under {@code index.path}, then starts the background reopen thread.
 *
 * @param delegate filer passed to the superclass constructor
 * @param config   source of the {@code index.*} settings read below
 * @throws IOException if the index directory or writer cannot be opened
 */
public LuceneFiler(@Nonnull Filer delegate, @Nonnull Config config) throws IOException {
    super(delegate);

    // Settings, read in a fixed order with their defaults.
    String indexPath = config.getString("index.path");
    maxAge = config.getTime("index.maxAge", "-1");
    double mergeLimitMb = config.getDouble("index.maxMergeMb", 4);
    double cacheLimitMb = config.getDouble("index.maxCacheMb", 64);
    long maxStale = config.getTime("index.targetMaxStale", "5s");
    long minStale = config.getTime("index.targetMinStale", "1s");

    // On-disk directory wrapped in an NRT caching layer.
    NRTCachingDirectory nrtDir = new NRTCachingDirectory(FSDirectory.open(new File(indexPath)),
            mergeLimitMb, cacheLimitMb);

    // No analyzer is supplied (null); an existing index is appended to, otherwise created.
    IndexWriterConfig indexConfig = new IndexWriterConfig(Version.LATEST, null);
    indexConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);

    writer = new TrackingIndexWriter(new IndexWriter(nrtDir, indexConfig));
    manager = new SearcherManager(writer.getIndexWriter(), true, new SearcherFactory());

    // NOTE(review): ControlledRealTimeReopenThread takes its staleness targets in seconds;
    // confirm the unit returned by config.getTime (e.g. for "5s") matches.
    thread = new ControlledRealTimeReopenThread<>(writer, manager, maxStale, minStale);
    thread.start();
}