Example usage for org.apache.lucene.util Version LUCENE_CURRENT

List of usage examples for org.apache.lucene.util Version LUCENE_CURRENT

Introduction

In this page you can find the example usage for org.apache.lucene.util Version LUCENE_CURRENT.

Prototype

Version LUCENE_CURRENT

To view the source code for org.apache.lucene.util Version LUCENE_CURRENT, use the source link below.

Click Source Link

Document

Constant for backwards compatibility.

Usage

From source file:SimpleCompileTest.java

License:Apache License

/**
 * Smoke test: creates a node through {@code SimpleNodeFactory} and calls
 * {@code onOrAfter} on {@code Version.LUCENE_CURRENT} with itself. Neither
 * result is inspected; the final assertion is trivially true.
 */
@Test
public void testStartNode() {
    Node node = SimpleNodeFactory.node("foobar");
    final Version current = Version.LUCENE_CURRENT;
    current.onOrAfter(current);
    assert true;
}

From source file:aplicacion.sistema.indexer.test.IndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 * <p>
 * The document root is hard-coded to {@code E:/indexer} and the index is
 * written to {@code INDEX_DIR}; the program refuses to run when
 * {@code INDEX_DIR} already exists. Progress and the total elapsed time are
 * printed to standard output.
 *
 * @param args ignored; the command-line usage check was deliberately
 *             disabled by a previous author (Agustin)
 */
public static void main(String[] args) {
    if (INDEX_DIR.exists()) {
        System.out.println("Cannot save index to '" + INDEX_DIR + "' directory, please delete it first");
        System.exit(1);
    }

    final File docDir = new File("E:/indexer");
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    long startMillis = System.currentTimeMillis();
    try {
        IndexWriter writer = new IndexWriter(FSDirectory.open(INDEX_DIR),
                new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
        try {
            System.out.println("Indexing to directory '" + INDEX_DIR + "'...");
            indexDocs(writer, docDir);
            System.out.println("Optimizing...");
            writer.optimize();
        } finally {
            // Close even when indexing fails so the writer is never left open.
            writer.close();
        }

        long elapsedMillis = System.currentTimeMillis() - startMillis;
        System.out.println(elapsedMillis + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:aplicacion.sistema.indexer.test.SearchFiles.java

License:Apache License

/**
 * Simple command-line based search demo.
 * <p>
 * Parses the options listed in the usage text, opens the index read-only,
 * then reads queries line by line (from the {@code -queries} file, or from
 * standard input with a prompt) until end of input or a blank line. Each
 * query is optionally benchmarked ({@code -repeat}) and then run through
 * either the paging or the streaming search.
 *
 * @param args command-line options; unrecognized arguments are ignored
 * @throws Exception if the index cannot be opened, a query cannot be parsed,
 *                   or reading input fails
 */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field] [-paging hitsPerPage]";
    usage += "\n\tSpecify 'false' for hitsPerPage to use streaming instead of paging search.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "e:/index";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String normsField = null;
    boolean paging = true;
    int hitsPerPage = 10;

    for (int i = 0; i < args.length; i++) {
        String option = args[i];
        if ("-raw".equals(option)) {
            raw = true;
            continue;
        }

        boolean takesValue = "-index".equals(option) || "-field".equals(option)
                || "-queries".equals(option) || "-repeat".equals(option)
                || "-norms".equals(option) || "-paging".equals(option);
        if (!takesValue) {
            continue; // unrecognized arguments are silently skipped
        }
        if (i + 1 >= args.length) {
            // Previously this ran off the end of args with an ArrayIndexOutOfBoundsException.
            System.err.println("Missing value for option " + option);
            System.err.println(usage);
            System.exit(1);
        }

        String value = args[++i];
        if ("-index".equals(option)) {
            index = value;
        } else if ("-field".equals(option)) {
            field = value;
        } else if ("-queries".equals(option)) {
            queries = value;
        } else if ("-repeat".equals(option)) {
            repeat = Integer.parseInt(value);
        } else if ("-norms".equals(option)) {
            normsField = value;
        } else { // -paging
            if ("false".equals(value)) {
                paging = false;
            } else {
                hitsPerPage = Integer.parseInt(value);
                if (hitsPerPage == 0) {
                    paging = false;
                }
            }
        }
    }

    IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)), true); // only searching, so read-only=true
    BufferedReader in = null;
    try {
        if (normsField != null) {
            reader = new OneNormsReader(reader, normsField);
        }

        Searcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);

        if (queries != null) {
            in = new BufferedReader(new FileReader(queries));
        } else {
            in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
        }
        QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, field, analyzer);
        while (true) {
            if (queries == null) { // prompt the user
                System.out.println("Enter query: ");
            }

            String line = in.readLine();
            if (line == null) { // end of input
                break;
            }

            line = line.trim();
            if (line.length() == 0) { // a blank line ends the session
                break;
            }

            Query query = parser.parse(line);
            System.out.println("Searching for: " + query.toString(field));

            if (repeat > 0) { // repeat & time as benchmark
                Date start = new Date();
                for (int i = 0; i < repeat; i++) {
                    searcher.search(query, null, 100);
                }
                Date end = new Date();
                System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
            }

            if (paging) {
                doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null);
            } else {
                doStreamingSearch(searcher, query);
            }
        }
    } finally {
        try {
            // Only close the reader we opened on a file; leave System.in alone.
            if (queries != null && in != null) {
                in.close();
            }
        } finally {
            reader.close();
        }
    }
}

From source file:back.Indexer.java

License:Apache License

/**
 * Index all text files under a directory.
 * <p>
 * Defaults: the index is written to {@code .\indexed}, documents are read
 * from {@code .//artigos}, and any existing index is replaced unless
 * {@code -update} is given.
 *
 * @param args optional {@code -index INDEX_PATH}, {@code -docs DOCS_PATH}
 *             and {@code -update}; unrecognized arguments are ignored
 */
public static void main(String[] args) {
    // A space was missing between "index" and "in INDEX_PATH" in the help text.
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = ".\\indexed";
    String docsPath = ".//artigos";
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        String option = args[i];
        if ("-update".equals(option)) {
            create = false;
        } else if ("-index".equals(option) || "-docs".equals(option)) {
            if (i + 1 >= args.length) {
                // Previously this ran off the end of args with an ArrayIndexOutOfBoundsException.
                System.err.println("Missing value for option " + option);
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            String value = args[++i];
            if ("-index".equals(option)) {
                indexPath = value;
            } else {
                docsPath = value;
            }
        }
    }

    // Defensive only: docsPath has a non-null default above.
    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        // An empty stop-word set is passed explicitly to the analyzer.
        // NOTE(review): the analyzer uses LUCENE_CURRENT while the writer config
        // uses LUCENE_40 -- confirm the mismatch is intended.
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT,
                new CharArraySet(Version.LUCENE_CURRENT, 0, false));
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        try {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        } finally {
            // Close even when indexing fails so the writer is never left open.
            writer.close();
        }

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:back.Searcher.java

License:Apache License

/**
 * Runs a search against one of four indexes, chosen by the stop-word and
 * stemming flags, and hands the hits to {@code doPagingSearch}.
 * <p>
 * When {@code query} is non-null it is executed exactly once; otherwise
 * queries are read interactively from standard input until end of input or
 * a blank line. The rows of {@code .\matriz.csv} are loaded up front and
 * passed through to {@code doPagingSearch} together with {@code consulta}.
 *
 * @param query    the query text, or {@code null} to prompt on standard input
 * @param stopword selects an index/analyzer pair that uses the analyzer's default stop words
 * @param stemming selects an index/analyzer pair based on {@code EnglishAnalyzer}
 * @param consulta passed unchanged to {@code doPagingSearch}
 * @throws Exception if the CSV file or index cannot be read, or the query cannot be parsed
 */
public static void search(String query, boolean stopword, boolean stemming, int consulta) throws Exception {

    String index;
    Analyzer analyzer;
    if (!stopword && !stemming) {
        index = ".\\indexed";
        analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT,
                new CharArraySet(Version.LUCENE_CURRENT, 0, false));
        System.out.println("Nenhum Marcado");
    } else if (stopword && !stemming) {
        index = ".\\indexedNoStpWrd";
        analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
        System.out.println("Primeiro Marcado");
    } else if (!stopword && stemming) {
        index = ".\\indexedStemming";
        analyzer = new EnglishAnalyzer(Version.LUCENE_CURRENT,
                new CharArraySet(Version.LUCENE_CURRENT, 0, false));
        System.out.println("Segundo Marcado");
    } else { // stopword && stemming
        index = ".\\indexedTreated";
        analyzer = new EnglishAnalyzer(Version.LUCENE_CURRENT);
        System.out.println("Dois Marcados");
    }

    String field = "contents";
    String queries = null; // no query file in this variant; kept so the demo structure stays recognizable
    int repeat = 0;        // benchmark repetitions; 0 disables the timing loop
    boolean raw = false;
    String queryString = query;
    int hitsPerPage = 200;

    // Load the CSV fully, then release the file handle (it was never closed before).
    List<String[]> myEntries;
    CSVReader csvReader = new CSVReader(new FileReader(".\\matriz.csv"));
    try {
        myEntries = csvReader.readAll();
    } finally {
        csvReader.close();
    }

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    try {
        IndexSearcher searcher = new IndexSearcher(reader);

        BufferedReader in = null;
        if (queries != null) {
            in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
        } else {
            in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
        }
        QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
        while (true) {
            if (queries == null && queryString == null) { // prompt the user
                System.out.println("Enter query: ");
            }

            String line = queryString != null ? queryString : in.readLine();
            if (line == null) { // end of input
                break;
            }

            line = line.trim();
            if (line.length() == 0) {
                break;
            }

            Query query1 = parser.parse(line);
            System.out.println("Searching for: " + query1.toString(field));

            if (repeat > 0) { // repeat & time as benchmark
                Date start = new Date();
                for (int i = 0; i < repeat; i++) {
                    searcher.search(query1, null, 100);
                }
                Date end = new Date();
                System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
            }

            doPagingSearch(in, searcher, query1, hitsPerPage, raw, queries == null && queryString == null,
                    myEntries, consulta);

            if (queryString != null) { // a supplied query runs exactly once
                break;
            }
        }
    } finally {
        // Release the index even when parsing or searching throws.
        reader.close();
    }
}

From source file:be.iRail.BeLaws.Indexer.java

License:Apache License

/**
 * Index all text files under a directory.
 * <p>
 * Points {@code INDEX_DIR} at {@code indexpath}, then indexes everything
 * under {@code lawspath} into it and optimizes the result. If the document
 * directory is missing or unreadable, a message is printed and nothing is
 * indexed. I/O failures are reported on standard output rather than thrown.
 */
private void index() {
    INDEX_DIR = new File(indexpath);
    final File docDir = new File(lawspath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        return;
    }

    long startMillis = System.currentTimeMillis();
    try {
        IndexWriter writer = new IndexWriter(FSDirectory.open(INDEX_DIR),
                new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
        try {
            System.out.println("Indexing to directory '" + INDEX_DIR + "'...");
            indexDocs(writer, docDir);
            System.out.println("Optimizing...");
            writer.optimize();
        } finally {
            // Close even when indexing fails so the writer is never left open.
            writer.close();
        }

        long elapsedMillis = System.currentTimeMillis() - startMillis;
        System.out.println(elapsedMillis + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:cn.edu.thss.iise.beehivez.server.util.StringSimilarityUtil.java

License:Open Source License

/**
 * Tokenizes the given string with a {@code SnowballAnalyzer} configured for
 * "English" and the {@code StandardAnalyzer} stop-word set, and collects the
 * distinct terms it emits.
 * <p>
 * Failures are not propagated: the stack trace is printed and whatever terms
 * were collected before the failure are returned.
 *
 * @param label the text to tokenize; {@code null} yields an empty set
 * @return the set of distinct terms produced by the analyzer, never {@code null}
 */
public static HashSet<String> snowballTokenize(String label) {
    HashSet<String> ret = new HashSet<String>();
    if (label == null) {
        // Previously this surfaced as a caught NullPointerException and a stack trace.
        return ret;
    }
    try {
        Analyzer analyzer = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English",
                StandardAnalyzer.STOP_WORDS_SET);

        TokenStream stream = analyzer.tokenStream(LabelDocument.FIELD_LABEL, new StringReader(label));
        try {
            TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                ret.add(termAtt.term());
            }
            stream.end();
        } finally {
            // Release the stream even if tokenization fails part-way.
            stream.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    return ret;
}

From source file:com.appspot.socialinquirer.server.service.impl.AnalysisServiceImpl.java

License:Apache License

/**
 * Extracts tags from a title and body by indexing them as a single document
 * in an in-memory index with term vectors enabled, then feeding the term
 * vectors of both fields to {@code pullTags}.
 * <p>
 * Errors are logged rather than thrown; in that case the tags gathered so
 * far (possibly none) are returned.
 *
 * @param title the title text, indexed as field {@code "title"}
 * @param text  the body text; passed through {@code stripHtmlTags} before
 *              being indexed as field {@code "body"}
 * @return the collected tags, sorted by descending frequency
 */
@Override
public List<Tag> getTermVector(String title, String text) {
    RAMDirectory directory = null;
    IndexReader reader = null;
    Map<String, Tag> tagsMap = new HashMap<String, Tag>();

    try {
        directory = new RAMDirectory();

        IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_CURRENT), true,
                MaxFieldLength.UNLIMITED);
        try {
            Document doc = new Document();
            doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.add(new Field("body", stripHtmlTags(text, true), Field.Store.YES, Field.Index.ANALYZED,
                    Field.TermVector.YES));
            writer.addDocument(doc);
        } finally {
            // Close the writer even if building or adding the document fails.
            writer.close();
        }

        reader = IndexReader.open(directory, true);
        int maxDoc = reader.maxDoc();
        for (int docId = 0; docId < maxDoc; docId++) {
            pullTags(reader.getTermFreqVector(docId, "title"), tagsMap);
            pullTags(reader.getTermFreqVector(docId, "body"), tagsMap);
        }

    } catch (Exception e) {
        logger.log(Level.SEVERE, "An error occured while pulling tags from text.", e);
    } finally {
        closeIndexReader(reader);
        closeRAMDirectory(directory);
    }

    ArrayList<Tag> tagsList = new ArrayList<Tag>(tagsMap.values());
    Collections.sort(tagsList, new Comparator<Tag>() {
        @Override
        public int compare(Tag o1, Tag o2) {
            // Descending by frequency; explicit comparison avoids the
            // overflow that int subtraction can produce.
            int first = o1.getFreqency();
            int second = o2.getFreqency();
            if (second < first) {
                return -1;
            }
            return second > first ? 1 : 0;
        }
    });

    return tagsList;
}

From source file:com.appspot.socialinquirer.server.service.impl.AnalysisServiceImpl.java

License:Apache License

/**
 * Creates the english analyzer./*from w ww. ja v  a  2 s.  c om*/
 *
 * @return the analyzer
 */
private static Analyzer createEnglishAnalyzer() {
    return new StandardAnalyzer(Version.LUCENE_CURRENT);
    // return new StandardAnalyzer(Version.LUCENE_CURRENT) {
    // public TokenStream tokenStream(String fieldName, Reader reader) {
    // TokenStream result = super.tokenStream(fieldName, reader);
    // result = new SnowballFilter(result, "English");
    // return result;
    // }
    // };
}

From source file:com.bigdata.search.DefaultAnalyzerFactory.java

License:Open Source License

/**
 * Initializes the various kinds of analyzers that we know about.
 * <p>
 * The map is built on first use and cached in {@code analyzers}; later calls
 * return the cached map. The field is assigned only after every entry has
 * been registered, so a failure part-way through does not leave a partially
 * populated map cached.
 * <p>
 * Note: Each {@link Analyzer} is registered under both the 3 letter and the
 * 2 letter language codes. See <a
 * href="http://www.loc.gov/standards/iso639-2/php/code_list.php">ISO 639-2</a>.
 * 
 * @return the map from language code to analyzer constructor; it always
 *         contains an entry under the empty string, used when there is no
 *         entry for the requested language code
 * 
 * @todo get some informed advice on which {@link Analyzer}s map onto which
 *       language codes.
 * 
 * @todo thread safety? Analyzers produce token processors so maybe there is
 *       no problem here once things are initialized. If so, maybe this
 *       could be static.
 * 
 * @todo configuration. Could be configured by a file containing a class
 *       name and a list of codes that are handled by that class.
 * 
 * @todo strip language code down to 2/3 characters during lookup.
 * 
 * @todo There are a lot of pidgins based on french, english, and other
 *       languages that are not being assigned here.
 */
synchronized private Map<String, AnalyzerConstructor> getAnalyzers() {

    if (analyzers != null) {

        return analyzers;

    }

    // Populate a local map first; it is published to the field at the end.
    final Map<String, AnalyzerConstructor> map = new HashMap<String, AnalyzerConstructor>();

    // Passed to analyzer constructors when stop-word filtering is disabled.
    final Set<?> emptyStopwords = Collections.EMPTY_SET;

    // Portuguese
    {
        AnalyzerConstructor a = new AnalyzerConstructor() {
            public Analyzer newInstance(final boolean filterStopwords) {
                return filterStopwords ? new BrazilianAnalyzer(Version.LUCENE_CURRENT)
                        : new BrazilianAnalyzer(Version.LUCENE_CURRENT, emptyStopwords);
            }
        };
        map.put("por", a);
        map.put("pt", a);
    }

    /*
     * Claims to handle Chinese. Does single character extraction. Claims to
     * produce smaller indices as a result.
     * 
     * Note: you can not tokenize with the Chinese analyzer and then do
     * search using the CJK analyzer and vice versa.
     * 
     * Note: I have no idea whether this would work for Japanese and Korean
     * as well. I expect so, but no real clue.
     * 
     * Note: the filterStopwords flag is ignored for this analyzer.
     */
    {
        AnalyzerConstructor a = new AnalyzerConstructor() {
            public Analyzer newInstance(final boolean filterStopwords) {
                return new ChineseAnalyzer();
            }
        };
        map.put("zho", a);
        map.put("chi", a);
        map.put("zh", a);
    }

    /*
     * Claims to handle Chinese, Japanese, Korean. Does double character
     * extraction with overlap.
     */
    {
        AnalyzerConstructor a = new AnalyzerConstructor() {
            public Analyzer newInstance(final boolean filterStopwords) {
                return filterStopwords ? new CJKAnalyzer(Version.LUCENE_CURRENT)
                        : new CJKAnalyzer(Version.LUCENE_CURRENT, emptyStopwords);
            }
        };
        // The Chinese codes are deliberately left with the ChineseAnalyzer above:
        //            map.put("zho", a);
        //            map.put("chi", a);
        //            map.put("zh", a);
        map.put("jpn", a);
        map.put("ja", a);
        map.put("kor", a);
        map.put("ko", a);
    }

    // Czech
    {
        AnalyzerConstructor a = new AnalyzerConstructor() {
            public Analyzer newInstance(final boolean filterStopwords) {
                return filterStopwords ? new CzechAnalyzer(Version.LUCENE_CURRENT)
                        : new CzechAnalyzer(Version.LUCENE_CURRENT, emptyStopwords);
            }
        };
        map.put("ces", a);
        map.put("cze", a);
        map.put("cs", a);
    }

    // Dutch
    {
        AnalyzerConstructor a = new AnalyzerConstructor() {
            public Analyzer newInstance(final boolean filterStopwords) {
                return filterStopwords ? new DutchAnalyzer(Version.LUCENE_CURRENT)
                        : new DutchAnalyzer(Version.LUCENE_CURRENT, emptyStopwords);
            }
        };
        map.put("dut", a);
        map.put("nld", a);
        map.put("nl", a);
    }

    // French
    {
        AnalyzerConstructor a = new AnalyzerConstructor() {
            public Analyzer newInstance(final boolean filterStopwords) {
                return filterStopwords ? new FrenchAnalyzer(Version.LUCENE_CURRENT)
                        : new FrenchAnalyzer(Version.LUCENE_CURRENT, emptyStopwords);
            }
        };
        map.put("fra", a);
        map.put("fre", a);
        map.put("fr", a);
    }

    /*
     * Note: There are a lot of language codes for German variants that
     * might be useful here.
     */
    {
        AnalyzerConstructor a = new AnalyzerConstructor() {
            public Analyzer newInstance(final boolean filterStopwords) {
                return filterStopwords ? new GermanAnalyzer(Version.LUCENE_CURRENT)
                        : new GermanAnalyzer(Version.LUCENE_CURRENT, emptyStopwords);
            }
        };
        map.put("deu", a);
        map.put("ger", a);
        map.put("de", a);
    }

    // Note: ancient greek has a different code (grc).
    {
        AnalyzerConstructor a = new AnalyzerConstructor() {
            public Analyzer newInstance(final boolean filterStopwords) {
                return filterStopwords ? new GreekAnalyzer(Version.LUCENE_CURRENT)
                        : new GreekAnalyzer(Version.LUCENE_CURRENT, emptyStopwords);
            }
        };
        map.put("gre", a);
        map.put("ell", a);
        map.put("el", a);
    }

    // @todo what about other Cyrillic scripts?
    {
        AnalyzerConstructor a = new AnalyzerConstructor() {
            public Analyzer newInstance(final boolean filterStopwords) {
                return filterStopwords ? new RussianAnalyzer(Version.LUCENE_CURRENT)
                        : new RussianAnalyzer(Version.LUCENE_CURRENT, emptyStopwords);
            }
        };
        map.put("rus", a);
        map.put("ru", a);
    }

    // Thai (the filterStopwords flag is ignored for this analyzer)
    {
        AnalyzerConstructor a = new AnalyzerConstructor() {
            public Analyzer newInstance(final boolean filterStopwords) {
                return new ThaiAnalyzer(Version.LUCENE_CURRENT);
            }
        };
        map.put("tha", a);
        map.put("th", a);
    }

    // English
    {
        AnalyzerConstructor a = new AnalyzerConstructor() {
            public Analyzer newInstance(final boolean filterStopwords) {
                return filterStopwords ? new StandardAnalyzer(Version.LUCENE_CURRENT)
                        : new StandardAnalyzer(Version.LUCENE_CURRENT, emptyStopwords);
            }
        };
        map.put("eng", a);
        map.put("en", a);
        /*
         * Note: There MUST be an entry under the empty string (""). This
         * entry will be requested when there is no entry for the specified
         * language code.
         */
        map.put("", a);
    }

    // Publish only the fully populated map.
    analyzers = map;

    return analyzers;

}