Example usage for org.apache.lucene.index DirectoryReader open

Introduction

On this page you can find example usages of org.apache.lucene.index.DirectoryReader.open.

Prototype

public static DirectoryReader open(final IndexCommit commit) throws IOException 

Document

Expert: returns an IndexReader reading the index in the given IndexCommit.
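
The prototype above takes an IndexCommit, but every usage example below opens a reader on the latest commit through the more common DirectoryReader.open(Directory) overload. The following is a minimal sketch of both overloads, assuming a Lucene 5+ classpath, a hypothetical existing index at /tmp/my-index, and a deletion policy that has retained more than one commit (by default Lucene keeps only the latest commit, so listCommits may return a single entry).

import java.io.IOException;
import java.nio.file.Paths;
import java.util.List;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class OpenFromCommitSketch {
    public static void main(String[] args) throws IOException {
        // Hypothetical index location; point this at a real index directory.
        Directory dir = FSDirectory.open(Paths.get("/tmp/my-index"));

        // Common overload: open a reader on the latest commit in the directory.
        try (DirectoryReader latest = DirectoryReader.open(dir)) {
            System.out.println("Docs in latest commit: " + latest.numDocs());
        }

        // Prototype overload: pin a reader to one specific IndexCommit.
        // listCommits returns only the commits the deletion policy has kept,
        // sorted oldest first.
        List<IndexCommit> commits = DirectoryReader.listCommits(dir);
        IndexCommit oldest = commits.get(0);
        try (DirectoryReader pinned = DirectoryReader.open(oldest)) {
            System.out.println("Docs in commit " + oldest.getGeneration() + ": " + pinned.numDocs());
        }

        dir.close();
    }
}

Opening a reader from a specific commit is how a point-in-time snapshot of the index can be searched while an IndexWriter keeps committing new changes on top of it.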

Usage

From source file: app.finder.topicsource.service.SearchFiles.java

License: Apache License

public List<TopicSource> getTopicSources(String queryString) throws IOException, ParseException {
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;

    int hitsPerPage = SEARCH_MAX_SIZE; // 100;
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexDir)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();

    BufferedReader in = null;
    QueryParser parser = new QueryParser(field, analyzer);

    Query query = parser.parse(queryString);

    //System.out.println("Searching for: " + query.toString(field));
    searcher.search(query, SEARCH_MAX_SIZE);

    List<String> list = doSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);

    reader.close();

    List<TopicSource> topicSourceList = new ArrayList<TopicSource>();
    TopicSource topicSource = null;
    int counter = 0;
    for (String fileName : list) {
        topicSource = new TopicSource();
        File file = new File(fileName);

        topicSource.setFileName("" + (++counter) + ". " + file.getName());
        topicSource.setPath(file.getCanonicalPath());
        topicSource.setText(readFile(file));
        topicSourceList.add(topicSource);
    }

    return topicSourceList;
}

From source file: app.SearchFiles.java

License: Apache License

/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false; // default off; the -raw flag below turns it on
    String queryString = null;
    int hitsPerPage = 10;

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);

    BufferedReader in = null;
    if (queries != null) {
        in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    }
    QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
    while (true) {
        if (queries == null && queryString == null) { // prompt the user
            System.out.println("Enter query: ");
        }

        String line = queryString != null ? queryString : in.readLine();

        if (line == null) {
            break;
        }

        line = line.trim();
        if (line.length() == 0) {
            break;
        }

        Query query = parser.parse(line);
        System.out.println("Searching for: " + query.toString(field));

        if (repeat > 0) { // repeat & time as benchmark
            Date start = new Date();
            for (int i = 0; i < repeat; i++) {
                searcher.search(query, null, 100);
            }
            Date end = new Date();
            System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
        }

        doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);

        if (queryString != null) {
            break;
        }
    }
    reader.close();
}

From source file: Application.mediaIndexer.java

public static void SearchFiles(String index, String queryString, String selected, TextArea results)
        throws IOException, ParseException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();
    BufferedReader in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
    QueryParser parser = new QueryParser(selected, analyzer);
    String line = queryString != null ? queryString : in.readLine();
    line = line.trim();
    Query query = parser.parse(line);
    int maxHits = 100;
    TopDocs docsResults = searcher.search(query, maxHits);
    ScoreDoc[] hits = docsResults.scoreDocs;
    for (int i = 0; i < hits.length; i++) {
        Document doc = searcher.doc(hits[i].doc);
        results.appendText("Title: " + doc.get("title") + "\n");
        results.appendText("Artists: " + doc.get("xmpDM:artist") + "\n");
        results.appendText("Genre: " + doc.get("xmpDM:genre") + "\n");
        results.appendText("Year: " + doc.get("xmpDM:releaseDate") + "\n");
    }
    // Playlist.
    playlist.clear();
    for (int i = 0; i < hits.length; i++) {
        Document doc = searcher.doc(hits[i].doc);
        String path = doc.get("path");
        if (path != null)
            playlist.add(new File(path));
    }
    reader.close();
}

From source file: approxnn.ANNRetriever.java

public ANNRetriever(String propFile) throws Exception {
    prop = new Properties();
    prop.load(new FileReader(propFile));
    numDimensions = Integer.parseInt(prop.getProperty("vec.numdimensions"));

    syntheticQueries = prop.getProperty("data.source").equals("synthetic");

    if (syntheticQueries)
        rqgen = new RandomQueryGen(prop);
    // Read from optimized index (instead of the initial index)
    String indexPath = !syntheticQueries ? prop.getProperty("index") : rqgen.randomSamplesFileName() + ".index";

    if (indexPath != null) {
        File indexDir = new File(indexPath);

        //reader = DirectoryReader.open(FSDirectory.open(indexDir.toPath()));
        reader = DirectoryReader.open(MMapDirectory.open(indexDir.toPath()));
        //reader = DirectoryReader.open(new RAMDirectory(FSDirectory.open(indexDir.toPath()), IOContext.DEFAULT));

        searcher = new IndexSearcher(reader);
        searcher.setSimilarity(new LMJelinekMercerSimilarity(0.1f)); // almost close to tf        
    }
    DocVector.initVectorRange(prop);
    numIntervals = DocVector.numIntervals;

    if (!syntheticQueries)
        indexedVecQueries = new IndexedVecQueries(propFile);
    //System.out.println(indexedVecQueries);

    //vecQueries = new VecQueries(propFile);
    debug = Boolean.parseBoolean(prop.getProperty("debug", "false"));
    subSpaceDimension = Integer.parseInt(prop.getProperty("subspace.dimension", "0"));

    start = Integer.parseInt(prop.getProperty("retrieve.start", "0"));
    end = Integer.parseInt(prop.getProperty("retrieve.end", "-1"));
}

From source file: at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java

License: Apache License

/**
 * This constructor loads the SKOS model from a given InputStream using the
 * given serialization language parameter, which must be either N3, RDF/XML,
 * or TURTLE.
 *
 * @param inputStream the input stream
 * @param lang the serialization language
 * @throws IOException if the model cannot be loaded
 */
public SKOSEngineImpl(InputStream inputStream, String lang) throws IOException {
    if (!("N3".equals(lang) || "RDF/XML".equals(lang) || "TURTLE".equals(lang))) {
        throw new IOException("Invalid RDF serialization format");
    }
    this.analyzer = new SimpleAnalyzer();
    this.skosModel = ModelFactory.createDefaultModel();
    skosModel.read(inputStream, null, lang);
    indexDir = new RAMDirectory();
    entailSKOSModel();
    indexSKOSModel();
    searcher = new IndexSearcher(DirectoryReader.open(indexDir));
}

From source file: at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java

License: Apache License

/**
 * This constructor loads the SKOS model from a given filename or URI,
 * starts the indexing process and sets up the index searcher.
 *
 * @param indexPath index path
 * @param filenameOrURI file name or URI
 * @param languages the languages to be considered
 * @throws IOException if indexing SKOS model fails
 */
public SKOSEngineImpl(String indexPath, String filenameOrURI, List<String> languages) throws IOException {
    this.analyzer = new SimpleAnalyzer();
    String langSig = "";
    if (languages != null) {
        this.languages = new TreeSet<>(languages);
        if (!this.languages.isEmpty()) {
            langSig = "-" + join(this.languages.iterator(), '-');
        }
    }
    String name = getName(filenameOrURI);
    File dir = new File(indexPath + name + langSig);
    this.indexDir = FSDirectory.open(dir.toPath());
    if (filenameOrURI != null) {
        FileManager fileManager = new FileManager();
        fileManager.addLocatorFile();
        fileManager.addLocatorURL();
        fileManager.addLocatorClassLoader(SKOSEngineImpl.class.getClassLoader());
        if (getExtension(filenameOrURI).equals("zip")) {
            fileManager.addLocatorZip(filenameOrURI);
            filenameOrURI = getBaseName(filenameOrURI);
        }
        File inputFile = new File(filenameOrURI);
        Path inputPath = Paths.get(inputFile.getParent(), inputFile.getName());
        skosModel = fileManager.loadModel(inputPath.toUri().toString());
        entailSKOSModel();
        indexSKOSModel();
        searcher = new IndexSearcher(DirectoryReader.open(indexDir));
    }
}

From source file: at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java

License: Apache License

/**
 * This constructor loads the SKOS model from a given InputStream using the
 * given serialization language parameter, which must be either N3, RDF/XML,
 * or TURTLE.
 *
 * @param inputStream the input stream
 * @param format the serialization language
 * @param languages the languages
 * @throws IOException if the model cannot be loaded
 */
public SKOSEngineImpl(InputStream inputStream, String format, List<String> languages) throws IOException {
    if (!("N3".equals(format) || "RDF/XML".equals(format) || "TURTLE".equals(format))) {
        throw new IOException("Invalid RDF serialization format");
    }
    if (languages != null) {
        this.languages = new TreeSet<>(languages);
    }
    analyzer = new SimpleAnalyzer();
    skosModel = ModelFactory.createDefaultModel();
    skosModel.read(inputStream, null, format);
    indexDir = new RAMDirectory();
    entailSKOSModel();
    indexSKOSModel();
    searcher = new IndexSearcher(DirectoryReader.open(indexDir));
}

From source file: au.org.ala.names.search.ALANameIndexer.java

License: Open Source License

/**
 * Creates the temporary index that provides a lookup of checklist bank id to
 * GUID
 */
private IndexSearcher createTmpGuidIndex(String cbExportFile) throws Exception {
    System.out.println("Starting to create the tmp guid index...");
    IndexWriter iw = createIndexWriter(new File("/data/tmp/guid"), new KeywordAnalyzer(), true);
    au.com.bytecode.opencsv.CSVReader cbreader = new au.com.bytecode.opencsv.CSVReader(
            new FileReader(cbExportFile), '\t', '"', '/', 1);
    for (String[] values = cbreader.readNext(); values != null; values = cbreader.readNext()) {
        Document doc = new Document();
        String id = values[POS_ID];
        String guid = values[POS_LSID];
        doc.add(new StringField("id", id, Store.YES));
        if (StringUtils.isEmpty(guid))
            guid = id; // fall back to the checklist bank id when no guid is supplied

        doc.add(new StoredField("guid", guid));
        iw.addDocument(doc);
    }
    System.out.println("Finished writing the tmp guid index...");
    iw.commit();
    iw.forceMerge(1);
    iw.close();
    //As of lucene 4.0 all IndexReaders are read only
    return new IndexSearcher(DirectoryReader.open(FSDirectory.open(new File("/data/tmp/guid"))));
}

From source file: au.org.ala.names.search.ALANameIndexer.java

License: Open Source License

/**
 * Indexes common names from CoL and ANBG for use in the Common name search.
 *
 * @param iw  The index writer to write the common documents to
 * @param exportDir  The directory that contains the common name export files.
 * @param indexDir The directory in which to create the index.
 * @throws Exception
 */
private void indexCommonNames(IndexWriter iw, String exportDir, String indexDir) throws Exception {
    log.info("Creating Common Names Index ...");

    //TODO think about adding additional sources for common names

    IndexSearcher currentNameSearcher = new IndexSearcher(
            DirectoryReader.open(FSDirectory.open(new File(indexDir + File.separator + "cb"))));
    IndexSearcher extraSearcher = new IndexSearcher(
            DirectoryReader.open(FSDirectory.open(new File(indexDir + File.separator + "id"))));

    addCoLCommonNames(iw, currentNameSearcher);
    addAnbgCommonNames(afdFile, iw, currentNameSearcher, extraSearcher, '\t');
    addAnbgCommonNames(apniFile, iw, currentNameSearcher, extraSearcher, ',');

    iw.commit();
    iw.forceMerge(1);
    iw.close();
}

From source file: au.org.ala.names.search.ALANameIndexer.java

License: Open Source License

/**
 * Creates a temporary index that will provide a lookup up of lsid to "real lsid".
 * <p/>
 * This deals with the following situations:
 * - common names that are sourced from CoL (LSIDs will be mapped to corresponding ANBG LSID)
 * - Multiple ANBG LSIDs exist for the same scientific name and more than 1 are mapped to the same common name.
 *
 * @param idFile
 * @throws Exception
 */
private void createExtraIdIndex(String idxLocation, File idFile) throws Exception {
    CSVReader reader = new CSVReader(new FileReader(idFile), '\t', '"', '~');//CSVReader.build(idFile, "UTF-8", "\t", '"', 0);
    File indexDir = new File(idxLocation);
    IndexWriter iw = createIndexWriter(indexDir, new KeywordAnalyzer(), true);//new IndexWriter(FSDirectory.open(indexDir), new KeywordAnalyzer(), true, MaxFieldLength.UNLIMITED);
    String[] values = null;
    while ((values = reader.readNext()) != null) {

        if (values.length >= 3) {
            Document doc = new Document();
            //doc.add(new Field("lsid", values[2], Store.NO, Index.NOT_ANALYZED));
            doc.add(new StringField("lsid", values[2], Store.NO));
            //doc.add(new Field("reallsid", values[1], Store.YES, Index.NO));
            doc.add(new StoredField("reallsid", values[1]));
            iw.addDocument(doc);
        }
    }
    iw.commit();
    iw.forceMerge(1);
    iw.close();
    idSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(indexDir)));
}