Example usage for org.apache.lucene.index Fields terms

Introduction

This page collects example usages of the terms(String) method of org.apache.lucene.index.Fields.

Prototype

public abstract Terms terms(String field) throws IOException;

Source Link

Document

Get the Terms for this field. The result is null if the field does not exist, so callers should check for null before using it.

Usage

From source file:org.opengrok.indexer.index.IndexDatabase.java

License:Open Source License

/**
 * Update the content of this index database.
 *
 * <p>Only one update may run at a time. For every configured directory the
 * source tree is traversed and indexed, and afterwards the index entries
 * whose uid terms were not reached by the traversal are removed.
 *
 * @throws IOException if an update is already running, or if an error occurs
 *         while reading or writing the index
 */
public void update() throws IOException {
    synchronized (lock) {
        if (running) {
            throw new IOException("Indexer already running!");
        }
        running = true;
        interrupted = false;
    }

    RuntimeEnvironment env = RuntimeEnvironment.getInstance();

    // Reset the per-run state held in fields.
    reader = null;
    writer = null;
    settings = null;
    uidIter = null;
    postsIter = null;
    acceptedNonlocalSymlinks.clear();

    // A failure of finishWriting() or writer.close() is remembered here and
    // rethrown only after the cleanup in the finally block has run.
    IOException finishingException = null;
    try {
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(env.getRamBufferSize());
        /*
         * Most data in OpenGrok is indexed but not stored, so use the best
         * compression on the minority of data that is stored, since it
         * should not have a detrimental impact on overall throughput.
         */
        iwc.setCodec(new Lucene70Codec(Lucene50StoredFieldsFormat.Mode.BEST_COMPRESSION));
        writer = new IndexWriter(indexDirectory, iwc);
        writer.commit(); // to make sure index exists on the disk
        completer = new PendingFileCompleter();

        if (directories.isEmpty()) {
            if (project == null) {
                directories.add("");
            } else {
                directories.add(project.getPath());
            }
        }

        for (String dir : directories) {
            File sourceRoot;
            if ("".equals(dir)) {
                sourceRoot = env.getSourceRootFile();
            } else {
                sourceRoot = new File(env.getSourceRootFile(), dir);
            }

            if (env.isHistoryEnabled()) {
                try {
                    HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);
                } catch (HistoryException ex) {
                    String exmsg = String.format("Failed to ensureHistoryCacheExists() for %s", sourceRoot);
                    LOGGER.log(Level.SEVERE, exmsg, ex);
                    continue;
                }
            }

            dir = Util.fixPathIfWindows(dir);

            String startuid = Util.path2uid(dir, "");
            reader = DirectoryReader.open(indexDirectory); // open existing index
            // Everything that uses the reader lives inside this try so that
            // the reader is closed even if reading the settings or looking
            // up the terms fails.
            try {
                settings = readAnalysisSettings();
                if (settings == null) {
                    settings = new IndexAnalysisSettings();
                }
                Terms terms = null;
                if (reader.numDocs() > 0) {
                    Fields uFields = MultiFields.getFields(reader);
                    if (uFields != null) {
                        terms = uFields.terms(QueryBuilder.U);
                    }
                }

                if (terms != null) {
                    uidIter = terms.iterator();
                    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid
                    if (stat == TermsEnum.SeekStatus.END) {
                        uidIter = null;
                        LOGGER.log(Level.WARNING, "Couldn''t find a start term for {0}, empty u field?",
                                startuid);
                    }
                }

                // The actual indexing happens in indexParallel().

                IndexDownArgs args = new IndexDownArgs();
                Statistics elapsed = new Statistics();
                LOGGER.log(Level.INFO, "Starting traversal of directory {0}", dir);
                indexDown(sourceRoot, dir, args);
                showFileCount(dir, args, elapsed);

                args.cur_count = 0;
                elapsed = new Statistics();
                LOGGER.log(Level.INFO, "Starting indexing of directory {0}", dir);
                indexParallel(dir, args);
                elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir));

                // Remove data for the trailing terms that indexDown()
                // did not traverse. These correspond to files that have been
                // removed and have higher ordering than any present files.
                while (uidIter != null && uidIter.term() != null
                        && uidIter.term().utf8ToString().startsWith(startuid)) {

                    removeFile(true);
                    BytesRef next = uidIter.next();
                    if (next == null) {
                        uidIter = null;
                    }
                }

                markProjectIndexed(project);
            } finally {
                reader.close();
            }
        }

        try {
            finishWriting();
        } catch (IOException e) {
            finishingException = e;
        }
    } catch (RuntimeException ex) {
        LOGGER.log(Level.SEVERE, "Failed with unexpected RuntimeException", ex);
        throw ex;
    } finally {
        completer = null;
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            // Keep the first failure; a close error must not mask it.
            if (finishingException == null) {
                finishingException = e;
            }
            LOGGER.log(Level.WARNING, "An error occurred while closing writer", e);
        } finally {
            writer = null;
            synchronized (lock) {
                running = false;
            }
        }
    }

    if (finishingException != null) {
        throw finishingException;
    }

    if (!isInterrupted() && isDirty()) {
        if (env.isOptimizeDatabase()) {
            optimize();
        }
        env.setIndexTimestamp();
    }
}

From source file:org.opengrok.indexer.index.IndexDatabase.java

License:Open Source License

/**
 * Get all files in this index database.
 *
 * <p>Every non-empty term of the {@code QueryBuilder.U} field is converted
 * with {@code Util.uid2url} and collected. An empty index, or an index
 * without that field, yields an empty set.
 *
 * @throws IOException If an IO error occurs while reading from the database
 * @return set of files in this index database
 */
public Set<String> getFiles() throws IOException {
    IndexReader ireader = null;
    Set<String> files = new HashSet<>();

    try {
        ireader = DirectoryReader.open(indexDirectory); // open existing index
        Terms terms = null;
        if (ireader.numDocs() > 0) {
            Fields uFields = MultiFields.getFields(ireader);
            if (uFields != null) {
                // terms() returns null when the field does not exist.
                terms = uFields.terms(QueryBuilder.U);
            }
        }
        if (terms != null) {
            TermsEnum iter = terms.iterator();
            // A fresh TermsEnum is unpositioned, so drive the loop with
            // next() instead of reading term() before the first advance.
            BytesRef term;
            while ((term = iter.next()) != null) {
                String value = term.utf8ToString();
                if (!value.isEmpty()) {
                    files.add(Util.uid2url(value));
                }
            }
        }
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                // Best effort: a close failure must not hide the result.
                LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
            }
        }
    }

    return files;
}

From source file:org.opengrok.indexer.index.IndexDatabase.java

License:Open Source License

/**
 * Log, at warning level, the terms of the {@code QueryBuilder.DEFS} field
 * that occur in more than 16 documents and whose text is longer than
 * {@code freq} characters.
 *
 * @param freq length threshold; only terms strictly longer than this are logged
 * @throws IOException If an IO error occurs while reading from the database
 */
public void listTokens(int freq) throws IOException {
    IndexReader ireader = null;

    try {
        ireader = DirectoryReader.open(indexDirectory);
        Terms terms = null;
        if (ireader.numDocs() > 0) {
            Fields uFields = MultiFields.getFields(ireader);
            if (uFields != null) {
                // terms() returns null when the field does not exist.
                terms = uFields.terms(QueryBuilder.DEFS);
            }
        }
        if (terms != null) {
            TermsEnum iter = terms.iterator();
            // A fresh TermsEnum is unpositioned, so advance with next()
            // before looking at the current term or its statistics.
            BytesRef term;
            while ((term = iter.next()) != null) {
                if (iter.docFreq() > 16) {
                    String text = term.utf8ToString();
                    if (text.length() > freq) {
                        LOGGER.warning(text);
                    }
                }
            }
        }
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                // Best effort: only report a failure to close the reader.
                LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
            }
        }
    }
}

From source file:org.opensolaris.opengrok.index.IndexDatabase.java

License:Open Source License

/**
 * Update the content of this index database.
 *
 * <p>Only one update may run at a time. For every configured directory the
 * source tree is indexed, and afterwards the index entries whose uid terms
 * were not reached are removed. When the run was not interrupted and the
 * database is dirty, the {@code timestamp} file under the data root is
 * touched or created.
 *
 * @throws IOException if an update is already running, or if an error occurs
 *         while reading or writing the index
 * @throws HistoryException if an error occurs when accessing the history
 */
public void update() throws IOException, HistoryException {
    synchronized (lock) {
        if (running) {
            throw new IOException("Indexer already running!");
        }
        running = true;
        interrupted = false;
    }

    String ctgs = RuntimeEnvironment.getInstance().getCtags();
    if (ctgs != null) {
        ctags = new Ctags();
        ctags.setBinary(ctgs);
    }
    if (ctags == null) {
        log.severe("Unable to run ctags! searching definitions will not work!");
    }

    if (ctags != null) {
        String filename = RuntimeEnvironment.getInstance().getCTagsExtraOptionsFile();
        if (filename != null) {
            ctags.setCTagsExtraOptionsFile(filename);
        }
    }

    try {
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(RuntimeEnvironment.getInstance().getRamBufferSize());
        writer = new IndexWriter(indexDirectory, iwc);
        writer.commit(); // to make sure index exists on the disk

        if (directories.isEmpty()) {
            if (project == null) {
                directories.add("");
            } else {
                directories.add(project.getPath());
            }
        }

        for (String dir : directories) {
            File sourceRoot;
            if ("".equals(dir)) {
                sourceRoot = RuntimeEnvironment.getInstance().getSourceRootFile();
            } else {
                sourceRoot = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), dir);
            }

            HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);

            String startuid = Util.path2uid(dir, "");
            IndexReader reader = DirectoryReader.open(indexDirectory); // open existing index
            // Everything that uses the reader lives inside this try so that
            // the reader is closed even if the term lookup fails.
            try {
                Terms terms = null;
                if (reader.numDocs() > 0) {
                    Fields uFields = MultiFields.getFields(reader);
                    if (uFields != null) {
                        terms = uFields.terms(QueryBuilder.U);
                    }
                }

                // Guard on the terms themselves: a non-empty index may still
                // lack the uid field, in which case terms is null.
                if (terms != null) {
                    uidIter = terms.iterator(uidIter);
                    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid
                    if (stat == TermsEnum.SeekStatus.END) {
                        uidIter = null;
                        // The apostrophe is doubled because the message is
                        // a MessageFormat pattern; a single quote would
                        // prevent {0} from being substituted.
                        log.log(Level.WARNING, "Couldn''t find a start term for {0}, empty u field?", startuid);
                    }
                }
                // The code below traverses the tree to get total count.
                int file_cnt = 0;
                if (RuntimeEnvironment.getInstance().isPrintProgress()) {
                    log.log(Level.INFO, "Counting files in {0} ...", dir);
                    file_cnt = indexDown(sourceRoot, dir, true, 0, 0);
                    if (log.isLoggable(Level.INFO)) {
                        log.log(Level.INFO, "Need to process: {0} files for {1}",
                                new Object[] { file_cnt, dir });
                    }
                }

                indexDown(sourceRoot, dir, false, 0, file_cnt);

                // Remove the remaining uid terms under this directory that
                // the traversal above did not consume.
                while (uidIter != null && uidIter.term() != null
                        && uidIter.term().utf8ToString().startsWith(startuid)) {

                    removeFile();
                    BytesRef next = uidIter.next();
                    if (next == null) {
                        uidIter = null;
                    }
                }
            } finally {
                reader.close();
            }
        }
    } finally {
        if (writer != null) {
            try {
                writer.prepareCommit();
                writer.commit();
                writer.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occured while closing writer", e);
            }
        }

        if (ctags != null) {
            try {
                ctags.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occured while closing ctags process", e);
            }
        }

        synchronized (lock) {
            running = false;
        }
    }

    if (!isInterrupted() && isDirty()) {
        if (RuntimeEnvironment.getInstance().isOptimizeDatabase()) {
            optimize();
        }
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        File timestamp = new File(env.getDataRootFile(), "timestamp");
        String purpose = "used for timestamping the index database.";
        if (timestamp.exists()) {
            if (!timestamp.setLastModified(System.currentTimeMillis())) {
                log.log(Level.WARNING, "Failed to set last modified time on ''{0}'', {1}",
                        new Object[] { timestamp.getAbsolutePath(), purpose });
            }
        } else {
            if (!timestamp.createNewFile()) {
                log.log(Level.WARNING, "Failed to create file ''{0}'', {1}",
                        new Object[] { timestamp.getAbsolutePath(), purpose });
            }
        }
    }
}

From source file:org.opensolaris.opengrok.index.IndexDatabase.java

License:Open Source License

/**
 * List all of the files in this index database.
 *
 * <p>Every term of the {@code QueryBuilder.U} field is converted with
 * {@code Util.uid2url} and logged at level FINE. Nothing is logged for an
 * empty index or an index without that field.
 *
 * @throws IOException If an IO error occurs while reading from the database
 */
public void listFiles() throws IOException {
    IndexReader ireader = null;

    try {
        ireader = DirectoryReader.open(indexDirectory); // open existing index
        Terms terms = null;
        if (ireader.numDocs() > 0) {
            Fields uFields = MultiFields.getFields(ireader);
            if (uFields != null) {
                // terms() returns null when the field does not exist.
                terms = uFields.terms(QueryBuilder.U);
            }
        }
        // Only build the iterator when there are terms; on an empty index
        // terms stays null and must not be dereferenced.
        if (terms != null) {
            TermsEnum iter = terms.iterator(null);
            // A fresh TermsEnum is unpositioned, so drive the loop with
            // next() instead of reading term() before the first advance.
            BytesRef term;
            while ((term = iter.next()) != null) {
                log.fine(Util.uid2url(term.utf8ToString()));
            }
        }
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                // Best effort: only report a failure to close the reader.
                log.log(Level.WARNING, "An error occured while closing index reader", e);
            }
        }
    }
}

From source file:org.opensolaris.opengrok.index.IndexDatabase.java

License:Open Source License

/**
 * Log, at warning level, the terms of the {@code QueryBuilder.DEFS} field
 * that occur in more than 16 documents and whose text is longer than
 * {@code freq} characters.
 *
 * @param freq length threshold; only terms strictly longer than this are logged
 * @throws IOException If an IO error occurs while reading from the database
 */
public void listTokens(int freq) throws IOException {
    IndexReader ireader = null;

    try {
        ireader = DirectoryReader.open(indexDirectory);
        Terms terms = null;
        if (ireader.numDocs() > 0) {
            Fields uFields = MultiFields.getFields(ireader);
            if (uFields != null) {
                // terms() returns null when the field does not exist.
                terms = uFields.terms(QueryBuilder.DEFS);
            }
        }
        // Only build the iterator when there are terms; on an empty index
        // terms stays null and must not be dereferenced.
        if (terms != null) {
            TermsEnum iter = terms.iterator(null);
            // A fresh TermsEnum is unpositioned, so advance with next()
            // before looking at the current term or its statistics.
            BytesRef term;
            while ((term = iter.next()) != null) {
                if (iter.docFreq() > 16) {
                    String text = term.utf8ToString();
                    if (text.length() > freq) {
                        log.warning(text);
                    }
                }
            }
        }
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                // Best effort: only report a failure to close the reader.
                log.log(Level.WARNING, "An error occured while closing index reader", e);
            }
        }
    }
}

From source file:org.pageseeder.flint.lucene.search.Terms.java

License:Apache License

/**
 * Loads all the fuzzy terms in the list of terms given the reader.
 *
 * @param reader Index reader to use./*from   w  ww .  j ava 2 s  .  co  m*/
 * @param values The list of terms to load.
 * @param term   The term to use.
 *
 * @throws IOException If an error is thrown by the fuzzy term enumeration.
 */
public static void fuzzy(IndexReader reader, List<String> values, Term term, int minSimilarity)
        throws IOException {
    AttributeSource atts = new AttributeSource();
    Fields fields = MultiFields.getFields(reader);
    org.apache.lucene.index.Terms terms = fields == null ? null : fields.terms(term.field());
    if (terms == null)
        return;
    FuzzyTermsEnum fuzzy = new FuzzyTermsEnum(terms, atts, term, minSimilarity, 0, false);
    BytesRef val;
    BytesRef searched = term.bytes();
    while ((val = fuzzy.next()) != null) {
        if (!searched.bytesEquals(val))
            values.add(val.utf8ToString());
    }
}

From source file:org.pageseeder.flint.lucene.search.Terms.java

License:Apache License

/**
 * Collects the fuzzy matches of a term into a bucket, together with the
 * document frequency of each match.
 *
 * <p>The term itself is never added, only the other terms produced by the
 * fuzzy enumeration. Nothing is added when the reader exposes no fields or
 * the term's field has no terms.
 *
 * @param reader        Index reader to use.
 * @param bucket        Where to store the terms.
 * @param term          The term to find fuzzy matches for.
 * @param minSimilarity Value handed unchanged to the {@code FuzzyTermsEnum} constructor.
 *
 * @throws IOException If an error is thrown by the fuzzy term enumeration.
 */
@Beta
public static void fuzzy(IndexReader reader, Bucket<Term> bucket, Term term, int minSimilarity)
        throws IOException {
    Fields allFields = MultiFields.getFields(reader);
    if (allFields == null) {
        return;
    }
    org.apache.lucene.index.Terms fieldTerms = allFields.terms(term.field());
    if (fieldTerms == null) {
        return;
    }

    AttributeSource attributes = new AttributeSource();
    FuzzyTermsEnum matches = new FuzzyTermsEnum(fieldTerms, attributes, term, minSimilarity, 0, true);
    BytesRef original = term.bytes();
    for (BytesRef candidate = matches.next(); candidate != null; candidate = matches.next()) {
        // Skip the term that was searched for.
        if (original.bytesEquals(candidate)) {
            continue;
        }
        // Deep-copy the bytes before wrapping them in a Term that outlives this iteration.
        Term match = new Term(term.field(), BytesRef.deepCopyOf(candidate));
        bucket.add(match, reader.docFreq(match));
    }
}

From source file:org.pageseeder.flint.lucene.search.Terms.java

License:Apache License

/**
 * Collects, as strings, the terms of a field that start with a given prefix.
 *
 * <p>Nothing is added when the reader exposes no fields or the term's field
 * has no terms.
 *
 * @param reader  Index reader to use.
 * @param values  The list the matching term texts are appended to.
 * @param term    The term whose field is searched and whose bytes are the prefix.
 *
 * @throws IOException If an error is thrown by the prefix term enumeration.
 */
public static void prefix(IndexReader reader, List<String> values, Term term) throws IOException {
    Fields allFields = MultiFields.getFields(reader);
    if (allFields == null) {
        return;
    }
    org.apache.lucene.index.Terms fieldTerms = allFields.terms(term.field());
    if (fieldTerms == null) {
        return;
    }

    CompiledAutomaton prefixAutomaton = new CompiledAutomaton(PrefixQuery.toAutomaton(term.bytes()));
    TermsEnum matches = fieldTerms.intersect(prefixAutomaton, null);
    for (BytesRef current = matches.next(); current != null; current = matches.next()) {
        values.add(current.utf8ToString());
    }
}

From source file:org.pageseeder.flint.lucene.search.Terms.java

License:Apache License

/**
 * Loads all the prefix terms in the list of terms given the reader.
 *
 * @param reader  Index reader to use./*w w  w.  j  ava 2  s.  c o  m*/
 * @param bucket  Where to store the terms.
 * @param term    The term to use.
 *
 * @throws IOException If an error is thrown by the prefix term enumeration.
 */
public static void prefix(IndexReader reader, Bucket<Term> bucket, Term term) throws IOException {
    Fields fields = MultiFields.getFields(reader);
    org.apache.lucene.index.Terms terms = fields == null ? null : fields.terms(term.field());
    if (terms == null)
        return;
    TermsEnum prefixes = terms.intersect(new CompiledAutomaton(PrefixQuery.toAutomaton(term.bytes())),
            term.bytes());
    BytesRef val;
    while ((val = prefixes.next()) != null) {
        Term t = new Term(term.field(), BytesRef.deepCopyOf(val));
        bucket.add(t, reader.docFreq(t));
    }
}