Example usage for org.apache.lucene.index Fields terms

List of usage examples for org.apache.lucene.index Fields terms

Introduction

This page presents usage examples for the org.apache.lucene.index Fields.terms(String) method.

Prototype

public abstract Terms terms(String field) throws IOException;

Source Link

Document

Get the Terms for this field.

Usage

From source file:org.opengrok.indexer.index.IndexDatabase.java

License:Open Source License

/**
 * Update the content of this index database.
 *
 * <p>Opens an {@link IndexWriter} over this database's directory, walks each
 * configured source directory, indexes files via {@code indexParallel()}, and
 * removes index entries whose files no longer exist. Only one update may run
 * at a time; a concurrent call fails fast with an {@link IOException}.
 *
 * @throws IOException if an error occurs
 */
public void update() throws IOException {
    // Fail fast if another update is already in progress on this database.
    synchronized (lock) {
        if (running) {
            throw new IOException("Indexer already running!");
        }
        running = true;
        interrupted = false;
    }

    RuntimeEnvironment env = RuntimeEnvironment.getInstance();

    // Reset per-run state so a previous (possibly failed) run cannot leak into this one.
    reader = null;
    writer = null;
    settings = null;
    uidIter = null;
    postsIter = null;
    acceptedNonlocalSymlinks.clear();

    // Captures a failure from finishWriting()/writer.close() so cleanup in the
    // finally block still runs before the exception is rethrown at the end.
    IOException finishingException = null;
    try {
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(env.getRamBufferSize());
        /*
         * Most data in OpenGrok is indexed but not stored, so use the best
         * compression on the minority of data that is stored, since it
         * should not have a detrimental impact on overall throughput.
         */
        iwc.setCodec(new Lucene70Codec(Lucene50StoredFieldsFormat.Mode.BEST_COMPRESSION));
        writer = new IndexWriter(indexDirectory, iwc);
        writer.commit(); // to make sure index exists on the disk
        completer = new PendingFileCompleter();

        // An empty directory list means "index everything": the whole source
        // root, or the project's path when this database belongs to a project.
        if (directories.isEmpty()) {
            if (project == null) {
                directories.add("");
            } else {
                directories.add(project.getPath());
            }
        }

        for (String dir : directories) {
            File sourceRoot;
            if ("".equals(dir)) {
                sourceRoot = env.getSourceRootFile();
            } else {
                sourceRoot = new File(env.getSourceRootFile(), dir);
            }

            if (env.isHistoryEnabled()) {
                try {
                    // History cache must exist before indexing so history data
                    // can be associated with the indexed files.
                    HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);
                } catch (HistoryException ex) {
                    String exmsg = String.format("Failed to ensureHistoryCacheExists() for %s", sourceRoot);
                    LOGGER.log(Level.SEVERE, exmsg, ex);
                    // Skip this directory rather than aborting the whole update.
                    continue;
                }
            }

            dir = Util.fixPathIfWindows(dir);

            // All u-field terms for files under this directory share this prefix.
            String startuid = Util.path2uid(dir, "");
            reader = DirectoryReader.open(indexDirectory); // open existing index
            settings = readAnalysisSettings();
            if (settings == null) {
                settings = new IndexAnalysisSettings();
            }
            Terms terms = null;
            int numDocs = reader.numDocs();
            if (numDocs > 0) {
                // NOTE(review): Fields.terms() can return null when the field
                // is absent; the null check on `terms` below covers that case.
                Fields uFields = MultiFields.getFields(reader);
                terms = uFields.terms(QueryBuilder.U);
            }

            try {
                if (terms != null) {
                    uidIter = terms.iterator();
                    // Position the uid iterator at the first term for this directory.
                    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid
                    if (stat == TermsEnum.SeekStatus.END) {
                        uidIter = null;
                        LOGGER.log(Level.WARNING, "Couldn''t find a start term for {0}, empty u field?",
                                startuid);
                    }
                }

                // The actual indexing happens in indexParallel().

                // First pass: traverse the directory tree to collect work items
                // (and advance/remove stale uid terms along the way).
                IndexDownArgs args = new IndexDownArgs();
                Statistics elapsed = new Statistics();
                LOGGER.log(Level.INFO, "Starting traversal of directory {0}", dir);
                indexDown(sourceRoot, dir, args);
                showFileCount(dir, args, elapsed);

                // Second pass: index the collected files in parallel.
                args.cur_count = 0;
                elapsed = new Statistics();
                LOGGER.log(Level.INFO, "Starting indexing of directory {0}", dir);
                indexParallel(dir, args);
                elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir));

                // Remove data for the trailing terms that indexDown()
                // did not traverse. These correspond to files that have been
                // removed and have higher ordering than any present files.
                while (uidIter != null && uidIter.term() != null
                        && uidIter.term().utf8ToString().startsWith(startuid)) {

                    removeFile(true);
                    BytesRef next = uidIter.next();
                    if (next == null) {
                        uidIter = null;
                    }
                }

                markProjectIndexed(project);
            } finally {
                reader.close();
            }
        }

        try {
            finishWriting();
        } catch (IOException e) {
            // Remember the failure but keep going so the writer gets closed below.
            finishingException = e;
        }
    } catch (RuntimeException ex) {
        LOGGER.log(Level.SEVERE, "Failed with unexpected RuntimeException", ex);
        throw ex;
    } finally {
        completer = null;
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            // Do not mask an earlier failure from finishWriting().
            if (finishingException == null) {
                finishingException = e;
            }
            LOGGER.log(Level.WARNING, "An error occurred while closing writer", e);
        } finally {
            writer = null;
            synchronized (lock) {
                running = false;
            }
        }
    }

    if (finishingException != null) {
        throw finishingException;
    }

    // Only optimize/timestamp when the run completed and actually changed the index.
    if (!isInterrupted() && isDirty()) {
        if (env.isOptimizeDatabase()) {
            optimize();
        }
        env.setIndexTimestamp();
    }
}

From source file:org.opengrok.indexer.index.IndexDatabase.java

License:Open Source License

/**
 * Get all files in this index database.
 *
 * <p>Iterates the terms of the uid field and converts each uid back to a
 * source path via {@code Util.uid2url()}.
 *
 * @throws IOException If an IO error occurs while reading from the database
 * @return set of files in this index database (empty if the index is empty)
 */
public Set<String> getFiles() throws IOException {
    IndexReader ireader = null;
    TermsEnum iter = null;
    Terms terms;
    Set<String> files = new HashSet<>();

    try {
        ireader = DirectoryReader.open(indexDirectory); // open existing index
        int numDocs = ireader.numDocs();
        if (numDocs > 0) {
            Fields uFields = MultiFields.getFields(ireader);
            // Fields.terms() returns null when the field has no terms; guard
            // so an index without a u field cannot NPE on terms.iterator().
            terms = (uFields == null) ? null : uFields.terms(QueryBuilder.U);
            if (terms != null) {
                iter = terms.iterator(); // init uid iterator
            }
        }
        while (iter != null && iter.term() != null) {
            String value = iter.term().utf8ToString();
            if (value.isEmpty()) {
                // Skip the artificial empty term; it does not map to a file.
                iter.next();
                continue;
            }

            files.add(Util.uid2url(value));
            BytesRef next = iter.next();
            if (next == null) {
                iter = null; // exhausted — terminate the loop
            }
        }
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
            }
        }
    }

    return files;
}

From source file:org.opengrok.indexer.index.IndexDatabase.java

License:Open Source License

/**
 * Log (at WARNING level) definition tokens whose UTF-8 text is longer than
 * {@code freq} and which occur in more than 16 documents.
 *
 * @param freq minimum token length (exclusive) for a token to be reported
 * @throws IOException If an IO error occurs while reading from the index
 */
public void listTokens(int freq) throws IOException {
    IndexReader ireader = null;
    TermsEnum iter = null;
    Terms terms;

    try {
        ireader = DirectoryReader.open(indexDirectory);
        int numDocs = ireader.numDocs();
        if (numDocs > 0) {
            Fields uFields = MultiFields.getFields(ireader);
            // Fields.terms() returns null when the field has no terms; the
            // original would NPE on terms.iterator() in that case.
            terms = (uFields == null) ? null : uFields.terms(QueryBuilder.DEFS);
            if (terms != null) {
                iter = terms.iterator(); // init defs iterator
            }
        }
        while (iter != null && iter.term() != null) {
            // NOTE(review): the doc-frequency threshold is a hard-coded 16
            // while `freq` bounds the token length — confirm with callers
            // that this is intentional.
            if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) {
                LOGGER.warning(iter.term().utf8ToString());
            }
            BytesRef next = iter.next();
            if (next == null) {
                iter = null; // exhausted — terminate the loop
            }
        }
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
            }
        }
    }
}

From source file:org.opensolaris.opengrok.index.IndexDatabase.java

License:Open Source License

/**
 * Update the content of this index database.
 *
 * <p>Creates or updates the Lucene index for all configured source
 * directories, removing entries for files that no longer exist. When a
 * ctags binary is configured, definitions are indexed as well.
 *
 * @throws IOException if an error occurs
 * @throws HistoryException if an error occurs when accessing the history
 */
public void update() throws IOException, HistoryException {
    // Fail fast if another update is already in progress on this database.
    synchronized (lock) {
        if (running) {
            throw new IOException("Indexer already running!");
        }
        running = true;
        interrupted = false;
    }

    // Set up ctags for definition indexing. If the binary is not configured,
    // indexing proceeds but definition search will be unavailable.
    String ctgs = RuntimeEnvironment.getInstance().getCtags();
    if (ctgs != null) {
        ctags = new Ctags();
        ctags.setBinary(ctgs);
    }
    if (ctags == null) {
        log.severe("Unable to run ctags! searching definitions will not work!");
    }

    if (ctags != null) {
        String filename = RuntimeEnvironment.getInstance().getCTagsExtraOptionsFile();
        if (filename != null) {
            ctags.setCTagsExtraOptionsFile(filename);
        }
    }

    try {
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(RuntimeEnvironment.getInstance().getRamBufferSize());
        writer = new IndexWriter(indexDirectory, iwc);
        writer.commit(); // to make sure index exists on the disk

        // An empty directory list means "index everything": the whole source
        // root, or the project's path when this database belongs to a project.
        if (directories.isEmpty()) {
            if (project == null) {
                directories.add("");
            } else {
                directories.add(project.getPath());
            }
        }

        for (String dir : directories) {
            File sourceRoot;
            if ("".equals(dir)) {
                sourceRoot = RuntimeEnvironment.getInstance().getSourceRootFile();
            } else {
                sourceRoot = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), dir);
            }

            // History cache must exist before indexing so history data can be stored.
            HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);

            // All u-field terms for files under this directory share this prefix.
            String startuid = Util.path2uid(dir, "");
            IndexReader reader = DirectoryReader.open(indexDirectory); // open existing index
            Terms terms = null;
            int numDocs = reader.numDocs();
            if (numDocs > 0) {
                // NOTE(review): Fields.terms() can return null for an absent
                // field even when numDocs > 0 — the `numDocs > 0` check below
                // would then NPE on terms.iterator(); confirm before relying on it.
                Fields uFields = MultiFields.getFields(reader);
                terms = uFields.terms(QueryBuilder.U);
            }

            try {
                if (numDocs > 0) {
                    uidIter = terms.iterator(uidIter);
                    // Position the uid iterator at the first term for this directory.
                    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid
                    if (stat == TermsEnum.SeekStatus.END) {
                        uidIter = null;
                        log.log(Level.WARNING, "Couldn't find a start term for {0}, empty u field?", startuid);
                    }
                }
                // The code below traverses the tree to get total count.
                int file_cnt = 0;
                if (RuntimeEnvironment.getInstance().isPrintProgress()) {
                    log.log(Level.INFO, "Counting files in {0} ...", dir);
                    // Dry run (count_only=true) so progress reporting has a total.
                    file_cnt = indexDown(sourceRoot, dir, true, 0, 0);
                    if (log.isLoggable(Level.INFO)) {
                        log.log(Level.INFO, "Need to process: {0} files for {1}",
                                new Object[] { file_cnt, dir });
                    }
                }

                // Real pass: index the directory tree.
                indexDown(sourceRoot, dir, false, 0, file_cnt);

                // Remove data for trailing terms that indexDown() did not
                // traverse; these correspond to files that have been removed
                // and sort after every present file.
                while (uidIter != null && uidIter.term() != null
                        && uidIter.term().utf8ToString().startsWith(startuid)) {

                    removeFile();
                    BytesRef next = uidIter.next();
                    if (next == null) {
                        uidIter = null;
                    }
                }
            } finally {
                reader.close();
            }
        }
    } finally {
        // Commit and close the writer even when indexing failed part-way,
        // so whatever was indexed is persisted.
        if (writer != null) {
            try {
                writer.prepareCommit();
                writer.commit();
                writer.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occured while closing writer", e);
            }
        }

        if (ctags != null) {
            try {
                ctags.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occured while closing ctags process", e);
            }
        }

        synchronized (lock) {
            running = false;
        }
    }

    if (!isInterrupted() && isDirty()) {
        if (RuntimeEnvironment.getInstance().isOptimizeDatabase()) {
            optimize();
        }
        // Touch (or create) the timestamp file recording when this index
        // database was last updated.
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        File timestamp = new File(env.getDataRootFile(), "timestamp");
        String purpose = "used for timestamping the index database.";
        if (timestamp.exists()) {
            if (!timestamp.setLastModified(System.currentTimeMillis())) {
                log.log(Level.WARNING, "Failed to set last modified time on ''{0}'', {1}",
                        new Object[] { timestamp.getAbsolutePath(), purpose });
            }
        } else {
            if (!timestamp.createNewFile()) {
                log.log(Level.WARNING, "Failed to create file ''{0}'', {1}",
                        new Object[] { timestamp.getAbsolutePath(), purpose });
            }
        }
    }
}

From source file:org.opensolaris.opengrok.index.IndexDatabase.java

License:Open Source License

/**
 * List all of the files in this index database.
 *
 * <p>Each file's path (derived from its uid term) is logged at FINE level.
 *
 * @throws IOException If an IO error occurs while reading from the database
 */
public void listFiles() throws IOException {
    IndexReader ireader = null;
    TermsEnum iter = null;
    Terms terms = null;

    try {
        ireader = DirectoryReader.open(indexDirectory); // open existing index
        int numDocs = ireader.numDocs();
        if (numDocs > 0) {
            Fields uFields = MultiFields.getFields(ireader);
            // Fields.terms() returns null when the field has no terms.
            terms = (uFields == null) ? null : uFields.terms(QueryBuilder.U);
        }
        // Guard against an empty index (or a missing u field): the original
        // called terms.iterator() unconditionally and NPE'd when numDocs == 0.
        if (terms != null) {
            iter = terms.iterator(iter); // init uid iterator
        }
        while (iter != null && iter.term() != null) {
            log.fine(Util.uid2url(iter.term().utf8ToString()));
            BytesRef next = iter.next();
            if (next == null) {
                iter = null; // exhausted — terminate the loop
            }
        }
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occured while closing index reader", e);
            }
        }
    }
}

From source file:org.opensolaris.opengrok.index.IndexDatabase.java

License:Open Source License

/**
 * Log (at WARNING level) definition tokens whose UTF-8 text is longer than
 * {@code freq} and which occur in more than 16 documents.
 *
 * @param freq minimum token length (exclusive) for a token to be reported
 * @throws IOException If an IO error occurs while reading from the index
 */
public void listTokens(int freq) throws IOException {
    IndexReader ireader = null;
    TermsEnum iter = null;
    Terms terms = null;

    try {
        ireader = DirectoryReader.open(indexDirectory);
        int numDocs = ireader.numDocs();
        if (numDocs > 0) {
            Fields uFields = MultiFields.getFields(ireader);
            // Fields.terms() returns null when the field has no terms.
            terms = (uFields == null) ? null : uFields.terms(QueryBuilder.DEFS);
        }
        // Guard against an empty index (or a missing defs field): the original
        // called terms.iterator() unconditionally and NPE'd when numDocs == 0.
        if (terms != null) {
            iter = terms.iterator(iter); // init defs iterator
        }
        while (iter != null && iter.term() != null) {
            // NOTE(review): the doc-frequency threshold is a hard-coded 16
            // while `freq` bounds the token length — confirm with callers
            // that this is intentional.
            if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) {
                log.warning(iter.term().utf8ToString());
            }
            BytesRef next = iter.next();
            if (next == null) {
                iter = null; // exhausted — terminate the loop
            }
        }
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occured while closing index reader", e);
            }
        }
    }
}

From source file:org.pageseeder.flint.lucene.search.Terms.java

License:Apache License

/**
 * Loads all the fuzzy terms in the list of terms given the reader.
 *
 * @param reader Index reader to use./*from   w  ww .  j ava 2 s  .  co  m*/
 * @param values The list of terms to load.
 * @param term   The term to use.
 *
 * @throws IOException If an error is thrown by the fuzzy term enumeration.
 */
public static void fuzzy(IndexReader reader, List<String> values, Term term, int minSimilarity)
        throws IOException {
    AttributeSource atts = new AttributeSource();
    Fields fields = MultiFields.getFields(reader);
    org.apache.lucene.index.Terms terms = fields == null ? null : fields.terms(term.field());
    if (terms == null)
        return;
    FuzzyTermsEnum fuzzy = new FuzzyTermsEnum(terms, atts, term, minSimilarity, 0, false);
    BytesRef val;
    BytesRef searched = term.bytes();
    while ((val = fuzzy.next()) != null) {
        if (!searched.bytesEquals(val))
            values.add(val.utf8ToString());
    }
}

From source file:org.pageseeder.flint.lucene.search.Terms.java

License:Apache License

/**
 * Loads all the fuzzy matches for the given term, together with their
 * document frequencies, into the supplied bucket.
 *
 * <p>The searched term itself is never added to the bucket.
 *
 * @param reader  Index reader to use.
 * @param bucket  Where to store the terms.
 * @param term    The term to use.
 * @param minSimilarity The minimum similarity for a match.
 *
 * @throws IOException If an error is thrown by the fuzzy term enumeration.
 */
@Beta
public static void fuzzy(IndexReader reader, Bucket<Term> bucket, Term term, int minSimilarity)
        throws IOException {
    Fields fields = MultiFields.getFields(reader);
    org.apache.lucene.index.Terms termsForField = (fields == null) ? null : fields.terms(term.field());
    if (termsForField == null) {
        return;
    }
    AttributeSource attributes = new AttributeSource();
    FuzzyTermsEnum matches = new FuzzyTermsEnum(termsForField, attributes, term, minSimilarity, 0, true);
    BytesRef original = term.bytes();
    BytesRef candidate;
    while ((candidate = matches.next()) != null) {
        if (original.bytesEquals(candidate)) {
            continue; // skip the term that was searched for
        }
        // Deep-copy: the enum reuses its BytesRef between calls to next().
        Term hit = new Term(term.field(), BytesRef.deepCopyOf(candidate));
        bucket.add(hit, reader.docFreq(hit));
    }
}

From source file:org.pageseeder.flint.lucene.search.Terms.java

License:Apache License

/**
 * Loads every term starting with the given term's bytes into the supplied
 * list.
 *
 * @param reader  Index reader to use.
 * @param values  The list of values to load.
 * @param term    The term to use.
 *
 * @throws IOException If an error is thrown by the prefix term enumeration.
 */
public static void prefix(IndexReader reader, List<String> values, Term term) throws IOException {
    Fields fields = MultiFields.getFields(reader);
    org.apache.lucene.index.Terms termsForField = (fields == null) ? null : fields.terms(term.field());
    if (termsForField == null) {
        return;
    }
    // Enumerate only the terms accepted by the prefix automaton.
    CompiledAutomaton prefixMatcher = new CompiledAutomaton(PrefixQuery.toAutomaton(term.bytes()));
    TermsEnum matching = termsForField.intersect(prefixMatcher, null);
    BytesRef found;
    while ((found = matching.next()) != null) {
        values.add(found.utf8ToString());
    }
}

From source file:org.pageseeder.flint.lucene.search.Terms.java

License:Apache License

/**
 * Loads all the prefix terms in the list of terms given the reader.
 *
 * @param reader  Index reader to use./*w w  w.  j  ava 2  s.  c o  m*/
 * @param bucket  Where to store the terms.
 * @param term    The term to use.
 *
 * @throws IOException If an error is thrown by the prefix term enumeration.
 */
public static void prefix(IndexReader reader, Bucket<Term> bucket, Term term) throws IOException {
    Fields fields = MultiFields.getFields(reader);
    org.apache.lucene.index.Terms terms = fields == null ? null : fields.terms(term.field());
    if (terms == null)
        return;
    TermsEnum prefixes = terms.intersect(new CompiledAutomaton(PrefixQuery.toAutomaton(term.bytes())),
            term.bytes());
    BytesRef val;
    while ((val = prefixes.next()) != null) {
        Term t = new Term(term.field(), BytesRef.deepCopyOf(val));
        bucket.add(t, reader.docFreq(t));
    }
}