Example usage for org.apache.lucene.index IndexReader maxDoc

Introduction

This page collects example usages of the org.apache.lucene.index.IndexReader.maxDoc() method.

Prototype

public abstract int maxDoc();

Document

Returns one greater than the largest possible document number.
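Document IDs therefore range from 0 (inclusive) to maxDoc() (exclusive), and the usual pattern is to loop over that range while skipping deleted documents. Below is a minimal sketch against a Lucene 5–7 style API; the index path and the "title" field are placeholders, not taken from the examples that follow. On Lucene 8+ the live-docs bitset comes from MultiBits.getLiveDocs instead, as the br.bireme.ngrams.NGrams example below shows.

import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;

public static void printAllTitles(String indexPath) throws Exception {
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    IndexReader reader = DirectoryReader.open(dir);
    Bits liveDocs = MultiFields.getLiveDocs(reader); // null when the index has no deletions

    // Document IDs run from 0 (inclusive) to maxDoc() (exclusive).
    for (int docID = 0; docID < reader.maxDoc(); docID++) {
        if (liveDocs != null && !liveDocs.get(docID)) {
            continue; // skip deleted documents
        }
        Document doc = reader.document(docID);
        System.out.println(doc.get("title")); // "title" is a placeholder field name
    }
    reader.close();
    dir.close();
}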

Usage

From source file:aos.lucene.search.advanced.CategorizerTest.java

License:Apache License

private void buildCategoryVectors() throws IOException {
    IndexReader reader = DirectoryReader.open(TestUtil.getBookIndexDirectory());

    int maxDoc = reader.maxDoc();

    for (int i = 0; i < maxDoc; i++) {
        if (!reader.isDeleted(i)) {
            Document doc = reader.document(i);
            String category = doc.get("category");

            Map vectorMap = (Map) categoryMap.get(category);
            if (vectorMap == null) {
                vectorMap = new TreeMap();
                categoryMap.put(category, vectorMap);
            }

            TermFreqVector termFreqVector = reader.getTermFreqVector(i, "subject");

            addTermFreqToMap(vectorMap, termFreqVector);
        }
    }
}
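Note that this snippet mixes the Lucene 4.x DirectoryReader.open call with pre-4.0 methods (isDeleted, getTermFreqVector) that were removed in Lucene 4.0. On Lucene 4+ the deleted-document check goes through the live-docs bitset and term vectors are read with getTermVector; a rough sketch of the equivalent loop (reusing the "subject" field and assuming imports of MultiFields, Terms, and Bits):

static void visitLiveDocs(IndexReader reader) throws IOException {
    // Lucene 4+ replacement for the isDeleted()/getTermFreqVector() calls above.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (liveDocs != null && !liveDocs.get(i)) {
            continue; // deleted document
        }
        Document doc = reader.document(i);
        Terms termVector = reader.getTermVector(i, "subject"); // null if no term vector was indexed
        // ... aggregate term frequencies per category, as buildCategoryVectors() does ...
    }
}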

From source file:aos.lucene.search.ext.filters.SpecialsFilter.java

License:Apache License

public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    OpenBitSet bits = new OpenBitSet(reader.maxDoc());

    String[] isbns = accessor.isbns();

    int[] docs = new int[1];
    int[] freqs = new int[1];

    for (String isbn : isbns) {
        if (isbn != null) {
            TermDocs termDocs = reader.termDocs(new Term("isbn", isbn));
            int count = termDocs.read(docs, freqs);
            if (count == 1) {
                bits.set(docs[0]);
            }
        }
    }

    return bits;
}

From source file:aos.lucene.tools.BooksMoreLikeThis.java

License:Apache License

public static void main(String[] args) throws Throwable {

    String indexDir = System.getProperty("index.dir");
    FSDirectory directory = FSDirectory.open(new File(indexDir));
    IndexReader reader = DirectoryReader.open(directory);

    IndexSearcher searcher = new IndexSearcher(reader);

    int numDocs = reader.maxDoc();

    MoreLikeThis mlt = new MoreLikeThis(reader);
    mlt.setFieldNames(new String[] { "title", "author" });
    mlt.setMinTermFreq(1);
    mlt.setMinDocFreq(1);

    for (int docID = 0; docID < numDocs; docID++) {
        LOGGER.info(""); // blank separator line between documents
        Document doc = reader.document(docID);
        LOGGER.info(doc.get("title"));

        Query query = mlt.like(docID);
        LOGGER.info("  query=" + query);

        TopDocs similarDocs = searcher.search(query, 10);
        if (similarDocs.totalHits == 0)
            LOGGER.info("  None like this");
        for (int i = 0; i < similarDocs.scoreDocs.length; i++) {
            if (similarDocs.scoreDocs[i].doc != docID) {
                doc = reader.document(similarDocs.scoreDocs[i].doc);
                LOGGER.info("  -> " + doc.getField("title").stringValue());
            }
        }
    }

    reader.close();
    directory.close();
}

From source file:br.bireme.ngrams.NGrams.java

public static void export(NGIndex index, final NGSchema schema, final String outFile,
        final String outFileEncoding) throws IOException {
    if (index == null) {
        throw new NullPointerException("index");
    }
    if (schema == null) {
        throw new NullPointerException("schema");
    }
    if (outFile == null) {
        throw new NullPointerException("outFile");
    }
    if (outFileEncoding == null) {
        throw new NullPointerException("outFileEncoding");
    }
    final Parameters parameters = schema.getParameters();
    final TreeMap<Integer, String> fields = new TreeMap<>();
    final IndexReader reader = index.getIndexSearcher().getIndexReader();
    final int maxdoc = reader.maxDoc();
    //final Bits liveDocs = MultiFields.getLiveDocs(reader);
    final Bits liveDocs = MultiBits.getLiveDocs(reader);
    final BufferedWriter writer = Files.newBufferedWriter(Paths.get(outFile), Charset.forName(outFileEncoding),
            StandardOpenOption.CREATE, StandardOpenOption.WRITE);

    boolean first = true;

    for (Map.Entry<Integer, br.bireme.ngrams.Field> entry : parameters.sfields.entrySet()) {
        fields.put(entry.getKey(), entry.getValue().name + NOT_NORMALIZED_FLD);
    }

    for (int docID = 0; docID < maxdoc; docID++) {
        if ((liveDocs != null) && (!liveDocs.get(docID)))
            continue;
        final Document doc = reader.document(docID);

        if (first) {
            first = false;
        } else {
            writer.newLine();
        }
        writer.append(doc2pipe(doc, fields));
    }
    writer.close();
    reader.close();
}

From source file:br.bireme.ngrams.TestIndex.java

public static boolean test(final IndexReader ireader, final NGSchema schema)
        throws IOException, ParserConfigurationException, SAXException {
    final Parameters parameters = schema.getParameters();
    final Map<String, Field> fields = parameters.getNameFields();
    boolean bad = false;

    for (int id = 0; id < ireader.maxDoc(); id++) {
        final Document doc = ireader.document(id);

        if (id % 100000 == 0)
            System.out.println("+++ " + id);
        bad = badDocument(doc, fields);
        if (bad) {
            System.out.println("BAD DOCUMENT => id: " + doc.get("id"));
            break;
        }
    }
    ireader.close();

    return !bad;
}

From source file:ca.gnewton.lusql.core.ViewIndex.java

License:Apache License

/**
 * Describe <code>main</code> method here.
 *
 * @param args a <code>String</code> value
 */
public static final void main(final String[] args) {
    try {
        IndexReader reader = IndexReader.open(FSDirectory.open(new File(args[0])));
        System.out.println("# of documents indexed: " + (reader.maxDoc() - 1));

        int maxDoc = reader.maxDoc();
        for (int i = 0; i < maxDoc; i++) {
            Document doc = reader.document(i);
            printDoc(doc, i);

        }

        reader.close();
    } catch (Throwable t) {
        t.printStackTrace();
    }

}

From source file:cc.twittertools.index.ExtractTweetidsFromIndex.java

License:Apache License

@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(ExtractTweetidsFromIndex.class.getName(), options);
        System.exit(-1);
    }

    File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    PrintStream out = new PrintStream(System.out, true, "UTF-8");
    for (int i = 0; i < reader.maxDoc(); i++) {
        Document doc = reader.document(i);
        out.println(doc.getField(StatusField.ID.name).stringValue() + "\t"
                + doc.getField(StatusField.SCREEN_NAME.name).stringValue());
    }
    out.close();
    reader.close();
}

From source file:ccc.plugins.search.lucene.AclFilter.java

License:Open Source License

/** {@inheritDoc} */
@Override
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {

    // Assume all documents are invalid.
    final OpenBitSet docs = new OpenBitSet(reader.maxDoc());

    // Validate accessible documents.
    for (int i = 0; i < reader.maxDoc(); i++) {
        final Document d = reader.document(i);
        final Field aclField = d.getField(_field);
        if (null != aclField && _ac.canRead(deserialise(aclField.getBinaryValue()))) {
            docs.set(i);
        }
    }

    return docs;
}

From source file:com.appspot.socialinquirer.server.service.impl.AnalysisServiceImpl.java

License:Apache License

@Override
public List<Tag> getTermVector(String title, String text) {
    RAMDirectory directory = null;
    IndexReader reader = null;
    Map<String, Tag> tagsMap = new HashMap<String, Tag>();

    try {
        directory = new RAMDirectory();

        IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_CURRENT), true,
                MaxFieldLength.UNLIMITED);
        Document doc = new Document();

        doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
        doc.add(new Field("body", stripHtmlTags(text, true), Field.Store.YES, Field.Index.ANALYZED,
                Field.TermVector.YES));
        writer.addDocument(doc);

        writer.close();
        reader = IndexReader.open(directory, true);
        int numDocs = reader.maxDoc();
        for (int i = 0; i < numDocs; i++) {
            TermFreqVector termFreqVector = reader.getTermFreqVector(i, "title");
            pullTags(termFreqVector, tagsMap);
            termFreqVector = reader.getTermFreqVector(i, "body");
            pullTags(termFreqVector, tagsMap);
        }

    } catch (Exception e) {
        logger.log(Level.SEVERE, "An error occured while pulling tags from text.", e);
    } finally {
        closeIndexReader(reader);
        closeRAMDirectory(directory);
    }
    ArrayList<Tag> tagsList = new ArrayList<Tag>(tagsMap.values());
    Collections.sort(tagsList, new Comparator<Tag>() {
        @Override
        public int compare(Tag o1, Tag o2) {
            return o2.getFreqency() - o1.getFreqency();
        }
    });

    return tagsList;
}

From source file:com.bloatit.data.search.DaoFeatureSearchFilter.java

License:Open Source License

@Override
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
    final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
    bitSet.set(0, reader.maxDoc()); // start with every document marked as a match

    if (filteredTerms != null) {
        for (final Pair<String, String> pair : filteredTerms) {
            final TermDocs termDocs = reader.termDocs(new Term(pair.key, pair.value.toLowerCase()));
            while (termDocs.next()) {
                bitSet.clear(termDocs.doc());
            }
        }
    }
    return bitSet;
}