Example usage for org.apache.lucene.index IndexReader document

List of usage examples for org.apache.lucene.index IndexReader document

Introduction

This page collects example usages of the org.apache.lucene.index.IndexReader.document(int) method.

Prototype



public final Document document(int docID) throws IOException 

Source Link

Document

Returns the stored fields of the nth Document in this index.

Usage

From source file:BlockBuilding.AbstractBlockBuilding.java

License:Apache License

/**
 * Reads the {@code DOC_ID} stored field of each document at positions
 * {@code 0 .. reader.numDocs() - 1} and parses it as an integer.
 *
 * @param reader the index reader the stored documents are loaded from
 * @return an array with one parsed id per position; a slot keeps its default
 *         value of 0 when loading that document raised an {@link IOException}
 */
protected int[] getDocumentIds(IndexReader reader) {
    final int total = reader.numDocs();
    final int[] ids = new int[total];
    int position = 0;
    while (position < total) {
        try {
            final Document stored = reader.document(position);
            ids[position] = Integer.parseInt(stored.get(DOC_ID));
        } catch (IOException ex) {
            // Logged and skipped: the slot for this position stays at 0.
            LOGGER.log(Level.SEVERE, null, ex);
        }
        position++;
    }
    return ids;
}

From source file:BlockBuilding.Utilities.java

License:Open Source License

/**
 * Collects the integer value of the {@code DOC_ID} stored field for every
 * document position below {@code reader.numDocs()}.
 *
 * @param reader the index reader to load stored documents from
 * @return the parsed ids, indexed by document position; an entry is left at 0
 *         if reading the corresponding document failed with an
 *         {@link IOException}
 */
public static int[] getDocumentIds(IndexReader reader) {
    final int count = reader.numDocs();
    final int[] result = new int[count];
    for (int docNumber = 0; docNumber != count; ++docNumber) {
        try {
            final String rawId = reader.document(docNumber).get(DOC_ID);
            result[docNumber] = Integer.parseInt(rawId);
        } catch (IOException ex) {
            // Best effort: report the failure and continue with the next document.
            ex.printStackTrace();
        }
    }
    return result;
}

From source file:br.bireme.ngrams.NGrams.java

/**
 * Writes every live document of the index to {@code outFile}.
 *
 * <p>Each document is converted with {@code doc2pipe} using the schema's
 * search fields (their names suffixed with {@code NOT_NORMALIZED_FLD}).
 * Consecutive documents are separated by a line break; no line break is
 * written after the last one. The index reader is closed once the export
 * has completed.
 *
 * @param index           index whose documents are exported
 * @param schema          schema supplying the field parameters
 * @param outFile         path of the file to write
 * @param outFileEncoding name of the charset used for the output file
 * @throws NullPointerException if any argument is {@code null}
 * @throws IOException          if the index cannot be read or the file
 *                              cannot be written
 */
public static void export(NGIndex index, final NGSchema schema, final String outFile,
        final String outFileEncoding) throws IOException {
    if (index == null) {
        throw new NullPointerException("index");
    }
    if (schema == null) {
        throw new NullPointerException("schema");
    }
    if (outFile == null) {
        throw new NullPointerException("outFile");
    }
    if (outFileEncoding == null) {
        throw new NullPointerException("outFileEncoding");
    }
    final Parameters parameters = schema.getParameters();
    final TreeMap<Integer, String> fields = new TreeMap<>();
    for (Map.Entry<Integer, br.bireme.ngrams.Field> entry : parameters.sfields.entrySet()) {
        fields.put(entry.getKey(), entry.getValue().name + NOT_NORMALIZED_FLD);
    }

    final IndexReader reader = index.getIndexSearcher().getIndexReader();
    final int maxdoc = reader.maxDoc();
    // A null Bits instance means the reader has no deletions.
    final Bits liveDocs = MultiBits.getLiveDocs(reader);

    // NOTE(review): CREATE + WRITE without TRUNCATE_EXISTING overwrites an
    // existing file from its start but keeps any trailing bytes of a longer
    // previous file. Kept as in the original; confirm whether this is intended.
    // try-with-resources guarantees the writer is flushed and closed even when
    // reading a document or formatting it fails part-way through.
    try (BufferedWriter writer = Files.newBufferedWriter(Paths.get(outFile),
            Charset.forName(outFileEncoding), StandardOpenOption.CREATE, StandardOpenOption.WRITE)) {
        boolean first = true;

        for (int docID = 0; docID < maxdoc; docID++) {
            if ((liveDocs != null) && (!liveDocs.get(docID))) {
                continue; // skip deleted documents
            }
            final Document doc = reader.document(docID);

            if (first) {
                first = false;
            } else {
                writer.newLine();
            }
            writer.append(doc2pipe(doc, fields));
        }
    }
    reader.close();
}

From source file:br.bireme.ngrams.TestIndex.java

/**
 * Checks the documents of an index against the schema's named fields and
 * stops at the first document that {@code badDocument} rejects.
 *
 * <p>A progress line ({@code "+++ <id>"}) is printed for every 100000th
 * document id, and the {@code "id"} field of the first bad document is
 * printed before the scan stops. The reader is always closed before this
 * method returns, including when an exception is thrown.
 *
 * @param ireader reader over the index to check; closed by this method
 * @param schema  schema supplying the field definitions
 * @return {@code true} if no bad document was found, {@code false} otherwise
 * @throws IOException                  if a document cannot be read
 * @throws ParserConfigurationException declared by the original signature
 * @throws SAXException                 declared by the original signature
 */
public static boolean test(final IndexReader ireader, final NGSchema schema)
        throws IOException, ParserConfigurationException, SAXException {
    boolean bad = false;

    try {
        final Parameters parameters = schema.getParameters();
        final Map<String, Field> fields = parameters.getNameFields();
        final int maxDoc = ireader.maxDoc();

        for (int id = 0; id < maxDoc; id++) {
            final Document doc = ireader.document(id);

            if (id % 100000 == 0) {
                System.out.println("+++ " + id);
            }
            bad = badDocument(doc, fields);
            if (bad) {
                System.out.println("BAD DOCUMENT => id: " + doc.get("id"));
                break;
            }
        }
    } finally {
        // Close in finally so a failing read or check does not leak the reader.
        ireader.close();
    }

    return !bad;
}

From source file:ca.gnewton.lusql.core.ViewIndex.java

License:Apache License

/**
 * Opens the Lucene index located at the directory given as the first
 * command-line argument, prints a header line, and then passes every
 * document position below {@code maxDoc()} to {@code printDoc}.
 *
 * <p>Any failure (including a missing argument) is reported by printing the
 * stack trace; the reader is closed in all cases once it has been opened.
 *
 * @param args command-line arguments; {@code args[0]} is the index directory
 */
public static final void main(final String[] args) {
    try {
        final IndexReader reader = IndexReader.open(FSDirectory.open(new File(args[0])));
        try {
            final int maxDoc = reader.maxDoc();
            // NOTE(review): the reported count is maxDoc() - 1, as in the
            // original; confirm whether the "- 1" is intentional.
            System.out.println("# of documents indexed: " + (maxDoc - 1));

            for (int i = 0; i < maxDoc; i++) {
                final Document doc = reader.document(i);
                printDoc(doc, i);
            }
        } finally {
            // Release the index files even when reading or printing fails.
            reader.close();
        }
    } catch (Throwable t) {
        t.printStackTrace();
    }

}

From source file:ca.ualberta.entitylinking.common.indexing.DocumentIndexer.java

License:Open Source License

/**
 * Opens the index in {@code indexDir}, looks up the document whose
 * {@code "name"} field equals {@code docName}, and prints one line
 * {@code "<term>: <tf-idf>"} for each term of that document's
 * {@code "contents"} term vector.
 *
 * <p>Errors are reported by printing a stack trace; the method returns
 * without output if the index cannot be loaded, the name is unknown, or the
 * document has no term vector for {@code "contents"}. The reader is always
 * closed before returning.
 *
 * @param indexDir directory containing the Lucene index
 * @param docName  value of the {@code "name"} field identifying the document
 */
public void readLuceneIndex(String indexDir, String docName) {
    IndexReader reader = null;

    try {
        Map<String, Integer> name2id;

        //load index
        try {
            reader = IndexReader.open(FSDirectory.open(new File(indexDir)));

            String[] stringArray = FieldCache.DEFAULT.getStrings(reader, "name");

            // build a map from string to its document id.
            name2id = new HashMap<String, Integer>();
            for (int i = 0; i < stringArray.length; i++) {
                name2id.put(stringArray[i], i);
            }
        } catch (IOException e) {
            e.printStackTrace();
            // Nothing usable was loaded; continuing would only fail on null.
            return;
        }

        //get tf-idf vector of a document.
        DefaultSimilarity simObj = new DefaultSimilarity();

        try {
            if (!name2id.containsKey(docName)) {
                return;
            }

            int docId = name2id.get(docName);

            TermFreqVector termVector = reader.getTermFreqVector(docId, "contents");
            if (termVector == null) {
                // No term vector is available for this document/field.
                System.err.println("No term vector stored for field \"contents\" of document: " + docName);
                return;
            }
            int numDocs = reader.numDocs();

            int[] termFreq = termVector.getTermFrequencies();
            String[] terms = termVector.getTerms();
            for (int i = 0; i < terms.length; i++) {
                int tf = termFreq[i];
                int df = reader.docFreq(new Term("contents", terms[i]));
                float tfidf = simObj.tf(tf) * simObj.idf(df, numDocs);
                System.out.println(terms[i] + ": " + tfidf);
            }

        } catch (Exception e) {
            e.printStackTrace();
        }
    } finally {
        // The reader is opened by this method, so it is released here on every path.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:cc.osint.graphd.graph.Graph.java

License:Apache License

/**
 * Parses {@code queryStr} against {@code KEY_FIELD} (AND as the default
 * operator, leading wildcards allowed) and returns the stored fields of
 * every matching document as a JSON object.
 *
 * <p>The parsed query is applied as a cached filter on top of a
 * match-all query, so no scoring is performed. A document whose stored
 * fields cannot be loaded or converted is reported via a stack trace and
 * left out of the result.
 *
 * @param indexSearcher searcher to run the query on
 * @param queryStr      query in Lucene query-parser syntax
 * @return one JSON object per collected document, mapping each stored field
 *         name to the value returned by {@code Document.get} for that name
 * @throws Exception if the query cannot be parsed or the search fails
 */
public List<JSONObject> query(IndexSearcher indexSearcher, String queryStr) throws Exception {
    final List<JSONObject> matches = new ArrayList<JSONObject>();

    QueryParser parser = new QueryParser(Version.LUCENE_31, KEY_FIELD, analyzer);
    parser.setDefaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.AND);
    parser.setAllowLeadingWildcard(true);
    Query parsed = parser.parse(queryStr);

    QueryWrapperFilter queryFilter = new QueryWrapperFilter(parsed);
    org.apache.lucene.search.Filter cachedFilter =
            new org.apache.lucene.search.CachingWrapperFilter(queryFilter);

    Collector gatherer = new Collector() {
        private int segmentBase;
        IndexReader segmentReader;

        // Called once per index segment before its documents are collected.
        public void setNextReader(IndexReader reader, int docBase) {
            this.segmentReader = reader;
            this.segmentBase = docBase;
        }

        // Scores are not needed, so the scorer is ignored.
        public void setScorer(Scorer scorer) {
        }

        // Collection order is irrelevant for building the result list.
        public boolean acceptsDocsOutOfOrder() {
            return true;
        }

        public void collect(int doc) {
            try {
                // 'doc' is relative to the reader handed to setNextReader.
                Document stored = segmentReader.document(doc);
                JSONObject json = new JSONObject();
                for (Fieldable field : stored.getFields()) {
                    String fieldName = field.name();
                    json.put(fieldName, stored.get(fieldName));
                }
                matches.add(json);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
    };

    indexSearcher.search(new MatchAllDocsQuery(), cachedFilter, gatherer);
    return matches;
}

From source file:cc.twittertools.index.ExtractTweetidsFromIndex.java

License:Apache License

/**
 * Command-line entry point: prints, for every document position below
 * {@code maxDoc()} of the index given by the {@code INDEX_OPTION} argument,
 * the tweet id and screen name separated by a tab, encoded as UTF-8 on
 * standard output.
 *
 * <p>Exits with status -1 if the command line cannot be parsed, the index
 * option is missing, or the index location does not exist. The index reader
 * is closed even when reading a document fails.
 *
 * @param args command-line arguments
 * @throws Exception if the index cannot be opened or read
 */
@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(ExtractTweetidsFromIndex.class.getName(), options);
        System.exit(-1);
    }

    File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION));
    if (!indexLocation.exists()) {
        System.err.println("Error: " + indexLocation + " does not exist!");
        System.exit(-1);
    }

    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation));
    try {
        PrintStream out = new PrintStream(System.out, true, "UTF-8");
        for (int i = 0; i < reader.maxDoc(); i++) {
            Document doc = reader.document(i);
            out.println(doc.getField(StatusField.ID.name).stringValue() + "\t"
                    + doc.getField(StatusField.SCREEN_NAME.name).stringValue());
        }
        out.close();
    } finally {
        // Release the index even if a document is unreadable or lacks a field.
        reader.close();
    }
}

From source file:ccc.plugins.search.lucene.AclFilter.java

License:Open Source License

/**
 * {@inheritDoc}
 *
 * <p>Starts from an empty bit set and sets the bit of every non-deleted
 * document whose {@code _field} stored field is present and whose
 * deserialised binary value passes {@code _ac.canRead}. Documents without
 * that field are left unset.
 *
 * @param reader the reader whose documents are filtered
 * @return a bit set of size {@code reader.maxDoc()} with the readable
 *         documents set
 * @throws IOException if a document's stored fields cannot be loaded
 */
@Override
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {

    final int maxDoc = reader.maxDoc();

    // Assume all documents are invalid.
    final OpenBitSet docs = new OpenBitSet(maxDoc);

    // Validate accessible documents.
    for (int i = 0; i < maxDoc; i++) {
        // IndexReader.document does not check for deletions, and loading a
        // deleted document may yield unspecified results, so skip those ids.
        if (reader.isDeleted(i)) {
            continue;
        }
        final Document d = reader.document(i);
        final Field aclField = d.getField(_field);
        if (null != aclField && _ac.canRead(deserialise(aclField.getBinaryValue()))) {
            docs.set(i);
        }
    }

    return docs;
}

From source file:cn.hbu.cs.esearch.store.LuceneStore.java

License:Apache License

/**
 * Looks up the binary value stored under {@code field} for the document
 * mapped to the given uid.
 *
 * @param uid the external identifier to resolve via {@code mapDocId}
 * @return the stored binary value, or {@code null} when the uid maps to a
 *         negative doc id, no current reader is available, or the loaded
 *         document is {@code null}
 * @throws IOException if the stored document cannot be read
 */
@Override
protected BytesRef getFromStore(long uid) throws IOException {
    final int docid = mapDocId(uid);
    if (docid < 0) {
        return null;
    }

    final IndexReader reader = (currentReaderData == null) ? null : currentReaderData.reader;
    if (reader == null) {
        return null;
    }

    final Document doc = reader.document(docid);
    return (doc == null) ? null : doc.getBinaryValue(field);
}