List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the nth Document in this index.
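Before the per-project examples, here is a minimal, self-contained sketch of the call itself. It is only a sketch: it assumes a Lucene release in which both IndexReader.document(int) and MultiBits.getLiveDocs(...) are available (roughly the 8.x line), and the index path taken from args[0] and the stored field name "id" are placeholders, not taken from any example below.

import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiBits;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;

public class DumpStoredFields {
    public static void main(String[] args) throws Exception {
        // Open an existing index; the path comes from the command line (placeholder).
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(args[0])))) {
            // Deleted documents still occupy docIDs below maxDoc(), so skip non-live ones.
            Bits liveDocs = MultiBits.getLiveDocs(reader);
            for (int docID = 0; docID < reader.maxDoc(); docID++) {
                if (liveDocs != null && !liveDocs.get(docID)) {
                    continue;
                }
                Document doc = reader.document(docID); // stored fields of the nth document
                System.out.println(docID + " -> " + doc.get("id")); // "id" is a hypothetical stored field
            }
        }
    }
}

The liveDocs check mirrors what several of the examples below do: document(int) does not test whether a docID has been deleted, so asking for a deleted document yields unspecified results.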
From source file:BlockBuilding.AbstractBlockBuilding.java
License:Apache License
protected int[] getDocumentIds(IndexReader reader) {
    int[] documentIds = new int[reader.numDocs()];
    for (int i = 0; i < documentIds.length; i++) {
        try {
            Document document = reader.document(i);
            documentIds[i] = Integer.parseInt(document.get(DOC_ID));
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }
    return documentIds;
}
From source file:BlockBuilding.Utilities.java
License:Open Source License
public static int[] getDocumentIds(IndexReader reader) {
    int[] documentIds = new int[reader.numDocs()];
    for (int i = 0; i < documentIds.length; i++) {
        try {
            Document document = reader.document(i);
            documentIds[i] = Integer.parseInt(document.get(DOC_ID));
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
    return documentIds;
}
From source file:br.bireme.ngrams.NGrams.java
public static void export(NGIndex index, final NGSchema schema, final String outFile, final String outFileEncoding)
        throws IOException {
    if (index == null) {
        throw new NullPointerException("index");
    }
    if (schema == null) {
        throw new NullPointerException("schema");
    }
    if (outFile == null) {
        throw new NullPointerException("outFile");
    }
    if (outFileEncoding == null) {
        throw new NullPointerException("outFileEncoding");
    }
    final Parameters parameters = schema.getParameters();
    final TreeMap<Integer, String> fields = new TreeMap<>();
    final IndexReader reader = index.getIndexSearcher().getIndexReader();
    final int maxdoc = reader.maxDoc();
    //final Bits liveDocs = MultiFields.getLiveDocs(reader);
    final Bits liveDocs = MultiBits.getLiveDocs(reader);
    final BufferedWriter writer = Files.newBufferedWriter(Paths.get(outFile), Charset.forName(outFileEncoding),
            StandardOpenOption.CREATE, StandardOpenOption.WRITE);
    boolean first = true;
    for (Map.Entry<Integer, br.bireme.ngrams.Field> entry : parameters.sfields.entrySet()) {
        fields.put(entry.getKey(), entry.getValue().name + NOT_NORMALIZED_FLD);
    }
    for (int docID = 0; docID < maxdoc; docID++) {
        if ((liveDocs != null) && (!liveDocs.get(docID)))
            continue;
        final Document doc = reader.document(docID);
        if (first) {
            first = false;
        } else {
            writer.newLine();
        }
        writer.append(doc2pipe(doc, fields));
    }
    writer.close();
    reader.close();
}
From source file:br.bireme.ngrams.TestIndex.java
public static boolean test(final IndexReader ireader, final NGSchema schema)
        throws IOException, ParserConfigurationException, SAXException {
    final Parameters parameters = schema.getParameters();
    final Map<String, Field> fields = parameters.getNameFields();
    boolean bad = false;
    for (int id = 0; id < ireader.maxDoc(); id++) {
        final Document doc = ireader.document(id);
        if (id % 100000 == 0)
            System.out.println("+++ " + id);
        bad = badDocument(doc, fields);
        if (bad) {
            System.out.println("BAD DOCUMENT => id: " + doc.get("id"));
            break;
        }
    }
    ireader.close();
    return !bad;
}
From source file:ca.gnewton.lusql.core.ViewIndex.java
License:Apache License
/**
 * Describe <code>main</code> method here.
 *
 * @param args a <code>String</code> value
 */
public static final void main(final String[] args) {
    try {
        IndexReader reader = IndexReader.open(FSDirectory.open(new File(args[0])));
        System.out.println("# of documents indexed: " + (reader.maxDoc() - 1));
        int maxDoc = reader.maxDoc();
        for (int i = 0; i < maxDoc; i++) {
            Document doc = reader.document(i);
            printDoc(doc, i);
        }
        reader.close();
    } catch (Throwable t) {
        t.printStackTrace();
    }
}
From source file:ca.ualberta.entitylinking.common.indexing.DocumentIndexer.java
License:Open Source License
public void readLuceneIndex(String indexDir, String docName) {
    IndexReader reader = null;
    Map<String, Integer> name2id = null;

    // load index
    try {
        reader = IndexReader.open(FSDirectory.open(new File(indexDir)));
        String[] stringArray = FieldCache.DEFAULT.getStrings(reader, "name");

        // build a map from string to its document id.
        name2id = new HashMap<String, Integer>();
        for (int i = 0; i < stringArray.length; i++)
            name2id.put(stringArray[i], i);
    } catch (IOException e) {
        e.printStackTrace();
    }

    // get tf-idf vector of a document.
    DefaultSimilarity simObj = new DefaultSimilarity();
    try {
        if (!name2id.containsKey(docName))
            return;

        int docId = name2id.get(docName);
        Document doc = reader.document(docId);
        TermFreqVector termVector = reader.getTermFreqVector(docId, "contents");
        int numDocs = reader.numDocs();
        int[] termFreq = termVector.getTermFrequencies();
        String[] terms = termVector.getTerms();
        for (int i = 0; i < terms.length; i++) {
            // avoid stop words
            // if (isStopWord(terms[i]))
            //     continue;
            int tf = termFreq[i];
            int df = reader.docFreq(new Term("contents", terms[i]));
            float tfidf = simObj.tf(tf) * simObj.idf(df, numDocs);
            System.out.println(terms[i] + ": " + tfidf);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:cc.osint.graphd.graph.Graph.java
License:Apache License
public List<JSONObject> query(IndexSearcher indexSearcher, String queryStr) throws Exception {
    long start_t = System.currentTimeMillis();
    final List<JSONObject> results = new ArrayList<JSONObject>();
    QueryParser qp = new QueryParser(Version.LUCENE_31, KEY_FIELD, analyzer);
    qp.setDefaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.AND);
    qp.setAllowLeadingWildcard(true);
    Query query = qp.parse(queryStr);
    org.apache.lucene.search.Filter filter = new org.apache.lucene.search.CachingWrapperFilter(
            new QueryWrapperFilter(query));
    indexSearcher.search(new MatchAllDocsQuery(), filter, new Collector() {
        private int docBase;
        IndexReader reader;

        // ignore scoring
        public void setScorer(Scorer scorer) {
        }

        // accept docs out of order
        public boolean acceptsDocsOutOfOrder() {
            return true;
        }

        public void collect(int doc) {
            try {
                Document d = reader.document(doc);
                JSONObject result = new JSONObject();
                for (Fieldable f : d.getFields()) {
                    result.put(f.name(), d.get(f.name()));
                }
                results.add(result);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }

        public void setNextReader(IndexReader reader, int docBase) {
            this.reader = reader;
            this.docBase = docBase;
        }
    });
    long end_t = System.currentTimeMillis();
    //log.info("query: hits.scoreDocs.length = " + results.size() + " (" + (end_t-start_t) + "ms)");
    return results;
}
From source file:cc.twittertools.index.ExtractTweetidsFromIndex.java
License:Apache License
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(/*from w ww . j av a 2s.co m*/ OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(ExtractTweetidsFromIndex.class.getName(), options); System.exit(-1); } File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION)); if (!indexLocation.exists()) { System.err.println("Error: " + indexLocation + " does not exist!"); System.exit(-1); } IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation)); PrintStream out = new PrintStream(System.out, true, "UTF-8"); for (int i = 0; i < reader.maxDoc(); i++) { Document doc = reader.document(i); out.println(doc.getField(StatusField.ID.name).stringValue() + "\t" + doc.getField(StatusField.SCREEN_NAME.name).stringValue()); } out.close(); reader.close(); }
From source file:ccc.plugins.search.lucene.AclFilter.java
License:Open Source License
/** {@inheritDoc} */
@Override
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
    // Assume all documents are invalid.
    final OpenBitSet docs = new OpenBitSet(reader.maxDoc());

    // Validate accessible documents.
    for (int i = 0; i < reader.maxDoc(); i++) {
        final Document d = reader.document(i);
        final Field aclField = d.getField(_field);
        if (null != aclField && _ac.canRead(deserialise(aclField.getBinaryValue()))) {
            docs.set(i);
        }
    }
    return docs;
}
From source file:cn.hbu.cs.esearch.store.LuceneStore.java
License:Apache License
@Override
protected BytesRef getFromStore(long uid) throws IOException {
    int docid = mapDocId(uid);
    if (docid < 0) {
        return null;
    }
    IndexReader reader = null;
    if (currentReaderData != null) {
        reader = currentReaderData.reader;
    }
    if (docid >= 0 && reader != null) {
        Document doc = reader.document(docid);
        if (doc != null) {
            return doc.getBinaryValue(field);
        }
    }
    return null;
}