List of usage examples for org.apache.lucene.index IndexReader maxDoc
public abstract int maxDoc();
From source file:aos.lucene.search.advanced.CategorizerTest.java
License:Apache License
/**
 * Builds one aggregate term-frequency vector per "category" value by scanning
 * every live document in the book index and folding its "subject" term vector
 * into the category's map (lazily created in {@code categoryMap}).
 *
 * NOTE(review): {@code isDeleted}/{@code getTermFreqVector} are pre-Lucene-4.0
 * APIs while {@code DirectoryReader.open} is 4.0+ — this snippet mixes
 * incompatible Lucene versions; confirm the target version before reuse.
 *
 * @throws IOException if the index cannot be opened or read
 */
private void buildCategoryVectors() throws IOException {
    IndexReader reader = DirectoryReader.open(TestUtil.getBookIndexDirectory());
    try {
        int maxDoc = reader.maxDoc();
        for (int i = 0; i < maxDoc; i++) {
            // maxDoc() counts deleted slots too, so they must be skipped explicitly.
            if (reader.isDeleted(i)) {
                continue;
            }
            Document doc = reader.document(i);
            String category = doc.get("category");
            Map vectorMap = (Map) categoryMap.get(category);
            if (vectorMap == null) {
                vectorMap = new TreeMap();
                categoryMap.put(category, vectorMap);
            }
            TermFreqVector termFreqVector = reader.getTermFreqVector(i, "subject");
            addTermFreqToMap(vectorMap, termFreqVector);
        }
    } finally {
        // FIX: the reader was previously leaked — it was never closed.
        reader.close();
    }
}
From source file:aos.lucene.search.ext.filters.SpecialsFilter.java
License:Apache License
public DocIdSet getDocIdSet(IndexReader reader) throws IOException { OpenBitSet bits = new OpenBitSet(reader.maxDoc()); String[] isbns = accessor.isbns(); // int[] docs = new int[1]; int[] freqs = new int[1]; for (String isbn : isbns) { if (isbn != null) { TermDocs termDocs = reader.termDocs(new Term("isbn", isbn)); // int count = termDocs.read(docs, freqs); if (count == 1) { bits.set(docs[0]);//from www. j a va 2 s . c o m } } } return bits; }
From source file:aos.lucene.tools.BooksMoreLikeThis.java
License:Apache License
public static void main(String[] args) throws Throwable { String indexDir = System.getProperty("index.dir"); FSDirectory directory = FSDirectory.open(new File(indexDir)); IndexReader reader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); int numDocs = reader.maxDoc(); MoreLikeThis mlt = new MoreLikeThis(reader); mlt.setFieldNames(new String[] { "title", "author" }); mlt.setMinTermFreq(1);// w w w . j a v a 2s .c o m mlt.setMinDocFreq(1); for (int docID = 0; docID < numDocs; docID++) { LOGGER.info(); Document doc = reader.document(docID); LOGGER.info(doc.get("title")); Query query = mlt.like(docID); LOGGER.info(" query=" + query); TopDocs similarDocs = searcher.search(query, 10); if (similarDocs.totalHits == 0) LOGGER.info(" None like this"); for (int i = 0; i < similarDocs.scoreDocs.length; i++) { if (similarDocs.scoreDocs[i].doc != docID) { doc = reader.document(similarDocs.scoreDocs[i].doc); LOGGER.info(" -> " + doc.getField("title").stringValue()); } } } reader.close(); directory.close(); }
From source file:br.bireme.ngrams.NGrams.java
public static void export(NGIndex index, final NGSchema schema, final String outFile, final String outFileEncoding) throws IOException { if (index == null) { throw new NullPointerException("index"); }//w ww.j a v a 2 s. co m if (schema == null) { throw new NullPointerException("schema"); } if (outFile == null) { throw new NullPointerException("outFile"); } if (outFileEncoding == null) { throw new NullPointerException("outFileEncoding"); } final Parameters parameters = schema.getParameters(); final TreeMap<Integer, String> fields = new TreeMap<>(); final IndexReader reader = index.getIndexSearcher().getIndexReader(); final int maxdoc = reader.maxDoc(); //final Bits liveDocs = MultiFields.getLiveDocs(reader); final Bits liveDocs = MultiBits.getLiveDocs(reader); final BufferedWriter writer = Files.newBufferedWriter(Paths.get(outFile), Charset.forName(outFileEncoding), StandardOpenOption.CREATE, StandardOpenOption.WRITE); boolean first = true; for (Map.Entry<Integer, br.bireme.ngrams.Field> entry : parameters.sfields.entrySet()) { fields.put(entry.getKey(), entry.getValue().name + NOT_NORMALIZED_FLD); } for (int docID = 0; docID < maxdoc; docID++) { if ((liveDocs != null) && (!liveDocs.get(docID))) continue; final Document doc = reader.document(docID); if (first) { first = false; } else { writer.newLine(); } writer.append(doc2pipe(doc, fields)); } writer.close(); reader.close(); }
From source file:br.bireme.ngrams.TestIndex.java
public static boolean test(final IndexReader ireader, final NGSchema schema) throws IOException, ParserConfigurationException, SAXException { final Parameters parameters = schema.getParameters(); final Map<String, Field> fields = parameters.getNameFields(); boolean bad = false; for (int id = 0; id < ireader.maxDoc(); id++) { final Document doc = ireader.document(id); if (id % 100000 == 0) System.out.println("+++ " + id); bad = badDocument(doc, fields);//from ww w . java 2s. com if (bad) { System.out.println("BAD DOCUMENT => id: " + doc.get("id")); break; } } ireader.close(); return !bad; }
From source file:ca.gnewton.lusql.core.ViewIndex.java
License:Apache License
/** * Describe <code>main</code> method here. * * @param args a <code>String</code> value */// w ww.j a v a 2 s . co m public static final void main(final String[] args) { try { IndexReader reader = IndexReader.open(FSDirectory.open(new File(args[0]))); System.out.println("# of documents indexed: " + (reader.maxDoc() - 1)); int maxDoc = reader.maxDoc(); for (int i = 0; i < maxDoc; i++) { Document doc = reader.document(i); printDoc(doc, i); } reader.close(); } catch (Throwable t) { t.printStackTrace(); } }
From source file:cc.twittertools.index.ExtractTweetidsFromIndex.java
License:Apache License
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(//from w ww.j av a 2 s. co m OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(ExtractTweetidsFromIndex.class.getName(), options); System.exit(-1); } File indexLocation = new File(cmdline.getOptionValue(INDEX_OPTION)); if (!indexLocation.exists()) { System.err.println("Error: " + indexLocation + " does not exist!"); System.exit(-1); } IndexReader reader = DirectoryReader.open(FSDirectory.open(indexLocation)); PrintStream out = new PrintStream(System.out, true, "UTF-8"); for (int i = 0; i < reader.maxDoc(); i++) { Document doc = reader.document(i); out.println(doc.getField(StatusField.ID.name).stringValue() + "\t" + doc.getField(StatusField.SCREEN_NAME.name).stringValue()); } out.close(); reader.close(); }
From source file:ccc.plugins.search.lucene.AclFilter.java
License:Open Source License
/** {@inheritDoc} */ @Override/*from www . j av a 2 s. co m*/ public DocIdSet getDocIdSet(final IndexReader reader) throws IOException { // Assume all documents are invalid. final OpenBitSet docs = new OpenBitSet(reader.maxDoc()); // Validate accessible documents. for (int i = 0; i < reader.maxDoc(); i++) { final Document d = reader.document(i); final Field aclField = d.getField(_field); if (null != aclField && _ac.canRead(deserialise(aclField.getBinaryValue()))) { docs.set(i); } } return docs; }
From source file:com.appspot.socialinquirer.server.service.impl.AnalysisServiceImpl.java
License:Apache License
/**
 * Indexes the given title and (HTML-stripped) body into a throwaway in-memory
 * index, pulls term-frequency vectors from both fields, and returns the
 * collected tags sorted by descending frequency. On any failure the error is
 * logged and whatever tags were collected so far are returned.
 *
 * FIX: the frequency comparator used int subtraction
 * ({@code o2.getFreqency() - o1.getFreqency()}), which can overflow —
 * replaced with {@code Integer.compare}. The {@code IndexWriter} is now
 * closed in a finally block so it cannot leak if {@code addDocument} throws.
 *
 * @param title document title, indexed with term vectors
 * @param text  document body; HTML tags are stripped before indexing
 * @return tags sorted by descending frequency (possibly empty, never null)
 */
@Override
public List<Tag> getTermVector(String title, String text) {
    RAMDirectory directory = null;
    IndexReader reader = null;
    Map<String, Tag> tagsMap = new HashMap<String, Tag>();
    try {
        directory = new RAMDirectory();
        IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_CURRENT), true,
                MaxFieldLength.UNLIMITED);
        try {
            Document doc = new Document();
            doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
            doc.add(new Field("body", stripHtmlTags(text, true), Field.Store.YES, Field.Index.ANALYZED,
                    Field.TermVector.YES));
            writer.addDocument(doc);
        } finally {
            writer.close();
        }
        reader = IndexReader.open(directory, true);
        int numDocs = reader.maxDoc();
        for (int i = 0; i < numDocs; i++) {
            TermFreqVector termFreqVector = reader.getTermFreqVector(i, "title");
            pullTags(termFreqVector, tagsMap);
            termFreqVector = reader.getTermFreqVector(i, "body");
            pullTags(termFreqVector, tagsMap);
        }
    } catch (Exception e) {
        // Best-effort: log and fall through to return what was collected.
        logger.log(Level.SEVERE, "An error occured while pulling tags from text.", e);
    } finally {
        closeIndexReader(reader);
        closeRAMDirectory(directory);
    }
    ArrayList<Tag> tagsList = new ArrayList<Tag>(tagsMap.values());
    Collections.sort(tagsList, new Comparator<Tag>() {
        @Override
        public int compare(Tag o1, Tag o2) {
            // Descending frequency; Integer.compare avoids subtraction overflow.
            // ("getFreqency" [sic] is the project API's spelling.)
            return Integer.compare(o2.getFreqency(), o1.getFreqency());
        }
    });
    return tagsList;
}
From source file:com.bloatit.data.search.DaoFeatureSearchFilter.java
License:Open Source License
@Override public DocIdSet getDocIdSet(final IndexReader reader) throws IOException { final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); bitSet.set(0, reader.maxDoc()); // Set all document ok if (filteredTerms != null) { for (final Pair<String, String> pair : filteredTerms) { final TermDocs termDocs = reader.termDocs(new Term(pair.key, pair.value.toLowerCase())); while (termDocs.next()) { bitSet.clear(termDocs.doc()); }/*from w w w . j av a 2s . c o m*/ } } return bitSet; }