List of usage examples for org.apache.lucene.util.BytesRef.utf8ToString()
public String utf8ToString()
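utf8ToString() decodes the bytes referenced by the BytesRef as UTF-8 and returns the resulting String; it is most commonly called while iterating a field's terms with a TermsEnum. Before the full examples below, here is a minimal sketch of that pattern. The index path and field name are placeholders, and the snippet assumes a Lucene 5/6-era API (FSDirectory.open(Path), MultiFields.getTerms, argument-less Terms.iterator()), which matches some but not all of the examples on this page.

import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class PrintTerms {
    public static void main(String[] args) throws Exception {
        // Placeholder index path and field name; adjust for your own index.
        try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"));
                IndexReader reader = DirectoryReader.open(dir)) {
            Terms terms = MultiFields.getTerms(reader, "contents");
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator();
                BytesRef term;
                while ((term = termsEnum.next()) != null) {
                    // utf8ToString() converts the term's UTF-8 bytes into a Java String.
                    System.out.println(term.utf8ToString());
                }
            }
        }
    }
}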
From source file:com.twentyn.patentSearch.DocumentSearch.java
License:Open Source License
public static void main(String[] args) throws Exception {
    System.out.println("Starting up...");
    System.out.flush();

    Options opts = new Options();
    opts.addOption(Option.builder("x").longOpt("index").hasArg().required()
            .desc("Path to index file to read").build());
    opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build());
    opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build());
    opts.addOption(Option.builder("f").longOpt("field").hasArg().desc("The indexed field to search").build());
    opts.addOption(Option.builder("q").longOpt("query").hasArg().desc("The query to use when searching").build());
    opts.addOption(Option.builder("l").longOpt("list-file").hasArg()
            .desc("A file containing a list of queries to run in sequence").build());
    opts.addOption(Option.builder("e").longOpt("enumerate").desc("Enumerate the documents in the index").build());
    opts.addOption(Option.builder("d").longOpt("dump").hasArg()
            .desc("Dump terms in the document index for a specified field").build());
    opts.addOption(Option.builder("o").longOpt("output").hasArg().desc("Write results JSON to this file.").build());
    opts.addOption(Option.builder("n").longOpt("inchi-field").hasArg()
            .desc("The index of the InChI field if an input TSV is specified.").build());
    opts.addOption(Option.builder("s").longOpt("synonym-field").hasArg()
            .desc("The index of the chemical synonym field if an input TSV is specified.").build());

    HelpFormatter helpFormatter = new HelpFormatter();
    CommandLineParser cmdLineParser = new DefaultParser();
    CommandLine cmdLine = null;
    try {
        cmdLine = cmdLineParser.parse(opts, args);
    } catch (ParseException e) {
        System.out.println("Caught exception when parsing command line: " + e.getMessage());
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(1);
    }

    if (cmdLine.hasOption("help")) {
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(0);
    }

    if (!(cmdLine.hasOption("enumerate") || cmdLine.hasOption("dump")
            || (cmdLine.hasOption("field") && (cmdLine.hasOption("query") || cmdLine.hasOption("list-file"))))) {
        System.out.println("Must specify one of 'enumerate', 'dump', or 'field' + {'query', 'list-file'}");
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(1);
    }

    if (cmdLine.hasOption("verbose")) {
        // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2
        LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
        Configuration ctxConfig = ctx.getConfiguration();
        LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME);
        logConfig.setLevel(Level.DEBUG);
        ctx.updateLoggers();
        LOGGER.debug("Verbose logging enabled");
    }

    ObjectMapper objectMapper = new ObjectMapper();
    objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
    objectMapper.setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.ANY);

    LOGGER.info("Opening index at " + cmdLine.getOptionValue("index"));
    try (Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath());
            IndexReader indexReader = DirectoryReader.open(indexDir)) {
        if (cmdLine.hasOption("enumerate")) {
            /* Enumerate all documents in the index.
             * With help from
             * http://stackoverflow.com/questions/2311845/is-it-possible-to-iterate-through-documents-stored-in-lucene-index */
            for (int i = 0; i < indexReader.maxDoc(); i++) {
                Document doc = indexReader.document(i);
                LOGGER.info("Doc " + i + ":");
                LOGGER.info(doc);
            }
        } else if (cmdLine.hasOption("dump")) {
            /* Dump indexed terms for a specific field.
             * With help from http://stackoverflow.com/questions/11148036/find-list-of-terms-indexed-by-lucene */
            Terms terms = SlowCompositeReaderWrapper.wrap(indexReader).terms(cmdLine.getOptionValue("dump"));
            LOGGER.info("Has positions: " + terms.hasPositions());
            LOGGER.info("Has offsets: " + terms.hasOffsets());
            LOGGER.info("Has freqs: " + terms.hasFreqs());
            LOGGER.info("Stats: " + terms.getStats());
            LOGGER.info(terms);
            TermsEnum termsEnum = terms.iterator();
            BytesRef br = null;
            while ((br = termsEnum.next()) != null) {
                LOGGER.info("  " + br.utf8ToString());
            }
        } else {
            IndexSearcher searcher = new IndexSearcher(indexReader);
            String field = cmdLine.getOptionValue("field");

            List<Pair<String, String>> queries = null;
            if (cmdLine.hasOption("query")) {
                queries = Collections.singletonList(Pair.of("", cmdLine.getOptionValue("query")));
            } else if (cmdLine.hasOption("list-file")) {
                if (!(cmdLine.hasOption("inchi-field") && cmdLine.hasOption("synonym-field"))) {
                    LOGGER.error("Must specify both inchi-field and synonym-field when using list-file.");
                    System.exit(1);
                }
                Integer inchiField = Integer.parseInt(cmdLine.getOptionValue("inchi-field"));
                Integer synonymField = Integer.parseInt(cmdLine.getOptionValue("synonym-field"));

                queries = new LinkedList<>();
                BufferedReader r = new BufferedReader(new FileReader(cmdLine.getOptionValue("list-file")));
                String line;
                while ((line = r.readLine()) != null) {
                    line = line.trim();
                    if (!line.isEmpty()) {
                        // TODO: use a proper TSV reader; this is intentionally terrible as is.
                        String[] fields = line.split("\t");
                        queries.add(Pair.of(fields[inchiField].replace("\"", ""), fields[synonymField]));
                    }
                }
                r.close();
            }

            if (queries == null || queries.size() == 0) {
                LOGGER.error("Found no queries to run.");
                return;
            }

            List<SearchResult> searchResults = new ArrayList<>(queries.size());
            for (Pair<String, String> queryPair : queries) {
                String inchi = queryPair.getLeft();
                String rawQueryString = queryPair.getRight();
                /* The Lucene query parser interprets the kind of structural annotations we see in chemical entities
                 * as query directives, which is not what we want at all.  Phrase queries seem to work adequately
                 * with the analyzer we're currently using. */
                String queryString = rawQueryString.trim().toLowerCase();
                String[] parts = queryString.split("\\s+");
                PhraseQuery query = new PhraseQuery();
                for (String p : parts) {
                    query.add(new Term(field, p));
                }
                LOGGER.info("Running query: " + query.toString());

                BooleanQuery bq = new BooleanQuery();
                bq.add(query, BooleanClause.Occur.MUST);
                bq.add(new TermQuery(new Term(field, "yeast")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "ferment")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "fermentation")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "fermentive")), BooleanClause.Occur.SHOULD);
                bq.add(new TermQuery(new Term(field, "saccharomyces")), BooleanClause.Occur.SHOULD);
                LOGGER.info("  Full query: " + bq.toString());

                TopDocs topDocs = searcher.search(bq, 100);
                ScoreDoc[] scoreDocs = topDocs.scoreDocs;
                if (scoreDocs.length == 0) {
                    LOGGER.info("Search returned no results.");
                }
                List<ResultDocument> results = new ArrayList<>(scoreDocs.length);
                for (int i = 0; i < scoreDocs.length; i++) {
                    ScoreDoc scoreDoc = scoreDocs[i];
                    Document doc = indexReader.document(scoreDoc.doc);
                    LOGGER.info("Doc " + i + ": " + scoreDoc.doc + ", score " + scoreDoc.score + ": "
                            + doc.get("id") + ", " + doc.get("title"));
                    results.add(new ResultDocument(scoreDoc.doc, scoreDoc.score, doc.get("title"), doc.get("id"), null));
                }
                LOGGER.info("----- Done with query " + query.toString());
                // TODO: reduce memory usage when not writing results to an output file.
                searchResults.add(new SearchResult(inchi, rawQueryString, bq, results));
            }

            if (cmdLine.hasOption("output")) {
                try (FileWriter writer = new FileWriter(cmdLine.getOptionValue("output"))) {
                    writer.write(objectMapper.writeValueAsString(searchResults));
                }
            }
        }
    }
}
From source file:CopulaResources.TermCooccurence.java
public static TermCooccurence generateCooccurencebyClass(IndexReader indexReader, String classFieldName,
        String textFieldName, Analyzer analyzer, int minFreq, int maxFreq) throws IOException {
    System.out.println(":::Generating Term-Pair List:::");
    TermCooccurence CooccurList = new TermCooccurence();
    Terms classes = MultiFields.getTerms(indexReader, classFieldName);
    if (classes != null) {
        TermsEnum classesEnum = classes.iterator();
        BytesRef nextClass;
        while ((nextClass = classesEnum.next()) != null) {
            if (nextClass.length > 0) {
                Term term = new Term(classFieldName, nextClass);
                String tpClass = nextClass.utf8ToString();
                BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
                booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST));
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                TopDocs topDocs = indexSearcher.search(booleanQuery.build(), indexReader.numDocs());
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    IndexableField[] storableFields = indexSearcher.doc(scoreDoc.doc).getFields(textFieldName);
                    for (IndexableField singleStorableField : storableFields) {
                        if (singleStorableField != null) {
                            BytesRef text = new BytesRef(singleStorableField.stringValue());
                            generateCooccurences(text.utf8ToString(), analyzer, CooccurList, tpClass);
                        }
                    }
                }
                CooccurList.trimbyFreq(tpClass, minFreq, maxFreq);
            }
        }
    }
    System.out.println(":::Generation Complete:::");
    return CooccurList;
}
From source file:CopulaResources.TermCooccurence.java
public static void generateCooccurencebyClass(IndexReader indexReader, String classFieldName,
        String textFieldName, Analyzer analyzer, int minFreq, int maxFreq, Path saveDir) throws IOException {
    System.out.println(":::Generating Term-Pair List:::");
    TermCooccurence CooccurList = new TermCooccurence();
    Terms classes = MultiFields.getTerms(indexReader, classFieldName);
    if (classes != null) {
        TermsEnum classesEnum = classes.iterator();
        BytesRef nextClass;
        while ((nextClass = classesEnum.next()) != null) {
            if (nextClass.length > 0) {
                Term term = new Term(classFieldName, nextClass);
                String tpClass = nextClass.utf8ToString();
                BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
                booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST));
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                TopDocs topDocs = indexSearcher.search(booleanQuery.build(), indexReader.numDocs());
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    IndexableField[] storableFields = indexSearcher.doc(scoreDoc.doc).getFields(textFieldName);
                    for (IndexableField singleStorableField : storableFields) {
                        if (singleStorableField != null) {
                            BytesRef text = new BytesRef(singleStorableField.stringValue());
                            generateCooccurences(text.utf8ToString(), analyzer, CooccurList, tpClass);
                        }
                    }
                }
                CooccurList.trimbyFreq(tpClass, minFreq, maxFreq);
            }
        }
    }
    CooccurList.savetoFile(saveDir);
    System.out.println(":::Generation Complete:::");
}
From source file:CopulaResources.TermCooccurence.java
public static void generateNCooccurencebyClass(IndexReader indexReader, String classFieldName,
        String textFieldName, Analyzer analyzer, String direction, double percent, Path saveDir)
        throws IOException {
    System.out.println(":::Generating Term-Pair List:::");
    TermCooccurence CooccurList = new TermCooccurence();
    Terms classes = MultiFields.getTerms(indexReader, classFieldName);
    if (classes != null) {
        TermsEnum classesEnum = classes.iterator();
        BytesRef nextClass;
        while ((nextClass = classesEnum.next()) != null) {
            if (nextClass.length > 0) {
                Term term = new Term(classFieldName, nextClass);
                String tpClass = nextClass.utf8ToString();
                BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
                booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST));
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                TopDocs topDocs = indexSearcher.search(booleanQuery.build(), indexReader.numDocs());
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    IndexableField[] storableFields = indexSearcher.doc(scoreDoc.doc).getFields(textFieldName);
                    for (IndexableField singleStorableField : storableFields) {
                        if (singleStorableField != null) {
                            BytesRef text = new BytesRef(singleStorableField.stringValue());
                            generateCooccurences(text.utf8ToString(), analyzer, CooccurList, tpClass);
                        }
                    }
                }
                CooccurList.trimbyPercent(tpClass, direction, percent);
            }
        }
    }
    CooccurList.savetoFile(saveDir);
    System.out.println(":::Generation Complete:::");
}
From source file:de.blizzy.documentr.search.PageIndex.java
License:Open Source License
public Set<String> getAllTags(Authentication authentication) throws IOException, TimeoutException {
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        searcher = searcherManager.acquire();
        Bits visibleDocs = getVisibleDocIds(searcher, authentication);
        Set<String> tags = Sets.newHashSet();
        if (visibleDocs.length() > 0) {
            reader = searcher.getIndexReader();
            Terms terms = MultiFields.getTerms(reader, TAG);
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator(null);
                BytesRef ref;
                while ((ref = termsEnum.next()) != null) {
                    DocsEnum docsEnum = termsEnum.docs(visibleDocs, null, 0);
                    if (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        tags.add(ref.utf8ToString());
                    }
                }
            }
        }
        return tags;
    } finally {
        if (searcher != null) {
            searcherManager.release(searcher);
        }
    }
}
From source file:de.blizzy.documentr.search.TagFinder.java
License:Open Source License
public Set<String> getAllTags(Authentication authentication) throws IOException, TimeoutException {
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        searcher = searcherManager.acquire();
        // no point in running the task asynchronously here
        GetVisibleDocIdsTask visibleDocIdsTask = new GetVisibleDocIdsTask(searcher, authentication, userStore,
                permissionEvaluator, taskExecutor);
        Bits visibleDocIds = visibleDocIdsTask.call();
        Set<String> tags = Sets.newHashSet();
        if (visibleDocIds.length() > 0) {
            reader = searcher.getIndexReader();
            Terms terms = MultiFields.getTerms(reader, PageIndex.TAG);
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator(null);
                BytesRef ref;
                while ((ref = termsEnum.next()) != null) {
                    DocsEnum docsEnum = termsEnum.docs(visibleDocIds, null, 0);
                    if (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        tags.add(ref.utf8ToString());
                    }
                }
            }
        }
        return tags;
    } finally {
        if (searcher != null) {
            searcherManager.release(searcher);
        }
    }
}
From source file:de.mirkosertic.desktopsearch.SearchPhraseSuggester.java
License:Open Source License
public List<Suggestion> suggestSearchPhrase(String aFieldName, String aPhrase) throws IOException {
    LOGGER.info("Trying to find suggestions for phrase " + aPhrase);
    long theStartTime = System.currentTimeMillis();
    try {
        List<String> theTokens = toTokens(aFieldName, aPhrase);
        List<SpanQuery> theSpanQueries = theTokens.stream().map(s -> {
            if (QueryUtils.isWildCard(s)) {
                WildcardQuery theWildcardQuery = new WildcardQuery(new Term(aFieldName, s));
                SpanMultiTermQueryWrapper theWrapper = new SpanMultiTermQueryWrapper(theWildcardQuery);
                try {
                    return theWrapper.getRewriteMethod().rewrite(indexReader, theWildcardQuery);
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
            return new SpanTermQuery(new Term(aFieldName, s));
        }).collect(Collectors.toList());

        SpanQuery theSpanQuery = new SpanNearQuery(theSpanQueries.toArray(new SpanQuery[theSpanQueries.size()]),
                configuration.getSuggestionSlop(), configuration.isSuggestionInOrder());

        LOGGER.info("created span query " + theSpanQuery);

        LeafReader theAtomicReader = SlowCompositeReaderWrapper.wrap(indexReader);

        Map<Term, TermContext> theTermContexts = new HashMap<>();
        Map<String, Long> theSpanFrequencies = new HashMap<>();

        // These are all the matching spans over all documents
        Spans theMatchingSpans = theSpanQuery.getSpans(theAtomicReader.getContext(),
                new Bits.MatchAllBits(indexReader.numDocs()), theTermContexts);

        while (theMatchingSpans.next()) {

            // This maps the position of a term and the term string itself;
            // the positions must be in order, so we have to use a treemap.
            Map<Integer, String> theEntries = new TreeMap<>();

            Terms theAllTermsFromDocument = indexReader.getTermVector(theMatchingSpans.doc(),
                    IndexFields.CONTENT_NOT_STEMMED);

            int theSpanStart = theMatchingSpans.start() - configuration.getSuggestionWindowBefore();
            int theSpanEnd = theMatchingSpans.end() + configuration.getSuggestionWindowAfter();

            TermsEnum theTermsEnum = theAllTermsFromDocument.iterator(null);
            BytesRef theTerm;
            while ((theTerm = theTermsEnum.next()) != null) {
                DocsAndPositionsEnum thePositionEnum = theTermsEnum.docsAndPositions(null, null);
                if (thePositionEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                    int i = 0;
                    int position;
                    while (i < thePositionEnum.freq() && (position = thePositionEnum.nextPosition()) != -1) {
                        if (position >= theSpanStart && position <= theSpanEnd) {
                            theEntries.put(position, theTerm.utf8ToString());
                        }
                        i++;
                    }
                }
            }

            StringBuilder theResultString = new StringBuilder();
            theEntries.entrySet().forEach(e -> {
                if (theResultString.length() > 0) {
                    theResultString.append(" ");
                }
                theResultString.append(e.getValue());
            });

            String theTotalSpan = theResultString.toString().trim();

            Long theFrequency = theSpanFrequencies.get(theTotalSpan);
            if (theFrequency == null) {
                theSpanFrequencies.put(theTotalSpan, 1L);
            } else {
                theSpanFrequencies.put(theTotalSpan, theFrequency + 1);
            }
        }

        return theSpanFrequencies.entrySet().stream().filter(t -> t.getValue() > 1)
                .sorted((o1, o2) -> o2.getValue().compareTo(o1.getValue()))
                .limit(configuration.getNumberOfSuggestions())
                .map(T -> new Suggestion(highlight(T.getKey(), theTokens), T.getKey()))
                .collect(Collectors.toList());
    } finally {
        long theDuration = System.currentTimeMillis() - theStartTime;
        LOGGER.info("Took " + theDuration + "ms");
    }
}
From source file:de.tuberlin.dima.cuttlefish.preprocessing.vectorization.Vectorizer.java
License:Open Source License
public void vectorize(File luceneIndexDir, File outputDir) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    SequenceFile.Writer writer = null;
    FeatureDictionary dict = new FeatureDictionary();
    DirectoryReader reader = null;
    try {
        reader = DirectoryReader.open(new SimpleFSDirectory(luceneIndexDir));

        writer = SequenceFile.createWriter(fs, conf, new Path(outputDir.toString(), "documentVectors.seq"),
                IDAndCodes.class, VectorWritable.class);
        IDAndCodes idAndCodes = new IDAndCodes();
        VectorWritable vectorWritable = new VectorWritable();

        Fields fields = MultiFields.getFields(reader);
        if (fields != null) {
            Iterator<String> fieldNames = fields.iterator();
            while (fieldNames.hasNext()) {
                String field = fieldNames.next();
                if (!field.startsWith("bip:") && !"itemID".equals(field)) {
                    Terms terms = fields.terms(field);
                    TermsEnum termsEnum = terms.iterator(null);
                    BytesRef text;
                    while ((text = termsEnum.next()) != null) {
                        dict.addTextFeature(field, text.utf8ToString());
                    }
                }
            }
        }

        int numDocsVectorized = 0;
        for (int docID = 0; docID < reader.maxDoc(); docID++) {
            Document doc = reader.document(docID);

            int itemID = doc.getField("itemID").numericValue().intValue();

            RandomAccessSparseVector documentVector = new RandomAccessSparseVector(dict.numFeatures());
            Multimap<String, String> codes = HashMultimap.create();

            for (IndexableField field : doc.getFields()) {
                String fieldName = field.name();
                if (!fieldName.startsWith("bip:") && !"itemID".equals(fieldName)) {
                    Terms termFreqVector = reader.getTermVector(docID, fieldName);
                    if (termFreqVector != null) {
                        int maxTermFrequency = maxTermFrequency(termFreqVector);

                        TermsEnum te = termFreqVector.iterator(null);
                        BytesRef term;
                        while ((term = te.next()) != null) {
                            String termStr = term.utf8ToString();
                            int termFrequency = (int) te.totalTermFreq();
                            int documentFrequency = reader.docFreq(new Term(fieldName, term));
                            int numDocs = reader.numDocs();

                            double weight = weighting.weight(fieldName, termStr, termFrequency,
                                    documentFrequency, maxTermFrequency, numDocs);

                            int featureIndex = dict.index(fieldName, term.utf8ToString());
                            documentVector.setQuick(featureIndex, weight);
                        }
                    }
                } else if (fieldName.startsWith("bip:")) {
                    for (String value : doc.getValues(fieldName)) {
                        codes.put(fieldName, value);
                    }
                }
            }

            Vector featureVector = new SequentialAccessSparseVector(documentVector);
            weighting.normalize(featureVector);

            idAndCodes.set(itemID, codes);
            vectorWritable.set(featureVector);
            writer.append(idAndCodes, vectorWritable);

            numDocsVectorized++;
            if (numDocsVectorized % 100 == 0) {
                log.info("Vectorized {} documents", numDocsVectorized);
            }
        }
        log.info("Vectorized {} documents", numDocsVectorized);

        dict.writeToFile(new File(outputDir, "features.txt"));
        log.info("Wrote feature dictionary");
    } finally {
        Closeables.close(reader, true);
        Closeables.close(writer, true);
    }
}
From source file:de.tudarmstadt.ukp.dkpro.tc.features.ngram.base.LuceneFeatureExtractorBase.java
License:Apache License
@Override
protected FrequencyDistribution<String> getTopNgrams() throws ResourceInitializationException {
    FrequencyDistribution<String> topNGrams = new FrequencyDistribution<String>();

    MinMaxPriorityQueue<TermFreqTuple> topN = MinMaxPriorityQueue.maximumSize(getTopN()).create();
    long ngramVocabularySize = 0;

    IndexReader reader;
    try {
        reader = DirectoryReader.open(FSDirectory.open(luceneDir));
        Fields fields = MultiFields.getFields(reader);
        if (fields != null) {
            Terms terms = fields.terms(getFieldName());
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator(null);
                BytesRef text = null;
                while ((text = termsEnum.next()) != null) {
                    String term = text.utf8ToString();
                    long freq = termsEnum.totalTermFreq();
                    if (passesScreening(term)) {
                        topN.add(new TermFreqTuple(term, freq));
                        ngramVocabularySize += freq;
                    }
                }
            }
        }
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }

    int size = topN.size();
    for (int i = 0; i < size; i++) {
        TermFreqTuple tuple = topN.poll();
        long absCount = tuple.getFreq();
        double relFrequency = ((double) absCount) / ngramVocabularySize;

        if (relFrequency >= ngramFreqThreshold) {
            topNGrams.addSample(tuple.getTerm(), tuple.getFreq());
        }
    }

    logSelectionProcess(topNGrams.getB());

    return topNGrams;
}
From source file:de.tudarmstadt.ukp.dkpro.tc.features.ngram.meta.LuceneNGramMetaCollectorTest.java
License:Apache License
@Test
public void luceneNgramMetaCollectorTest() throws Exception {
    File tmpDir = folder.newFolder();

    CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(TextReader.class,
            TextReader.PARAM_SOURCE_LOCATION, "src/test/resources/data/", TextReader.PARAM_LANGUAGE, "en",
            TextReader.PARAM_PATTERNS, "text*.txt");

    AnalysisEngineDescription segmenter = AnalysisEngineFactory
            .createEngineDescription(BreakIteratorSegmenter.class);

    AnalysisEngineDescription metaCollector = AnalysisEngineFactory
            .createEngineDescription(LuceneNGramMetaCollector.class, LuceneNGramDFE.PARAM_LUCENE_DIR, tmpDir);

    for (JCas jcas : new JCasIterable(reader, segmenter, metaCollector)) {
        // System.out.println(jcas.getDocumentText().length());
    }

    int i = 0;
    IndexReader index;
    try {
        index = DirectoryReader.open(FSDirectory.open(tmpDir));
        Fields fields = MultiFields.getFields(index);
        if (fields != null) {
            Terms terms = fields.terms(LuceneNGramDFE.LUCENE_NGRAM_FIELD);
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator(null);
                // Bits liveDocs = MultiFields.getLiveDocs(index);
                // DocsEnum docs = termsEnum.docs(liveDocs, null);
                // int docId;
                // while ((docId = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
                //     index.g
                // }
                BytesRef text = null;
                while ((text = termsEnum.next()) != null) {
                    // System.out.println(text.utf8ToString() + " - " + termsEnum.totalTermFreq());
                    // System.out.println(termsEnum.docFreq());
                    if (text.utf8ToString().equals("this")) {
                        assertEquals(2, termsEnum.docFreq());
                        assertEquals(3, termsEnum.totalTermFreq());
                    }
                    i++;
                }
            }
        }
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }
    assertEquals(35, i);
}