List of usage examples for org.apache.lucene.index LeafReader getFieldInfos
public abstract FieldInfos getFieldInfos();
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReader.java
License:Open Source License
/** * Wrap a single segment, exposing a subset of its fields. *///from w w w. ja v a 2 s . co m FieldSubsetReader(LeafReader in, CharacterRunAutomaton filter) { super(in); ArrayList<FieldInfo> filteredInfos = new ArrayList<>(); for (FieldInfo fi : in.getFieldInfos()) { if (filter.run(fi.name)) { filteredInfos.add(fi); } } fieldInfos = new FieldInfos(filteredInfos.toArray(new FieldInfo[filteredInfos.size()])); this.filter = filter; }
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java
License:Open Source License
/** * test filtering two string fields//from www. j a va 2s . c om */ public void testIndexed() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(null); IndexWriter iw = new IndexWriter(dir, iwc); // add document with 2 fields Document doc = new Document(); doc.add(new StringField("fieldA", "test", Field.Store.NO)); doc.add(new StringField("fieldB", "test", Field.Store.NO)); iw.addDocument(doc); // open reader DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw), new CharacterRunAutomaton(Automata.makeString("fieldA"))); // see only one field LeafReader segmentReader = ir.leaves().get(0).reader(); Set<String> seenFields = new HashSet<>(); for (FieldInfo info : segmentReader.getFieldInfos()) { seenFields.add(info.name); } assertEquals(Collections.singleton("fieldA"), seenFields); assertNotNull(segmentReader.terms("fieldA")); assertNull(segmentReader.terms("fieldB")); TestUtil.checkReader(ir); IOUtils.close(ir, iw, dir); }
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java
License:Open Source License
/** * test we have correct fieldinfos metadata *//* w ww . j a v a 2s .c o m*/ public void testFieldInfos() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(null); IndexWriter iw = new IndexWriter(dir, iwc); // add document with 2 fields Document doc = new Document(); doc.add(new StringField("fieldA", "test", Field.Store.NO)); doc.add(new StringField("fieldB", "test", Field.Store.NO)); iw.addDocument(doc); // open reader DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw), new CharacterRunAutomaton(Automata.makeString("fieldA"))); // see only one field LeafReader segmentReader = ir.leaves().get(0).reader(); FieldInfos infos = segmentReader.getFieldInfos(); assertEquals(1, infos.size()); assertNotNull(infos.fieldInfo("fieldA")); assertNull(infos.fieldInfo("fieldB")); TestUtil.checkReader(ir); IOUtils.close(ir, iw, dir); }
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java
License:Open Source License
/** * test filtering an index with no fields *//*from www. j a v a2 s .c o m*/ public void testEmpty() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(null); IndexWriter iw = new IndexWriter(dir, iwc); iw.addDocument(new Document()); // open reader DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw), new CharacterRunAutomaton(Automata.makeString("fieldA"))); // see no fields LeafReader segmentReader = ir.leaves().get(0).reader(); Set<String> seenFields = new HashSet<>(); for (FieldInfo info : segmentReader.getFieldInfos()) { seenFields.add(info.name); } assertEquals(0, seenFields.size()); assertNull(segmentReader.terms("foo")); // see no vectors assertNull(segmentReader.getTermVectors(0)); // see no stored fields Document document = segmentReader.document(0); assertEquals(0, document.getFields().size()); TestUtil.checkReader(ir); IOUtils.close(ir, iw, dir); }
From source file:org.tallison.gramreaper.terms.DumpTerms.java
License:Apache License
private void dumpTopN(LeafReader leafReader) throws IOException { if (config.field == null) { FieldInfos fieldInfos = leafReader.getFieldInfos(); for (FieldInfo fieldInfo : fieldInfos) { dumpTopNField(leafReader, fieldInfo.name); }//from www. j a v a 2 s . co m } else { dumpTopNField(leafReader, config.field); } }
From source file:org.tallison.gramreaper.terms.DumpTerms.java
License:Apache License
/**
 * Collect and emit the top-N terms for a single field, ranked either by
 * document frequency (DF) or by total term frequency (TF) depending on the
 * configured sort. Terms below the configured absolute or relative
 * document-frequency floors, and stop/start words, are skipped.
 *
 * @param leafReader segment reader to pull terms from
 * @param field      name of the field whose terms are dumped
 * @throws IOException      on index access failure
 * @throws RuntimeException if the field does not exist in this reader
 */
private void dumpTopNField(LeafReader leafReader, String field) throws IOException {
    // Enum constants compare correctly and null-safely with ==; hoist the
    // loop-invariant sort-mode check out of the per-term loop.
    final boolean sortByDf = config.sort == DumpTermsConfig.SORT.DF;
    AbstractTokenTFDFPriorityQueue queue = sortByDf
            ? new TokenDFPriorityQueue(config.topN)
            : new TokenTFPriorityQueue(config.topN);

    Terms terms = leafReader.terms(field);
    if (terms == null) {
        // Unknown field: list the fields that DO exist to help diagnose typos.
        StringBuilder sb = new StringBuilder();
        for (FieldInfo fieldInfo : leafReader.getFieldInfos()) {
            if (sb.length() > 0) {
                sb.append("\n");
            }
            sb.append(fieldInfo.name);
        }
        throw new RuntimeException("I can't find field \"" + field + "\".\n" + "I only see:\n" + sb.toString());
    }

    TermsEnum termsEnum = terms.iterator();
    BytesRef bytesRef = termsEnum.next();
    int docsWThisField = leafReader.getDocCount(field);
    while (bytesRef != null) {
        int df = termsEnum.docFreq();
        long tf = termsEnum.totalTermFreq();
        // Absolute document-frequency floor.
        if (config.minDocFreq > -1 && df < config.minDocFreq) {
            bytesRef = termsEnum.next();
            continue;
        }
        // Relative floor: fraction of documents containing this field.
        if (config.minDocPercentage > -1.0d
                && (double) df / (double) docsWThisField < config.minDocPercentage) {
            bytesRef = termsEnum.next();
            continue;
        }
        // Only materialize the term string when it could actually enter the
        // queue (queue not yet full, or the term beats the current minimum).
        if (queue.top() == null || queue.size() < config.topN
                || (sortByDf ? df >= queue.top().df : tf > queue.top().tf)) {
            String t = bytesRef.utf8ToString();
            if (!config.stopWords.contains(t) && !config.startWords.contains(t)) {
                queue.insertWithOverflow(new TokenDFTF(t, df, tf));
            }
        }
        bytesRef = termsEnum.next();
    }

    if (config.outputFile == null) {
        // No output file configured: print rows to stdout.
        StringBuilder sb = new StringBuilder();
        for (TokenDFTF tp : queue.getArray()) {
            System.out.println(getRow(sb, tp));
        }
    } else if (Files.isDirectory(config.outputFile)) {
        // Directory target: one file per field.
        writeTopN(config.outputFile.resolve(field), queue);
    } else {
        writeTopN(config.outputFile, queue);
    }
}