Example usage for org.apache.lucene.index SegmentReader directory

List of usage examples for org.apache.lucene.index SegmentReader directory

Introduction

On this page you can find an example usage for org.apache.lucene.index SegmentReader directory.

Prototype

public Directory directory() 

Source Link

Document

Returns the directory this index resides in.

Usage

From source file:org.apache.blur.filter.FilterCache.java

License:Apache License

/** Returns the {@link Directory} that the given segment reader's index resides in. */
private Directory getDirectory(SegmentReader reader) {
    final Directory dir = reader.directory();
    return dir;
}

From source file:org.apache.blur.lucene.warmup.IndexWarmup.java

License:Apache License

/**
 * Finds the Directory backing the segment named {@code segmentName} within the
 * given reader, descending into the leaves of a composite reader.
 *
 * @param reader      atomic or composite reader to search
 * @param segmentName segment to look for
 * @param context     label used only for logging
 * @return the matching segment's Directory, or null when no segment matches or
 *         the index was closed mid-scan
 */
private Directory getDirectory(IndexReader reader, String segmentName, String context) {
    if (reader instanceof AtomicReader) {
        // Atomic readers are handled by the single-reader overload.
        return getDirectory((AtomicReader) reader, segmentName, context);
    }
    for (IndexReaderContext leafContext : reader.getContext().leaves()) {
        if (_isClosed.get()) {
            LOG.info("Context [{0}] index closed", context);
            return null;
        }
        AtomicReader leaf = ((AtomicReaderContext) leafContext).reader();
        if (!(leaf instanceof SegmentReader)) {
            continue;
        }
        SegmentReader segmentReader = (SegmentReader) leaf;
        if (segmentReader.getSegmentName().equals(segmentName)) {
            return segmentReader.directory();
        }
    }
    return null;
}

From source file:org.apache.blur.lucene.warmup.IndexWarmup.java

License:Apache License

/**
 * Returns the Directory of {@code atomicReader} when it is a SegmentReader for
 * the segment named {@code segmentName}; otherwise null.
 *
 * @param context label carried for symmetry with the composite overload (unused here)
 */
private Directory getDirectory(AtomicReader atomicReader, String segmentName, String context) {
    if (!(atomicReader instanceof SegmentReader)) {
        return null;
    }
    SegmentReader segmentReader = (SegmentReader) atomicReader;
    return segmentReader.getSegmentName().equals(segmentName) ? segmentReader.directory() : null;
}

From source file:org.apache.blur.lucene.warmup.IndexWarmup.java

License:Apache License

/**
 * Samples the files backing {@code atomicReader} and returns per-field trace
 * results keyed by segment name. Results are cached in a sample file inside the
 * directory: when the file exists it is read back, otherwise each field is
 * traced and the results are written out.
 *
 * @param atomicReader reader to sample; only SegmentReader instances are sampled,
 *                     anything else yields an empty map
 * @param context      label used only for logging
 * @return map of segment name to trace results; empty when not a SegmentReader
 *         or the directory is not traceable; null when the index was closed
 *         mid-sampling
 * @throws IOException on any failure reading or writing the sample file
 */
public Map<String, List<IndexTracerResult>> sampleIndex(AtomicReader atomicReader, String context)
        throws IOException {
    Map<String, List<IndexTracerResult>> results = new HashMap<String, List<IndexTracerResult>>();
    if (atomicReader instanceof SegmentReader) {
        SegmentReader segmentReader = (SegmentReader) atomicReader;
        Directory directory = segmentReader.directory();
        if (!(directory instanceof TraceableDirectory)) {
            LOG.info("Context [{1}] cannot warmup directory [{0}] needs to be a TraceableDirectory.", directory,
                    context);
            return results;
        }
        IndexTracer tracer = new IndexTracer((TraceableDirectory) directory, _maxSampleSize);
        String fileName = getSampleFileName(segmentReader.getSegmentName());
        List<IndexTracerResult> segmentTraces = new ArrayList<IndexTracerResult>();
        if (directory.fileExists(fileName)) {
            // Reuse a previously written sample. Close the input even if read()
            // throws (the original leaked it on exception).
            IndexInput input = directory.openInput(fileName, IOContext.READONCE);
            try {
                segmentTraces = read(input);
            } finally {
                input.close();
            }
        } else {
            Fields fields = atomicReader.fields();
            for (String field : fields) {
                LOG.debug("Context [{1}] sampling field [{0}].", field, context);
                Terms terms = fields.terms(field);
                boolean hasOffsets = terms.hasOffsets();
                boolean hasPayloads = terms.hasPayloads();
                boolean hasPositions = terms.hasPositions();

                tracer.initTrace(segmentReader, field, hasPositions, hasPayloads, hasOffsets);
                IndexTracerResult result = tracer.runTrace(terms);
                segmentTraces.add(result);
            }
            if (_isClosed.get()) {
                LOG.info("Context [{0}] index closed", context);
                return null;
            }
            // Persist the traces; close the output even if write() throws.
            IndexOutput output = directory.createOutput(fileName, IOContext.DEFAULT);
            try {
                write(segmentTraces, output);
            } finally {
                output.close();
            }
        }
        results.put(segmentReader.getSegmentName(), segmentTraces);
    }
    return results;
}

From source file:org.apache.solr.codecs.onsql.ONSQLStoredFieldsWriter.java

License:Apache License

@Override
public int merge(MergeState mergeState) throws IOException {
    log.debug("merge has been called");
    // check for our primary key completeness
    /*/*from w w  w. j a  va 2  s  .com*/
     * instead of copying stored fields we need to update mapping table and remove non-existing entries   
     * several cases: doc may be deleted and non existing in new segment
     * doc may be deleted, yet similar doc might be found
     * thing is, these fields might be updated, so using linkup is not so productive
     * because in fact there can be two diffirent versions of documents present after the update
     * one, old version, deleted now, and one new version 
     * hmmm, since we're using custom primary key, it means on the update entry with our fields will be overwritten, 
     * also, worth considering, that merge procedure only creates new segment based on previous ones, 
     * deletion of old segments is happening later, via Directory.deletefile API
     * have to check on this.
     * 
     * first, since it's merge, we assume entries in kvstore already existing
     * so, first we search for our segment key: segID-docID->customPK, then copy it
     * all other fields will be left unchanged
     * 
     * have to consider the failure case, when merge might be aborted in the middle of it
     * since merge first copies data to new segment, we are safe here, as in worst case we will lose just the links
     * for this new segment
     * */

    int docCount = 0;
    int idx = 0;
    String new_segment_kvstore_key_part = Base62Converter
            .fromBase10(mergeState.segmentInfo.name.concat(STORED_FIELDS_EXTENSION).hashCode());
    for (AtomicReader reader : mergeState.readers) {
        final SegmentReader seg_reader = mergeState.matchingSegmentReaders[idx++];
        ONSQLStoredFieldsReader fields_reader = null;
        if (seg_reader != null) {
            final StoredFieldsReader fieldsReader = seg_reader.getFieldsReader();
            // we can do the merge only if the matching reader is also a ONSQLStoredFieldsReader
            if (fieldsReader != null && fieldsReader instanceof ONSQLStoredFieldsReader) {
                fields_reader = (ONSQLStoredFieldsReader) fieldsReader;
            } else
                throw new IllegalStateException("incorrect fieldsreader class at merge procedure, is "
                        + fieldsReader.getClass().getName() + ", only ONSQLStoredFieldsReader is accepted");
        }
        String current_segment = seg_reader.getSegmentName().concat(STORED_FIELDS_EXTENSION);
        log.debug("current segment name = " + seg_reader.getSegmentName());
        // we assume reader always uses the instance of FSDirectory, so that we can extract directory path
        String dir = ((FSDirectory) seg_reader.directory()).getDirectory().getAbsolutePath();
        final int maxDoc = reader.maxDoc();
        final Bits liveDocs = reader.getLiveDocs();
        boolean canmerge = ONSQLKVstoreHandler.getInstance().getAllowWriting(this.tdir);
        for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = nextLiveDoc(i + 1, liveDocs, maxDoc)) {
            ++docCount;
            if (canmerge) {
                // retrieve link using our doc id
                Key doc_key = Key.createKey(Arrays.asList(Base62Converter.fromBase10(dir.hashCode()),
                        Base62Converter.fromBase10(current_segment.hashCode()), Base62Converter.fromBase10(i)));
                Iterator<Key> kv_it = kvstore.multiGetKeysIterator(Direction.FORWARD, 1, doc_key, null,
                        Depth.PARENT_AND_DESCENDANTS);
                if (!kv_it.hasNext())
                    throw new IllegalStateException(
                            "unable to get doc segment key using key id=" + doc_key.toString());
                Key entry_key = kv_it.next();
                // create link to doc id for new segment 
                Key link_key = Key.createKey(Arrays.asList(Base62Converter.fromBase10(dir.hashCode()),
                        new_segment_kvstore_key_part, Base62Converter.fromBase10(numDocsWritten)),
                        entry_key.getMinorPath());
                log.debug("putting link key=" + link_key.toString());
                kvstore.put(link_key, Value.EMPTY_VALUE);
                // next add backref
                Key backref_key = Key.createKey(entry_key.getMinorPath(),
                        Arrays.asList("_1", Base62Converter.fromBase10(dir.hashCode()),
                                new_segment_kvstore_key_part, Base62Converter.fromBase10(numDocsWritten)));
                kvstore.put(backref_key, Value.EMPTY_VALUE);
                log.debug("putting backref key=" + backref_key.toString());
                //addDocument(doc, mergeState.fieldInfos);
            } else
                log.debug("merging is not allowed, skipping doc with internal id=" + i);
            ++numDocsWritten;
            mergeState.checkAbort.work(300);
        }

    }

    finish(mergeState.fieldInfos, docCount);
    return docCount;
}

From source file:org.elasticsearch.index.shard.ShardUtils.java

License:Apache License

/**
 * Best-effort extraction of the ShardId from a SegmentReader's backing store
 * directory.
 *
 * @param reader segment reader, may be null
 * @return the shard id, or null when the reader is null or its directory is
 *         not a store directory
 */
@Nullable
private static ShardId extractShardId(SegmentReader reader) {
    if (reader == null) {
        return null;
    }
    assert reader.getRefCount() > 0 : "SegmentReader is already closed";
    // reader.directory doesn't call ensureOpen for internal reasons.
    final Store.StoreDirectory storeDir = DirectoryUtils.getStoreDirectory(reader.directory());
    return storeDir == null ? null : storeDir.shardId();
}

From source file:perf.DiskUsage.java

License:Apache License

/**
 * Computes per-field disk usage for a segment by mapping per-field codec file
 * suffixes back to field names and summing file sizes by extension. Points
 * usage (.dii) is recovered by re-parsing the Lucene60 points metadata header.
 *
 * @param reader segment to analyze; its directory is listed directly
 * @return field stats in the natural ordering of FieldStats
 * @throws Exception on any I/O or codec-format failure
 */
static Set<FieldStats> analyzeFields(SegmentReader reader) throws Exception {
    Map<String, FieldStats> stats = new HashMap<>();
    Map<String, String> dvSuffixes = new HashMap<>();
    Map<String, String> postingsSuffixes = new HashMap<>();
    for (FieldInfo field : reader.getFieldInfos()) {
        FieldStats fieldStats = new FieldStats(field.name);
        stats.put(field.name, fieldStats);
        // Per-field formats record the suffix they wrote under in the field
        // attributes; remember suffix -> field so files can be attributed below.
        Map<String, String> attributes = field.attributes();
        if (attributes != null) {
            String postingsSuffix = attributes.get(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY);
            if (postingsSuffix != null) {
                postingsSuffixes.put(postingsSuffix, field.name);
            }
            String dvSuffix = attributes.get(PerFieldDocValuesFormat.PER_FIELD_SUFFIX_KEY);
            if (dvSuffix != null) {
                dvSuffixes.put(dvSuffix, field.name);
            }
        }

        // Count how many docs actually carry this field.
        Bits docsWithField = reader.getDocsWithField(field.name);
        if (docsWithField != null) {
            int count = 0;
            for (int docID = 0; docID < reader.maxDoc(); docID++) {
                if (docsWithField.get(docID)) {
                    count++;
                }
            }
            fieldStats.docCountWithField = count;
        }
    }

    // Attribute each suffixed file's size to the owning field's counters.
    // NOTE(review): a suffixed file whose suffix is in neither map would make
    // dvSuffixes.get(suffix)/postingsSuffixes.get(suffix) return null and
    // stats.get(null) NPE — presumably cannot happen for a well-formed segment.
    Directory directory = reader.directory();
    for (String file : directory.listAll()) {
        String suffix = parseSuffix(file);
        long bytes = directory.fileLength(file);
        if (suffix != null) {
            switch (IndexFileNames.getExtension(file)) {
            case "dvd":
            case "dvm":
                stats.get(dvSuffixes.get(suffix)).dvBytes += bytes;
                break;
            case "tim":
            case "tip":
                stats.get(postingsSuffixes.get(suffix)).termsBytes += bytes;
                break;
            case "doc":
                stats.get(postingsSuffixes.get(suffix)).postingsBytes += bytes;
                break;
            case "pos":
            case "pay":
                stats.get(postingsSuffixes.get(suffix)).proxBytes += bytes;
                break;
            default:
                throw new AssertionError("unexpected suffixed file: " + file);
            }
        } else {
            // not a per-field file, but we can hackishly do this for the points case.
            if ("dii".equals(IndexFileNames.getExtension(file))) {
                System.err.println(
                        "retrieving per-field point usage, if you see a scary corruption error, its probably just this tool!!!!");
                try (ChecksumIndexInput in = directory.openChecksumInput(file, IOContext.READONCE)) {
                    // fail hard if its not exactly the version we do this hack for.
                    CodecUtil.checkIndexHeader(in, "Lucene60PointsFormatMeta", 0, 0,
                            reader.getSegmentInfo().info.getId(), "");
                    int fieldCount = in.readVInt();
                    // strangely, bkd offsets are not in any guaranteed order
                    TreeMap<Long, String> offsetToField = new TreeMap<>();
                    for (int i = 0; i < fieldCount; i++) {
                        int field = in.readVInt();
                        long offset = in.readVLong();
                        offsetToField.put(offset, reader.getFieldInfos().fieldInfo(field).name);
                    }
                    // now we can traverse in order; charge each field the gap
                    // between consecutive data offsets as its points usage.
                    long previousOffset = 0;
                    for (Map.Entry<Long, String> entry : offsetToField.entrySet()) {
                        long offset = entry.getKey();
                        String field = entry.getValue();
                        stats.get(field).pointsBytes += (offset - previousOffset);
                        previousOffset = offset;
                    }
                    CodecUtil.checkFooter(in);
                }
            }
        }
    }

    return new TreeSet<FieldStats>(stats.values());
}

From source file:perf.DiskUsage.java

License:Apache License

/**
 * Prints a disk-usage report for the segment: overall totals bucketed by index
 * file extension, followed by a per-field breakdown taken from {@code stats}.
 *
 * @param reader segment whose directory is listed and sized
 * @param stats  per-field statistics produced by analyzeFields
 * @throws Exception on any I/O failure while listing or sizing files
 */
static void report(SegmentReader reader, Set<FieldStats> stats) throws Exception {
    long totalSize = 0;
    long storeSize = 0;
    long vectorSize = 0;
    long normsSize = 0;
    long dvsSize = 0;
    long postingsSize = 0;
    long pointsSize = 0;
    long termsSize = 0;
    long proxSize = 0;
    // Hoist the loop-invariant directory lookup (the original called
    // reader.directory() twice per file).
    Directory directory = reader.directory();
    for (String file : directory.listAll()) {
        long size = directory.fileLength(file);
        totalSize += size;
        String extension = IndexFileNames.getExtension(file);
        if (extension != null) {
            switch (extension) {
            case "fdt":
            case "fdx":
                storeSize += size;
                break;
            case "tvx":
            case "tvd":
                vectorSize += size;
                break;
            case "nvd":
            case "nvm":
                normsSize += size;
                break;
            case "dvd":
            case "dvm":
                dvsSize += size;
                break;
            case "tim":
            case "tip":
                termsSize += size;
                break;
            case "pos":
            case "pay":
                proxSize += size;
                break;
            case "doc":
                postingsSize += size;
                break;
            case "dii":
            case "dim":
                pointsSize += size;
                break;
            }
        }
    }

    DecimalFormat df = new DecimalFormat("#,##0");
    System.out.printf("total disk:    %15s\n", df.format(totalSize));
    System.out.printf("num docs:      %15s\n", df.format(reader.numDocs()));
    System.out.printf("stored fields: %15s\n", df.format(storeSize));
    System.out.printf("term vectors:  %15s\n", df.format(vectorSize));
    System.out.printf("norms:         %15s\n", df.format(normsSize));
    System.out.printf("docvalues:     %15s\n", df.format(dvsSize));
    System.out.printf("postings:      %15s\n", df.format(postingsSize));
    System.out.printf("prox:          %15s\n", df.format(proxSize));
    System.out.printf("points:        %15s\n", df.format(pointsSize));
    System.out.printf("terms:         %15s\n", df.format(termsSize));
    System.out.println();

    int maxFieldNameLength = 0;
    for (FieldStats field : stats) {
        maxFieldNameLength = Math.max(maxFieldNameLength, field.name.length());
    }

    // Make sure we format to enough room for the max field length:
    String fieldNameFormat = "%" + maxFieldNameLength + "s";

    System.out.printf(fieldNameFormat + " %15s %15s %15s %15s %15s %15s %15s %20s\n", "field", "total",
            "terms dict", "postings", "proximity", "points", "docvalues", "% with dv", "features");
    System.out.printf(fieldNameFormat + " %15s %15s %15s %15s %15s %15s %15s %20s\n", "=====", "=====",
            "==========", "========", "=========", "=========", "=========", "========", "========");

    for (FieldStats field : stats) {
        System.out.printf(fieldNameFormat + " %15s %15s %15s %15s %15s %15s %14.1f%% %20s\n", field.name,
                df.format(field.totalBytes()), df.format(field.termsBytes), df.format(field.postingsBytes),
                df.format(field.proxBytes), df.format(field.pointsBytes), df.format(field.dvBytes),
                (100.0 * field.docCountWithField) / reader.maxDoc(),
                features(reader.getFieldInfos().fieldInfo(field.name)));
    }
}

From source file:perf.DiskUsage.java

License:Apache License

/**
 * Computes per-field disk usage for a segment by mapping per-field codec file
 * suffixes back to field names and summing file sizes by extension. Doc counts
 * come from iterating the field's doc-values iterator; points usage (.dii) is
 * recovered by re-parsing the Lucene60 points metadata header.
 *
 * @param reader segment to analyze; its directory is listed directly
 * @return field stats in the natural ordering of FieldStats
 * @throws Exception on any I/O or codec-format failure
 */
static Set<FieldStats> analyzeFields(SegmentReader reader) throws Exception {
    Map<String, FieldStats> stats = new HashMap<>();
    Map<String, String> dvSuffixes = new HashMap<>();
    Map<String, String> postingsSuffixes = new HashMap<>();
    for (FieldInfo field : reader.getFieldInfos()) {
        FieldStats fieldStats = new FieldStats(field.name);
        stats.put(field.name, fieldStats);
        // Per-field formats record the suffix they wrote under in the field
        // attributes; remember suffix -> field so files can be attributed below.
        Map<String, String> attributes = field.attributes();
        if (attributes != null) {
            String postingsSuffix = attributes.get(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY);
            if (postingsSuffix != null) {
                postingsSuffixes.put(postingsSuffix, field.name);
            }
            String dvSuffix = attributes.get(PerFieldDocValuesFormat.PER_FIELD_SUFFIX_KEY);
            if (dvSuffix != null) {
                dvSuffixes.put(dvSuffix, field.name);
            }
        }

        // Pick the iterator matching the field's doc-values type; every branch
        // yields a DocIdSetIterator over docs that have a value for the field.
        DocIdSetIterator docsWithField;
        switch (field.getDocValuesType()) {
        case NUMERIC:
            docsWithField = reader.getNumericDocValues(field.name);
            break;
        case BINARY:
            docsWithField = reader.getBinaryDocValues(field.name);
            break;
        case SORTED:
            docsWithField = reader.getSortedDocValues(field.name);
            break;
        case SORTED_NUMERIC:
            docsWithField = reader.getSortedNumericDocValues(field.name);
            break;
        case SORTED_SET:
            docsWithField = reader.getSortedSetDocValues(field.name);
            break;
        case NONE:
            docsWithField = null;
            break;
        default:
            // Same outcome as NONE; kept separate to stay exhaustive if the
            // DocValuesType enum grows.
            docsWithField = null;
            break;
        }

        // Count how many docs actually carry this field.
        if (docsWithField != null) {
            int count = 0;
            while (docsWithField.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                count++;
            }
            fieldStats.docCountWithField = count;
        }
    }

    // Attribute each suffixed file's size to the owning field's counters.
    // NOTE(review): a suffixed file whose suffix is in neither map would make
    // dvSuffixes.get(suffix)/postingsSuffixes.get(suffix) return null and
    // stats.get(null) NPE — presumably cannot happen for a well-formed segment.
    Directory directory = reader.directory();
    for (String file : directory.listAll()) {
        String suffix = parseSuffix(file);
        long bytes = directory.fileLength(file);
        if (suffix != null) {
            switch (IndexFileNames.getExtension(file)) {
            case "dvd":
            case "dvm":
                stats.get(dvSuffixes.get(suffix)).dvBytes += bytes;
                break;
            case "tim":
            case "tip":
                stats.get(postingsSuffixes.get(suffix)).termsBytes += bytes;
                break;
            case "doc":
                stats.get(postingsSuffixes.get(suffix)).postingsBytes += bytes;
                break;
            case "pos":
            case "pay":
                stats.get(postingsSuffixes.get(suffix)).proxBytes += bytes;
                break;
            default:
                throw new AssertionError("unexpected suffixed file: " + file);
            }
        } else {
            // not a per-field file, but we can hackishly do this for the points case.
            if ("dii".equals(IndexFileNames.getExtension(file))) {
                System.err.println(
                        "retrieving per-field point usage, if you see a scary corruption error, its probably just this tool!!!!");
                try (ChecksumIndexInput in = directory.openChecksumInput(file, IOContext.READONCE)) {
                    // fail hard if its not exactly the version we do this hack for.
                    CodecUtil.checkIndexHeader(in, "Lucene60PointsFormatMeta", 0, 0,
                            reader.getSegmentInfo().info.getId(), "");
                    int fieldCount = in.readVInt();
                    // strangely, bkd offsets are not in any guaranteed order
                    TreeMap<Long, String> offsetToField = new TreeMap<>();
                    for (int i = 0; i < fieldCount; i++) {
                        int field = in.readVInt();
                        long offset = in.readVLong();
                        offsetToField.put(offset, reader.getFieldInfos().fieldInfo(field).name);
                    }
                    // now we can traverse in order; charge each field the gap
                    // between consecutive data offsets as its points usage.
                    long previousOffset = 0;
                    for (Map.Entry<Long, String> entry : offsetToField.entrySet()) {
                        long offset = entry.getKey();
                        String field = entry.getValue();
                        stats.get(field).pointsBytes += (offset - previousOffset);
                        previousOffset = offset;
                    }
                    CodecUtil.checkFooter(in);
                }
            }
        }
    }

    return new TreeSet<FieldStats>(stats.values());
}