Example usage for org.apache.lucene.index LeafReader maxDoc

Introduction

This page collects usage examples for org.apache.lucene.index.LeafReader#maxDoc().

Prototype

public abstract int maxDoc();

Document

Returns one greater than the largest possible document number.

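The snippets below all follow the same basic pattern: document numbers run from 0 (inclusive) to maxDoc() (exclusive), and because maxDoc() also counts deleted documents, the live-docs bitset must be consulted when iterating. As a quick orientation, here is a minimal sketch of that loop. It is not taken from any of the projects below; it uses the Lucene 5.x-era API these examples target, fully qualified names instead of imports, and walkSegment is only a placeholder name.

// Minimal sketch: visit every live document slot in one segment.
static void walkSegment(org.apache.lucene.index.LeafReader reader) throws java.io.IOException {
    org.apache.lucene.util.Bits liveDocs = reader.getLiveDocs(); // null means the segment has no deletions
    int maxDoc = reader.maxDoc();                                // one greater than the largest doc id
    for (int docId = 0; docId < maxDoc; docId++) {
        if (liveDocs != null && !liveDocs.get(docId)) {
            continue; // deleted document, still counted by maxDoc()
        }
        org.apache.lucene.document.Document doc = reader.document(docId);
        // ... process doc ...
    }
}
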
Usage

From source file: com.meizu.nlp.classification.utils.DatasetSplitter.java

License: Apache License

/**
 * Split a given index into 3 indexes for training, test and cross validation tasks respectively
 *
 * @param originalIndex        an {@link org.apache.lucene.index.LeafReader} on the source index
 * @param trainingIndex        a {@link Directory} used to write the training index
 * @param testIndex            a {@link Directory} used to write the test index
 * @param crossValidationIndex a {@link Directory} used to write the cross validation index
 * @param analyzer             {@link Analyzer} used to create the new docs
 * @param fieldNames           names of fields that need to be put in the new indexes or <code>null</code> if all should be used
 * @throws IOException if any writing operation fails on any of the indexes
 */
public void split(LeafReader originalIndex, Directory trainingIndex, Directory testIndex,
        Directory crossValidationIndex, Analyzer analyzer, String... fieldNames) throws IOException {

    // create IWs for train / test / cv IDXs
    IndexWriter testWriter = new IndexWriter(testIndex, new IndexWriterConfig(analyzer));
    IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(analyzer));
    IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(analyzer));

    try {
        int size = originalIndex.maxDoc();

        IndexSearcher indexSearcher = new IndexSearcher(originalIndex);
        TopDocs topDocs = indexSearcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE);

        // set the type to be indexed, stored, with term vectors
        FieldType ft = new FieldType(TextField.TYPE_STORED);
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorOffsets(true);
        ft.setStoreTermVectorPositions(true);

        int b = 0;

        // iterate over existing documents
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {

            // create a new document for indexing
            Document doc = new Document();
            if (fieldNames != null && fieldNames.length > 0) {
                for (String fieldName : fieldNames) {
                    doc.add(new Field(fieldName,
                            originalIndex.document(scoreDoc.doc).getField(fieldName).stringValue(), ft));
                }
            } else {
                for (IndexableField storableField : originalIndex.document(scoreDoc.doc).getFields()) {
                    if (storableField.readerValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.readerValue(), ft));
                    } else if (storableField.binaryValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.binaryValue(), ft));
                    } else if (storableField.stringValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.stringValue(), ft));
                    } else if (storableField.numericValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.numericValue().toString(), ft));
                    }
                }
            }

            // add it to one of the IDXs
            if (b % 2 == 0 && testWriter.maxDoc() < size * testRatio) {
                testWriter.addDocument(doc);
            } else if (cvWriter.maxDoc() < size * crossValidationRatio) {
                cvWriter.addDocument(doc);
            } else {
                trainingWriter.addDocument(doc);
            }
            b++;
        }
    } catch (Exception e) {
        throw new IOException(e);
    } finally {
        testWriter.commit();
        cvWriter.commit();
        trainingWriter.commit();
        // close IWs
        testWriter.close();
        cvWriter.close();
        trainingWriter.close();
    }
}
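
A hedged usage sketch for the method above (not from the original project): the splitter is driven by a LeafReader over the source index, which for a single-segment index can be taken from its only leaf. The directory paths, ratios, analyzer, and the "body" field name are illustrative assumptions; imports are omitted as in the other snippets on this page.

// Illustrative only: assumes a single-segment source index and the
// DatasetSplitter(testRatio, crossValidationRatio) constructor used in the test below.
Directory sourceDir = FSDirectory.open(Paths.get("/path/to/source-index"));
DirectoryReader topReader = DirectoryReader.open(sourceDir);
LeafReader leaf = topReader.leaves().get(0).reader(); // single segment assumed

DatasetSplitter splitter = new DatasetSplitter(0.2d, 0.1d);
splitter.split(leaf,
        FSDirectory.open(Paths.get("/path/to/train-index")),   // training index
        FSDirectory.open(Paths.get("/path/to/test-index")),    // test index
        FSDirectory.open(Paths.get("/path/to/cv-index")),      // cross validation index
        new StandardAnalyzer(),
        "body"); // or pass no field names to copy every stored field

topReader.close();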

From source file: com.meizu.nlp.classification.utils.DataSplitterTest.java

License: Apache License

public static void assertSplit(LeafReader originalIndex, double testRatio, double crossValidationRatio,
        String... fieldNames) throws Exception {

    BaseDirectoryWrapper trainingIndex = newDirectory();
    BaseDirectoryWrapper testIndex = newDirectory();
    BaseDirectoryWrapper crossValidationIndex = newDirectory();

    try {
        DatasetSplitter datasetSplitter = new DatasetSplitter(testRatio, crossValidationRatio);
        datasetSplitter.split(originalIndex, trainingIndex, testIndex, crossValidationIndex,
                new MockAnalyzer(random()), fieldNames);

        assertNotNull(trainingIndex);
        assertNotNull(testIndex);
        assertNotNull(crossValidationIndex);

        DirectoryReader trainingReader = DirectoryReader.open(trainingIndex);
        assertTrue((int) (originalIndex.maxDoc() * (1d - testRatio - crossValidationRatio)) == trainingReader
                .maxDoc());
        DirectoryReader testReader = DirectoryReader.open(testIndex);
        assertTrue((int) (originalIndex.maxDoc() * testRatio) == testReader.maxDoc());
        DirectoryReader cvReader = DirectoryReader.open(crossValidationIndex);
        assertTrue((int) (originalIndex.maxDoc() * crossValidationRatio) == cvReader.maxDoc());

        trainingReader.close();
        testReader.close();
        cvReader.close();
        closeQuietly(trainingReader);
        closeQuietly(testReader);
        closeQuietly(cvReader);
    } finally {
        if (trainingIndex != null) {
            trainingIndex.close();
        }
        if (testIndex != null) {
            testIndex.close();
        }
        if (crossValidationIndex != null) {
            crossValidationIndex.close();
        }
    }
}

From source file: de.unihildesheim.iw.lucene.search.EmptyFieldFilter.java

License: Open Source License

@Override
public DocIdSet getDocIdSet(@NotNull final LeafReaderContext context, @Nullable final Bits acceptDocs)
        throws IOException {
    FixedBitSet checkBits;
    final LeafReader reader = context.reader();
    final int maxDoc = reader.maxDoc();

    BitSet finalBits = new SparseFixedBitSet(maxDoc);
    if (acceptDocs == null) {
        checkBits = BitsUtils.bits2FixedBitSet(reader.getLiveDocs());
        if (checkBits == null) {
            // all live
            checkBits = new FixedBitSet(maxDoc);
            checkBits.set(0, checkBits.length());
        }
    } else {
        checkBits = BitsUtils.bits2FixedBitSet(acceptDocs);
    }

    @Nullable
    final Terms terms = reader.terms(this.field);
    if (terms != null) {
        final int termsDocCount = terms.getDocCount();

        if (termsDocCount != 0) {
            if (termsDocCount == maxDoc) {
                // all matching
                finalBits = checkBits;
            } else {
                @Nullable
                final Terms t = reader.terms(this.field);
                if (t != null) {
                    PostingsEnum pe = null;
                    final TermsEnum te = t.iterator(null);
                    int docId;
                    while (te.next() != null) {
                        pe = te.postings(checkBits, pe, (int) PostingsEnum.NONE);
                        while ((docId = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                            if (checkBits.getAndClear(docId)) {
                                finalBits.set(docId);
                            }
                        }
                    }
                }
            }
        }
    }
    return new BitDocIdSet(finalBits);
}

From source file: de.unihildesheim.iw.lucene.search.IPCFieldFilter.java

License: Open Source License

@Override
public DocIdSet getDocIdSet(@NotNull final LeafReaderContext context, @Nullable final Bits acceptDocs)
        throws IOException {
    final LeafReader reader = context.reader();
    final int maxDoc = reader.maxDoc();
    final BitSet finalBits = new SparseFixedBitSet(maxDoc);

    if (acceptDocs == null) {
        // check all
        for (int i = 0; i < maxDoc; i++) {
            if (this.filterFunc.isAccepted(reader, i, this.ipcParser)) {
                finalBits.set(i);
            }
        }
    } else {
        final BitSet checkBits = BitsUtils.bits2BitSet(acceptDocs);
        final DocIdSetIterator disi = new BitDocIdSet(checkBits).iterator();
        int docId;
        while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            if (this.filterFunc.isAccepted(reader, docId, this.ipcParser)) {
                finalBits.set(docId);
            }
        }
    }

    return new BitDocIdSet(finalBits);
}

From source file: main.BM25VASimilarity.java

License: Apache License

@Override
public final SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException {
    BM25Stats bm25stats = (BM25Stats) stats;
    LeafReader reader = context.reader();
    //int docCount = reader.getDocCount(bm25stats.field);

    //BVA calculated for each document
    float[] BVA = new float[reader.maxDoc()];
    float sumOfAverageTermFrequencies = 0.0f;

    //length of each doc
    float[] Ld = new float[reader.maxDoc()];
    //the number of unique terms in the doc.
    float[] Td = new float[reader.maxDoc()];

    NumericDocValues norms = reader.getNormValues(bm25stats.field);

    //        int nulldocs = 0;
    for (int i = 0; i < reader.maxDoc(); i++) {
        Terms terms = reader.getTermVector(i, bm25stats.field);
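        // getTermVector() returns null when no term vectors were stored for this
        // document/field; terms.size() below assumes they are always present.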
        //norm should be the decoded length of doc d, Ld.
        float norm = norms == null ? k1 : bm25stats.cache[(byte) norms.get(i) & 0xFF];
        Ld[i] = norm;
        //using terms.size() returns Td, the number of unique terms in the doc.
        Td[i] = terms.size();
        //            if (terms == null) {
        //                nulldocs++;
        //                continue;
        //            }

        float averageTermFrequency = Ld[i] / Td[i];
        sumOfAverageTermFrequencies += averageTermFrequency;
    }
    //calculate mean average term frequency of all documents
    float mavgtf = sumOfAverageTermFrequencies / reader.maxDoc();

    //calculate B_VA for each document
    for (int i = 0; i < reader.maxDoc(); i++) {
        BVA[i] = 1 / (mavgtf * mavgtf) * Ld[i] / Td[i] + (1 - 1 / mavgtf) * Ld[i] / bm25stats.avgdl;
    }

    //        System.out.println("Null docs: "+nulldocs);
    //        System.out.println("Max docs: "+reader.maxDoc());
    //        System.out.println("Doc count: "+reader.getDocCount(bm25stats.field));
    //        System.out.println("max docs minus null docs: "+(reader.maxDoc() - nulldocs));

    return new BM25DocScorer(bm25stats, BVA);
}
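
For the similarity above to take effect it has to be installed both at index time (so norms are encoded the way bm25stats.cache expects) and at search time. A hedged sketch, assuming BM25VASimilarity has a no-argument constructor and that term vectors are stored for the scored field, since simScorer() calls getTermVector() for every document; dir is a placeholder Directory.

// Illustrative only: wire the custom similarity into indexing and search.
Similarity sim = new BM25VASimilarity();        // assumes a no-arg constructor

IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
iwc.setSimilarity(sim);                         // norms are encoded with this similarity
// ... add documents with term vectors enabled on the scored field ...

IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
searcher.setSimilarity(sim);                    // simScorer() above is used at query time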

From source file: nl.inl.blacklab.search.lucene.SpansNGrams.java

License: Apache License

/**
 * Constructs a SpansNGrams.
 * @param ignoreLastToken if true, we assume the last token is always a special closing token and ignore it
 * @param reader the index reader, for getting field lengths
 * @param fieldName the field name, for getting field lengths
 * @param min minimum n-gram length
 * @param max maximum n-gram length
 */
public SpansNGrams(boolean ignoreLastToken, LeafReader reader, String fieldName, int min, int max) {
    maxDoc = reader == null ? -1 : reader.maxDoc();
    liveDocs = reader == null ? null : MultiFields.getLiveDocs(reader);
    subtractFromLength = ignoreLastToken ? 1 : 0;
    this.lengthGetter = new DocFieldLengthGetter(reader, fieldName);
    this.min = min;
    this.max = max;
}

From source file: org.alfresco.solr.query.AbstractAuthoritySetQuery.java

License: Open Source License

protected HybridBitSet getACLSet(String[] auths, String field, SolrIndexSearcher searcher) throws IOException {
    /*
    * Build a query that matches the authorities with a field in the ACL records in the index.
    */

    BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
    for (String current : auths) {
        queryBuilder.add(new TermQuery(new Term(field, current)), BooleanClause.Occur.SHOULD);
    }

    /*
    *   Collect a docset containing the ACL records that match the query.
    *   This query will be in the filter cache. Ideally it would remain cached throughout the users session.
    */

    DocSet docSet = searcher.getDocSet(queryBuilder.build());

    DocIterator iterator = docSet.iterator();
    if (!iterator.hasNext()) {
        return new EmptyHybridBitSet();
    }

    // TODO: make this configurable. For some systems this is huge and for others not big enough.
    HybridBitSet hybridBitSet = new HybridBitSet(60000000);

    /*
    * Collect the ACLID's from the matching acl records.
    * This is done in a separate step so the initial ACL query can be cached in the FilterCache
    * The initial ACL query may be expensive if the number of authorities is very large.
    */

    List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
    LeafReaderContext context = leaves.get(0);
    NumericDocValues aclValues = DocValuesCache.getNumericDocValues(QueryConstants.FIELD_ACLID,
            context.reader());
    LeafReader reader = context.reader();
    int ceil = reader.maxDoc();
    int base = 0;
    int ord = 0;
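    // The DocSet iterator returns global doc ids in order, so advance through the
    // leaves whenever a doc id passes the current segment's upper bound (base + maxDoc).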
    while (iterator.hasNext()) {
        int doc = iterator.nextDoc();
        if (doc >= ceil) {
            do {
                ++ord;
                context = leaves.get(ord);
                reader = context.reader();
                base = context.docBase;
                ceil = base + reader.maxDoc();
                aclValues = DocValuesCache.getNumericDocValues(QueryConstants.FIELD_ACLID, reader);
            } while (doc >= ceil);
        }

        if (aclValues != null) {
            long aclId = aclValues.get(doc - base);
            hybridBitSet.set(aclId);
        }
    }

    return hybridBitSet;
}

From source file: org.alfresco.solr.query.AbstractAuthoritySetQuery.java

License: Open Source License

protected BitsFilter getACLFilter(String[] auths, String field, SolrIndexSearcher searcher) throws IOException {
    HybridBitSet aclBits = getACLSet(auths, field, searcher);
    List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
    List<FixedBitSet> bitSets = new ArrayList<FixedBitSet>(leaves.size());

    for (LeafReaderContext readerContext : leaves) {
        LeafReader reader = readerContext.reader();
        int maxDoc = reader.maxDoc();
        FixedBitSet bits = new FixedBitSet(maxDoc);
        bitSets.add(bits);

        NumericDocValues fieldValues = DocValuesCache.getNumericDocValues(QueryConstants.FIELD_ACLID, reader);
        if (fieldValues != null) {
            for (int i = 0; i < maxDoc; i++) {
                long aclID = fieldValues.get(i);
                if (aclBits.get(aclID)) {
                    bits.set(i);
                }
            }
        }
    }

    return new BitsFilter(bitSets);
}

From source file: org.alfresco.solr.query.DocValuesCache.java

License: Open Source License

public static synchronized NumericDocValues getNumericDocValues(String field, LeafReader reader)
        throws IOException {
    WeakHashMap<Object, NumericDocValues> fieldCache = cache.get(field);

    if (fieldCache == null) {
        fieldCache = new WeakHashMap<Object, NumericDocValues>();
        cache.put(field, fieldCache);
    }

    Object cacheKey = reader.getCoreCacheKey();
    NumericDocValues cachedValues = fieldCache.get(cacheKey);

    if (cachedValues == null) {
        NumericDocValues fieldValues = reader.getNumericDocValues(field);
        if (fieldValues == null) {
            return null;
        } else {
            int maxDoc = reader.maxDoc();
            boolean longs = false;
            int[] intValues = new int[maxDoc]; //Always start off with an int array.
            SettableDocValues settableValues = new IntValues(intValues);

            for (int i = 0; i < maxDoc; i++) {
                long value = fieldValues.get(i);
                if (value > Integer.MAX_VALUE && !longs) {
                    longs = true;
                    settableValues = new LongValues(intValues);
                }

                settableValues.set(i, value);
            }
            fieldCache.put(cacheKey, settableValues);
            return settableValues;
        }
    } else {
        return cachedValues;
    }
}

From source file: org.apache.solr.uninverting.DocTermOrds.java

License: Apache License

/** Call this only once (if you subclass!) */
protected void uninvert(final LeafReader reader, Bits liveDocs, final BytesRef termPrefix) throws IOException {
    final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
    if (checkForDocValues && info != null && info.getDocValuesType() != DocValuesType.NONE) {
        throw new IllegalStateException(
                "Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
    }
    //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
    final long startTime = System.nanoTime();
    prefix = termPrefix == null ? null : BytesRef.deepCopyOf(termPrefix);

    final int maxDoc = reader.maxDoc();
    final int[] index = new int[maxDoc]; // immediate term numbers, or the index into the byte[] representing the last number
    final int[] lastTerm = new int[maxDoc]; // last term we saw for this document
    final byte[][] bytes = new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)

    final Terms terms = reader.terms(field);
    if (terms == null) {
        // No terms
        return;
    }

    final TermsEnum te = terms.iterator();
    final BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef();
    //System.out.println("seekStart=" + seekStart.utf8ToString());
    if (te.seekCeil(seekStart) == TermsEnum.SeekStatus.END) {
        // No terms match
        return;
    }

    // For our "term index wrapper"
    final List<BytesRef> indexedTerms = new ArrayList<>();
    final PagedBytes indexedTermsBytes = new PagedBytes(15);

    // we need a minimum of 9 bytes, but round up to 12 since the space would
    // be wasted with most allocators anyway.
    byte[] tempArr = new byte[12];

    //
    // enumerate all terms, and build an intermediate form of the un-inverted field.
    //
    // During this intermediate form, every document has a (potential) byte[]
    // and the int[maxDoc()] array either contains the termNumber list directly
    // or the *end* offset of the termNumber list in its byte array (for faster
    // appending and faster creation of the final form).
    //
    // idea... if things are too large while building, we could do a range of docs
    // at a time (but it would be a fair amount slower to build)
    // could also do ranges in parallel to take advantage of multiple CPUs

    // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
    // values.  This requires going over the field first to find the most
    // frequent terms ahead of time.

    int termNum = 0;
    postingsEnum = null;

    // Loop begins with te positioned to first term (we call
    // seek above):
    for (;;) {
        final BytesRef t = te.term();
        if (t == null || (termPrefix != null && !StringHelper.startsWith(t, termPrefix))) {
            break;
        }
        //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);

        visitTerm(te, termNum);

        if ((termNum & indexIntervalMask) == 0) {
            // Index this term
            sizeOfIndexedStrings += t.length;
            BytesRef indexedTerm = new BytesRef();
            indexedTermsBytes.copy(t, indexedTerm);
            // TODO: really should 1) strip off useless suffix,
            // and 2) use FST not array/PagedBytes
            indexedTerms.add(indexedTerm);
        }

        final int df = te.docFreq();
        if (df <= maxTermDocFreq) {

            postingsEnum = te.postings(postingsEnum, PostingsEnum.NONE);

            // dF, but takes deletions into account
            int actualDF = 0;

            for (;;) {
                int doc = postingsEnum.nextDoc();
                if (doc == DocIdSetIterator.NO_MORE_DOCS) {
                    break;
                }
                //System.out.println("  chunk=" + chunk + " docs");

                actualDF++;
                termInstances++;

                //System.out.println("    docID=" + doc);
                // add TNUM_OFFSET to the term number to make room for special reserved values:
                // 0 (end term) and 1 (index into byte array follows)
                int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
                lastTerm[doc] = termNum;
                int val = index[doc];

                if ((val & 0xff) == 1) {
                    // index into byte array (actually the end of
                    // the doc-specific byte[] when building)
                    int pos = val >>> 8;
                    int ilen = vIntSize(delta);
                    byte[] arr = bytes[doc];
                    int newend = pos + ilen;
                    if (newend > arr.length) {
                        // We avoid a doubling strategy to lower memory usage.
                        // this faceting method isn't for docs with many terms.
                        // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
                        // TODO: figure out what array lengths we can round up to w/o actually using more memory
                        // (how much space does a byte[] take up?  Is data preceded by a 32 bit length only?
                        // It should be safe to round up to the nearest 32 bits in any case.
                        int newLen = (newend + 3) & 0xfffffffc; // 4 byte alignment
                        byte[] newarr = new byte[newLen];
                        System.arraycopy(arr, 0, newarr, 0, pos);
                        arr = newarr;
                        bytes[doc] = newarr;
                    }
                    pos = writeInt(delta, arr, pos);
                    index[doc] = (pos << 8) | 1; // update pointer to end index in byte[]
                } else {
                    // OK, this int has data in it... find the end (a zero starting byte - not
                    // part of another number, hence not following a byte with the high bit set).
                    int ipos;
                    if (val == 0) {
                        ipos = 0;
                    } else if ((val & 0x0000ff80) == 0) {
                        ipos = 1;
                    } else if ((val & 0x00ff8000) == 0) {
                        ipos = 2;
                    } else if ((val & 0xff800000) == 0) {
                        ipos = 3;
                    } else {
                        ipos = 4;
                    }

                    //System.out.println("      ipos=" + ipos);

                    int endPos = writeInt(delta, tempArr, ipos);
                    //System.out.println("      endpos=" + endPos);
                    if (endPos <= 4) {
                        //System.out.println("      fits!");
                        // value will fit in the integer... move bytes back
                        for (int j = ipos; j < endPos; j++) {
                            val |= (tempArr[j] & 0xff) << (j << 3);
                        }
                        index[doc] = val;
                    } else {
                        // value won't fit... move integer into byte[]
                        for (int j = 0; j < ipos; j++) {
                            tempArr[j] = (byte) val;
                            val >>>= 8;
                        }
                        // point at the end index in the byte[]
                        index[doc] = (endPos << 8) | 1;
                        bytes[doc] = tempArr;
                        tempArr = new byte[12];
                    }
                }
            }
            setActualDocFreq(termNum, actualDF);
        }

        termNum++;
        if (te.next() == null) {
            break;
        }
    }

    numTermsInField = termNum;

    long midPoint = System.nanoTime();

    if (termInstances == 0) {
        // we didn't invert anything
        // lower memory consumption.
        tnums = null;
    } else {

        this.index = index;

        //
        // transform intermediate form into the final form, building a single byte[]
        // at a time, and releasing the intermediate byte[]s as we go to avoid
        // increasing the memory footprint.
        //

        for (int pass = 0; pass < 256; pass++) {
            byte[] target = tnums[pass];
            int pos = 0; // end in target;
            if (target != null) {
                pos = target.length;
            } else {
                target = new byte[4096];
            }

            // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
            // where pp is the pass (which array we are building), and xx is all values.
            // each pass shares the same byte[] for termNumber lists.
            for (int docbase = pass << 16; docbase < maxDoc; docbase += (1 << 24)) {
                int lim = Math.min(docbase + (1 << 16), maxDoc);
                for (int doc = docbase; doc < lim; doc++) {
                    //System.out.println("  pass=" + pass + " process docID=" + doc);
                    int val = index[doc];
                    if ((val & 0xff) == 1) {
                        int len = val >>> 8;
                        //System.out.println("    ptr pos=" + pos);
                        index[doc] = (pos << 8) | 1; // change index to point to start of array
                        if ((pos & 0xff000000) != 0) {
                            // we only have 24 bits for the array index
                            throw new IllegalStateException(
                                    "Too many values for UnInvertedField faceting on field " + field);
                        }
                        byte[] arr = bytes[doc];
                        /*
                        for(byte b : arr) {
                          //System.out.println("      b=" + Integer.toHexString((int) b));
                        }
                        */
                        bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
                        if (target.length <= pos + len) {
                            int newlen = target.length;
                            /*** we don't have to worry about the array getting too large
                             * since the "pos" param will overflow first (only 24 bits available)
                            if ((newlen<<1) <= 0) {
                              // overflow...
                              newlen = Integer.MAX_VALUE;
                              if (newlen <= pos + len) {
                                throw new SolrException(400,"Too many terms to uninvert field!");
                              }
                            } else {
                              while (newlen <= pos + len) newlen<<=1;  // doubling strategy
                            }
                            ****/
                            while (newlen <= pos + len)
                                newlen <<= 1; // doubling strategy                 
                            byte[] newtarget = new byte[newlen];
                            System.arraycopy(target, 0, newtarget, 0, pos);
                            target = newtarget;
                        }
                        System.arraycopy(arr, 0, target, pos, len);
                        pos += len + 1; // skip single byte at end and leave it 0 for terminator
                    }
                }
            }

            // shrink array
            if (pos < target.length) {
                byte[] newtarget = new byte[pos];
                System.arraycopy(target, 0, newtarget, 0, pos);
                target = newtarget;
            }

            tnums[pass] = target;

            if ((pass << 16) > maxDoc)
                break;
        }

    }
    indexedTermsArray = indexedTerms.toArray(new BytesRef[indexedTerms.size()]);

    long endTime = System.nanoTime();

    total_time = (int) TimeUnit.MILLISECONDS.convert(endTime - startTime, TimeUnit.NANOSECONDS);
    phase1_time = (int) TimeUnit.MILLISECONDS.convert(midPoint - startTime, TimeUnit.NANOSECONDS);
}