List of usage examples for org.apache.lucene.index.LeafReader#maxDoc()
public abstract int maxDoc();
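maxDoc() returns one greater than the largest document number in the reader, so it is the usual upper bound for per-segment arrays and doc-ID loops (deleted documents still count towards it). Before the collected examples below, here is a minimal, self-contained sketch of that pattern; the index path "/path/to/index" and the counting logic are illustrative only and not taken from any of the source files listed here:

import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;

public class MaxDocExample {
    public static void main(String[] args) throws IOException {
        // "/path/to/index" is a placeholder path
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
            for (LeafReaderContext ctx : reader.leaves()) {
                LeafReader leaf = ctx.reader();
                int maxDoc = leaf.maxDoc();              // doc IDs run from 0 (inclusive) to maxDoc (exclusive)
                Bits liveDocs = leaf.getLiveDocs();      // null means the segment has no deletions
                int live = 0;
                for (int docId = 0; docId < maxDoc; docId++) {
                    if (liveDocs == null || liveDocs.get(docId)) {
                        live++;                          // count only documents that are still live
                    }
                }
                System.out.println("segment maxDoc=" + maxDoc + " numDocs=" + leaf.numDocs() + " counted=" + live);
            }
        }
    }
}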
From source file:com.meizu.nlp.classification.utils.DatasetSplitter.java
License:Apache License
/**
 * Split a given index into 3 indexes for training, test and cross validation tasks respectively
 *
 * @param originalIndex        an {@link org.apache.lucene.index.LeafReader} on the source index
 * @param trainingIndex        a {@link Directory} used to write the training index
 * @param testIndex            a {@link Directory} used to write the test index
 * @param crossValidationIndex a {@link Directory} used to write the cross validation index
 * @param analyzer             {@link Analyzer} used to create the new docs
 * @param fieldNames           names of fields that need to be put in the new indexes or <code>null</code> if all should be used
 * @throws IOException if any writing operation fails on any of the indexes
 */
public void split(LeafReader originalIndex, Directory trainingIndex, Directory testIndex,
        Directory crossValidationIndex, Analyzer analyzer, String... fieldNames) throws IOException {

    // create IWs for train / test / cv IDXs
    IndexWriter testWriter = new IndexWriter(testIndex, new IndexWriterConfig(analyzer));
    IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(analyzer));
    IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(analyzer));

    try {
        int size = originalIndex.maxDoc();

        IndexSearcher indexSearcher = new IndexSearcher(originalIndex);
        TopDocs topDocs = indexSearcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE);

        // set the type to be indexed, stored, with term vectors
        FieldType ft = new FieldType(TextField.TYPE_STORED);
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorOffsets(true);
        ft.setStoreTermVectorPositions(true);

        int b = 0;

        // iterate over existing documents
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {

            // create a new document for indexing
            Document doc = new Document();
            if (fieldNames != null && fieldNames.length > 0) {
                for (String fieldName : fieldNames) {
                    doc.add(new Field(fieldName,
                            originalIndex.document(scoreDoc.doc).getField(fieldName).stringValue(), ft));
                }
            } else {
                for (IndexableField storableField : originalIndex.document(scoreDoc.doc).getFields()) {
                    if (storableField.readerValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.readerValue(), ft));
                    } else if (storableField.binaryValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.binaryValue(), ft));
                    } else if (storableField.stringValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.stringValue(), ft));
                    } else if (storableField.numericValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.numericValue().toString(), ft));
                    }
                }
            }

            // add it to one of the IDXs
            if (b % 2 == 0 && testWriter.maxDoc() < size * testRatio) {
                testWriter.addDocument(doc);
            } else if (cvWriter.maxDoc() < size * crossValidationRatio) {
                cvWriter.addDocument(doc);
            } else {
                trainingWriter.addDocument(doc);
            }
            b++;
        }
    } catch (Exception e) {
        throw new IOException(e);
    } finally {
        testWriter.commit();
        cvWriter.commit();
        trainingWriter.commit();
        // close IWs
        testWriter.close();
        cvWriter.close();
        trainingWriter.close();
    }
}
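A short usage sketch for the splitter above, assuming a Lucene 5.x setup: the Directory instances, the 0.2/0.1 ratios and the "body"/"title" field names are placeholders, and wrapping the composite reader with SlowCompositeReaderWrapper is just one way to obtain the LeafReader that split() expects.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.store.Directory;

public class SplitExample {
    // sourceDir, trainingDir, testDir and cvDir are caller-supplied Directory instances (placeholders here)
    static void splitIndex(Directory sourceDir, Directory trainingDir, Directory testDir, Directory cvDir)
            throws Exception {
        try (DirectoryReader reader = DirectoryReader.open(sourceDir)) {
            LeafReader originalIndex = SlowCompositeReaderWrapper.wrap(reader); // single-leaf view of the index
            DatasetSplitter splitter = new DatasetSplitter(0.2d, 0.1d);         // 20% test, 10% cross validation
            splitter.split(originalIndex, trainingDir, testDir, cvDir, new StandardAnalyzer(), "body", "title");
        }
    }
}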
From source file:com.meizu.nlp.classification.utils.DataSplitterTest.java
License:Apache License
public static void assertSplit(LeafReader originalIndex, double testRatio, double crossValidationRatio,
        String... fieldNames) throws Exception {

    BaseDirectoryWrapper trainingIndex = newDirectory();
    BaseDirectoryWrapper testIndex = newDirectory();
    BaseDirectoryWrapper crossValidationIndex = newDirectory();

    try {
        DatasetSplitter datasetSplitter = new DatasetSplitter(testRatio, crossValidationRatio);
        datasetSplitter.split(originalIndex, trainingIndex, testIndex, crossValidationIndex,
                new MockAnalyzer(random()), fieldNames);

        assertNotNull(trainingIndex);
        assertNotNull(testIndex);
        assertNotNull(crossValidationIndex);

        DirectoryReader trainingReader = DirectoryReader.open(trainingIndex);
        assertTrue((int) (originalIndex.maxDoc() * (1d - testRatio - crossValidationRatio)) == trainingReader.maxDoc());
        DirectoryReader testReader = DirectoryReader.open(testIndex);
        assertTrue((int) (originalIndex.maxDoc() * testRatio) == testReader.maxDoc());
        DirectoryReader cvReader = DirectoryReader.open(crossValidationIndex);
        assertTrue((int) (originalIndex.maxDoc() * crossValidationRatio) == cvReader.maxDoc());

        trainingReader.close();
        testReader.close();
        cvReader.close();
        closeQuietly(trainingReader);
        closeQuietly(testReader);
        closeQuietly(cvReader);
    } finally {
        if (trainingIndex != null) {
            trainingIndex.close();
        }
        if (testIndex != null) {
            testIndex.close();
        }
        if (crossValidationIndex != null) {
            crossValidationIndex.close();
        }
    }
}
From source file:de.unihildesheim.iw.lucene.search.EmptyFieldFilter.java
License:Open Source License
@Override
public DocIdSet getDocIdSet(@NotNull final LeafReaderContext context, @Nullable final Bits acceptDocs)
        throws IOException {
    FixedBitSet checkBits;
    final LeafReader reader = context.reader();
    final int maxDoc = reader.maxDoc();
    BitSet finalBits = new SparseFixedBitSet(maxDoc);

    if (acceptDocs == null) {
        checkBits = BitsUtils.bits2FixedBitSet(reader.getLiveDocs());
        if (checkBits == null) {
            // all live
            checkBits = new FixedBitSet(maxDoc);
            checkBits.set(0, checkBits.length());
        }
    } else {
        checkBits = BitsUtils.bits2FixedBitSet(acceptDocs);
    }

    @Nullable
    final Terms terms = reader.terms(this.field);
    if (terms != null) {
        final int termsDocCount = terms.getDocCount();
        if (termsDocCount != 0) {
            if (termsDocCount == maxDoc) {
                // all matching
                finalBits = checkBits;
            } else {
                @Nullable
                final Terms t = reader.terms(this.field);
                if (t != null) {
                    PostingsEnum pe = null;
                    final TermsEnum te = t.iterator(null);
                    int docId;
                    while (te.next() != null) {
                        pe = te.postings(checkBits, pe, (int) PostingsEnum.NONE);
                        while ((docId = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                            if (checkBits.getAndClear(docId)) {
                                finalBits.set(docId);
                            }
                        }
                    }
                }
            }
        }
    }
    return new BitDocIdSet(finalBits);
}
From source file:de.unihildesheim.iw.lucene.search.IPCFieldFilter.java
License:Open Source License
@Override
public DocIdSet getDocIdSet(@NotNull final LeafReaderContext context, @Nullable final Bits acceptDocs)
        throws IOException {
    final LeafReader reader = context.reader();
    final int maxDoc = reader.maxDoc();
    final BitSet finalBits = new SparseFixedBitSet(maxDoc);

    if (acceptDocs == null) {
        // check all
        for (int i = 0; i < maxDoc; i++) {
            if (this.filterFunc.isAccepted(reader, i, this.ipcParser)) {
                finalBits.set(i);
            }
        }
    } else {
        final BitSet checkBits = BitsUtils.bits2BitSet(acceptDocs);
        final DocIdSetIterator disi = new BitDocIdSet(checkBits).iterator();
        int docId;
        while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            if (this.filterFunc.isAccepted(reader, docId, this.ipcParser)) {
                finalBits.set(docId);
            }
        }
    }
    return new BitDocIdSet(finalBits);
}
From source file:main.BM25VASimilarity.java
License:Apache License
@Override
public final SimScorer simScorer(SimWeight stats, LeafReaderContext context) throws IOException {
    BM25Stats bm25stats = (BM25Stats) stats;
    LeafReader reader = context.reader();
    //int docCount = reader.getDocCount(bm25stats.field);

    // BVA calculated for each document
    float[] BVA = new float[reader.maxDoc()];
    float sumOfAverageTermFrequencies = 0.0f;
    // length of each doc
    float[] Ld = new float[reader.maxDoc()];
    // the number of unique terms in the doc.
    float[] Td = new float[reader.maxDoc()];
    NumericDocValues norms = reader.getNormValues(bm25stats.field);
    // int nulldocs = 0;
    for (int i = 0; i < reader.maxDoc(); i++) {
        Terms terms = reader.getTermVector(i, bm25stats.field);
        // norm should be the decoded length of doc d, Ld.
        float norm = norms == null ? k1 : bm25stats.cache[(byte) norms.get(i) & 0xFF];
        Ld[i] = norm;
        // using terms.size() returns Td, the number of unique terms in the doc.
        Td[i] = terms.size();
        // if (terms == null) {
        //     nulldocs++;
        //     continue;
        // }
        float averageTermFrequency = Ld[i] / Td[i];
        sumOfAverageTermFrequencies += averageTermFrequency;
    }

    // calculate mean average term frequency of all documents
    float mavgtf = sumOfAverageTermFrequencies / reader.maxDoc();

    // calculate B_VA for each document
    for (int i = 0; i < reader.maxDoc(); i++) {
        BVA[i] = 1 / (mavgtf * mavgtf) * Ld[i] / Td[i] + (1 - 1 / mavgtf) * Ld[i] / bm25stats.avgdl;
    }
    // System.out.println("Null docs: " + nulldocs);
    // System.out.println("Max docs: " + reader.maxDoc());
    // System.out.println("Doc count: " + reader.getDocCount(bm25stats.field));
    // System.out.println("max docs minus null docs: " + (reader.maxDoc() - nulldocs));
    return new BM25DocScorer(bm25stats, BVA);
}
From source file:nl.inl.blacklab.search.lucene.SpansNGrams.java
License:Apache License
/**
 * Constructs a SpansNGrams
 *
 * @param ignoreLastToken if true, we assume the last token is always a special closing token and ignore it
 * @param reader the index reader, for getting field lengths
 * @param fieldName the field name, for getting field lengths
 * @param min minimum n-gram length
 * @param max maximum n-gram length
 */
public SpansNGrams(boolean ignoreLastToken, LeafReader reader, String fieldName, int min, int max) {
    maxDoc = reader == null ? -1 : reader.maxDoc();
    liveDocs = reader == null ? null : MultiFields.getLiveDocs(reader);
    subtractFromLength = ignoreLastToken ? 1 : 0;
    this.lengthGetter = new DocFieldLengthGetter(reader, fieldName);
    this.min = min;
    this.max = max;
}
From source file:org.alfresco.solr.query.AbstractAuthoritySetQuery.java
License:Open Source License
protected HybridBitSet getACLSet(String[] auths, String field, SolrIndexSearcher searcher) throws IOException {
    /*
     * Build a query that matches the authorities with a field in the ACL records in the index.
     */
    BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
    for (String current : auths) {
        queryBuilder.add(new TermQuery(new Term(field, current)), BooleanClause.Occur.SHOULD);
    }

    /*
     * Collect a docset containing the ACL records that match the query.
     * This query will be in the filter cache. Ideally it would remain cached throughout the user's session.
     */
    DocSet docSet = searcher.getDocSet(queryBuilder.build());
    DocIterator iterator = docSet.iterator();
    if (!iterator.hasNext()) {
        return new EmptyHybridBitSet();
    }

    //TODO: make this configurable. For some systems this is huge and for others not big enough.
    HybridBitSet hybridBitSet = new HybridBitSet(60000000);

    /*
     * Collect the ACLIDs from the matching acl records.
     * This is done in a separate step so the initial ACL query can be cached in the FilterCache.
     * The initial ACL query may be expensive if the number of authorities is very large.
     */
    List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
    LeafReaderContext context = leaves.get(0);
    NumericDocValues aclValues = DocValuesCache.getNumericDocValues(QueryConstants.FIELD_ACLID, context.reader());
    LeafReader reader = context.reader();
    int ceil = reader.maxDoc();
    int base = 0;
    int ord = 0;
    while (iterator.hasNext()) {
        int doc = iterator.nextDoc();
        if (doc >= ceil) {
            do {
                ++ord;
                context = leaves.get(ord);
                reader = context.reader();
                base = context.docBase;
                ceil = base + reader.maxDoc();
                aclValues = DocValuesCache.getNumericDocValues(QueryConstants.FIELD_ACLID, reader);
            } while (doc >= ceil);
        }

        if (aclValues != null) {
            long aclId = aclValues.get(doc - base);
            hybridBitSet.set(aclId);
        }
    }

    return hybridBitSet;
}
From source file:org.alfresco.solr.query.AbstractAuthoritySetQuery.java
License:Open Source License
protected BitsFilter getACLFilter(String[] auths, String field, SolrIndexSearcher searcher) throws IOException {
    HybridBitSet aclBits = getACLSet(auths, field, searcher);
    List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
    List<FixedBitSet> bitSets = new ArrayList<FixedBitSet>(leaves.size());

    for (LeafReaderContext readerContext : leaves) {
        LeafReader reader = readerContext.reader();
        int maxDoc = reader.maxDoc();
        FixedBitSet bits = new FixedBitSet(maxDoc);
        bitSets.add(bits);

        NumericDocValues fieldValues = DocValuesCache.getNumericDocValues(QueryConstants.FIELD_ACLID, reader);
        if (fieldValues != null) {
            for (int i = 0; i < maxDoc; i++) {
                long aclID = fieldValues.get(i);
                if (aclBits.get(aclID)) {
                    bits.set(i);
                }
            }
        }
    }

    return new BitsFilter(bitSets);
}
From source file:org.alfresco.solr.query.DocValuesCache.java
License:Open Source License
public static synchronized NumericDocValues getNumericDocValues(String field, LeafReader reader)
        throws IOException {
    WeakHashMap<Object, NumericDocValues> fieldCache = cache.get(field);

    if (fieldCache == null) {
        fieldCache = new WeakHashMap<Object, NumericDocValues>();
        cache.put(field, fieldCache);
    }

    Object cacheKey = reader.getCoreCacheKey();
    NumericDocValues cachedValues = fieldCache.get(cacheKey);

    if (cachedValues == null) {
        NumericDocValues fieldValues = reader.getNumericDocValues(field);
        if (fieldValues == null) {
            return null;
        } else {
            int maxDoc = reader.maxDoc();
            boolean longs = false;
            int[] intValues = new int[maxDoc]; // Always start off with an int array.
            SettableDocValues settableValues = new IntValues(intValues);

            for (int i = 0; i < maxDoc; i++) {
                long value = fieldValues.get(i);
                if (value > Integer.MAX_VALUE && !longs) {
                    longs = true;
                    settableValues = new LongValues(intValues);
                }
                settableValues.set(i, value);
            }
            fieldCache.put(cacheKey, settableValues);
            return settableValues;
        }
    } else {
        return cachedValues;
    }
}
From source file:org.apache.solr.uninverting.DocTermOrds.java
License:Apache License
/** Call this only once (if you subclass!) */
protected void uninvert(final LeafReader reader, Bits liveDocs, final BytesRef termPrefix) throws IOException {
    final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
    if (checkForDocValues && info != null && info.getDocValuesType() != DocValuesType.NONE) {
        throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
    }
    //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
    final long startTime = System.nanoTime();
    prefix = termPrefix == null ? null : BytesRef.deepCopyOf(termPrefix);

    final int maxDoc = reader.maxDoc();
    final int[] index = new int[maxDoc];       // immediate term numbers, or the index into the byte[] representing the last number
    final int[] lastTerm = new int[maxDoc];    // last term we saw for this document
    final byte[][] bytes = new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)

    final Terms terms = reader.terms(field);
    if (terms == null) {
        // No terms
        return;
    }

    final TermsEnum te = terms.iterator();
    final BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef();
    //System.out.println("seekStart=" + seekStart.utf8ToString());
    if (te.seekCeil(seekStart) == TermsEnum.SeekStatus.END) {
        // No terms match
        return;
    }

    // For our "term index wrapper"
    final List<BytesRef> indexedTerms = new ArrayList<>();
    final PagedBytes indexedTermsBytes = new PagedBytes(15);

    // we need a minimum of 9 bytes, but round up to 12 since the space would
    // be wasted with most allocators anyway.
    byte[] tempArr = new byte[12];

    //
    // enumerate all terms, and build an intermediate form of the un-inverted field.
    //
    // During this intermediate form, every document has a (potential) byte[]
    // and the int[maxDoc()] array either contains the termNumber list directly
    // or the *end* offset of the termNumber list in its byte array (for faster
    // appending and faster creation of the final form).
    //
    // idea... if things are too large while building, we could do a range of docs
    // at a time (but it would be a fair amount slower to build)
    // could also do ranges in parallel to take advantage of multiple CPUs

    // OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
    // values. This requires going over the field first to find the most
    // frequent terms ahead of time.

    int termNum = 0;
    postingsEnum = null;

    // Loop begins with te positioned to first term (we call
    // seek above):
    for (;;) {
        final BytesRef t = te.term();
        if (t == null || (termPrefix != null && !StringHelper.startsWith(t, termPrefix))) {
            break;
        }
        //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);

        visitTerm(te, termNum);

        if ((termNum & indexIntervalMask) == 0) {
            // Index this term
            sizeOfIndexedStrings += t.length;
            BytesRef indexedTerm = new BytesRef();
            indexedTermsBytes.copy(t, indexedTerm);
            // TODO: really should 1) strip off useless suffix,
            // and 2) use FST not array/PagedBytes
            indexedTerms.add(indexedTerm);
        }

        final int df = te.docFreq();
        if (df <= maxTermDocFreq) {

            postingsEnum = te.postings(postingsEnum, PostingsEnum.NONE);

            // dF, but takes deletions into account
            int actualDF = 0;

            for (;;) {
                int doc = postingsEnum.nextDoc();
                if (doc == DocIdSetIterator.NO_MORE_DOCS) {
                    break;
                }
                //System.out.println("  chunk=" + chunk + " docs");

                actualDF++;
                termInstances++;

                //System.out.println("    docID=" + doc);
                // add TNUM_OFFSET to the term number to make room for special reserved values:
                // 0 (end term) and 1 (index into byte array follows)
                int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
                lastTerm[doc] = termNum;
                int val = index[doc];

                if ((val & 0xff) == 1) {
                    // index into byte array (actually the end of
                    // the doc-specific byte[] when building)
                    int pos = val >>> 8;
                    int ilen = vIntSize(delta);
                    byte[] arr = bytes[doc];
                    int newend = pos + ilen;
                    if (newend > arr.length) {
                        // We avoid a doubling strategy to lower memory usage.
                        // this faceting method isn't for docs with many terms.
                        // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
                        // TODO: figure out what array lengths we can round up to w/o actually using more memory
                        // (how much space does a byte[] take up?  Is data preceded by a 32 bit length only?
                        // It should be safe to round up to the nearest 32 bits in any case.
                        int newLen = (newend + 3) & 0xfffffffc;  // 4 byte alignment
                        byte[] newarr = new byte[newLen];
                        System.arraycopy(arr, 0, newarr, 0, pos);
                        arr = newarr;
                        bytes[doc] = newarr;
                    }
                    pos = writeInt(delta, arr, pos);
                    index[doc] = (pos << 8) | 1;  // update pointer to end index in byte[]
                } else {
                    // OK, this int has data in it... find the end (a zero starting byte - not
                    // part of another number, hence not following a byte with the high bit set).
                    int ipos;
                    if (val == 0) {
                        ipos = 0;
                    } else if ((val & 0x0000ff80) == 0) {
                        ipos = 1;
                    } else if ((val & 0x00ff8000) == 0) {
                        ipos = 2;
                    } else if ((val & 0xff800000) == 0) {
                        ipos = 3;
                    } else {
                        ipos = 4;
                    }

                    //System.out.println("      ipos=" + ipos);
                    int endPos = writeInt(delta, tempArr, ipos);
                    //System.out.println("      endpos=" + endPos);
                    if (endPos <= 4) {
                        //System.out.println("      fits!");
                        // value will fit in the integer... move bytes back
                        for (int j = ipos; j < endPos; j++) {
                            val |= (tempArr[j] & 0xff) << (j << 3);
                        }
                        index[doc] = val;
                    } else {
                        // value won't fit... move integer into byte[]
                        for (int j = 0; j < ipos; j++) {
                            tempArr[j] = (byte) val;
                            val >>>= 8;
                        }
                        // point at the end index in the byte[]
                        index[doc] = (endPos << 8) | 1;
                        bytes[doc] = tempArr;
                        tempArr = new byte[12];
                    }
                }
            }
            setActualDocFreq(termNum, actualDF);
        }

        termNum++;
        if (te.next() == null) {
            break;
        }
    }

    numTermsInField = termNum;

    long midPoint = System.nanoTime();

    if (termInstances == 0) {
        // we didn't invert anything
        // lower memory consumption.
        tnums = null;
    } else {
        this.index = index;

        //
        // transform intermediate form into the final form, building a single byte[]
        // at a time, and releasing the intermediate byte[]s as we go to avoid
        // increasing the memory footprint.
        //
        for (int pass = 0; pass < 256; pass++) {
            byte[] target = tnums[pass];
            int pos = 0;  // end in target;
            if (target != null) {
                pos = target.length;
            } else {
                target = new byte[4096];
            }

            // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
            // where pp is the pass (which array we are building), and xx is all values.
            // each pass shares the same byte[] for termNumber lists.
            for (int docbase = pass << 16; docbase < maxDoc; docbase += (1 << 24)) {
                int lim = Math.min(docbase + (1 << 16), maxDoc);
                for (int doc = docbase; doc < lim; doc++) {
                    //System.out.println("  pass=" + pass + " process docID=" + doc);
                    int val = index[doc];
                    if ((val & 0xff) == 1) {
                        int len = val >>> 8;
                        //System.out.println("    ptr pos=" + pos);
                        index[doc] = (pos << 8) | 1; // change index to point to start of array
                        if ((pos & 0xff000000) != 0) {
                            // we only have 24 bits for the array index
                            throw new IllegalStateException("Too many values for UnInvertedField faceting on field " + field);
                        }
                        byte[] arr = bytes[doc];
                        /*
                        for (byte b : arr) {
                            //System.out.println("      b=" + Integer.toHexString((int) b));
                        }
                        */
                        bytes[doc] = null;  // IMPORTANT: allow GC to avoid OOM
                        if (target.length <= pos + len) {
                            int newlen = target.length;
                            /*** we don't have to worry about the array getting too large
                             * since the "pos" param will overflow first (only 24 bits available)
                            if ((newlen<<1) <= 0) {
                              // overflow...
                              newlen = Integer.MAX_VALUE;
                              if (newlen <= pos + len) {
                                throw new SolrException(400,"Too many terms to uninvert field!");
                              }
                            } else {
                              while (newlen <= pos + len) newlen<<=1;  // doubling strategy
                            }
                            ****/
                            while (newlen <= pos + len) newlen <<= 1; // doubling strategy
                            byte[] newtarget = new byte[newlen];
                            System.arraycopy(target, 0, newtarget, 0, pos);
                            target = newtarget;
                        }
                        System.arraycopy(arr, 0, target, pos, len);
                        pos += len + 1;  // skip single byte at end and leave it 0 for terminator
                    }
                }
            }

            // shrink array
            if (pos < target.length) {
                byte[] newtarget = new byte[pos];
                System.arraycopy(target, 0, newtarget, 0, pos);
                target = newtarget;
            }

            tnums[pass] = target;

            if ((pass << 16) > maxDoc) break;
        }
    }
    indexedTermsArray = indexedTerms.toArray(new BytesRef[indexedTerms.size()]);

    long endTime = System.nanoTime();

    total_time = (int) TimeUnit.MILLISECONDS.convert(endTime - startTime, TimeUnit.NANOSECONDS);
    phase1_time = (int) TimeUnit.MILLISECONDS.convert(midPoint - startTime, TimeUnit.NANOSECONDS);
}