Example usage for org.apache.lucene.index IndexReader maxDoc

Introduction

On this page you can find examples of how org.apache.lucene.index.IndexReader#maxDoc is used in practice.

Prototype

public abstract int maxDoc();

Document

Returns one greater than the largest possible document number.
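
Because maxDoc() is one greater than the largest document number, the usual pattern (seen throughout the examples below) is to loop document IDs from 0 to maxDoc() - 1 and skip deleted slots. The following is a minimal sketch against the pre-4.0 IndexReader API that most of these examples use; the index path is a placeholder:

import java.io.File;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

public class MaxDocExample {
    public static void main(String[] args) throws Exception {
        // open a reader over an existing index (the path is a placeholder)
        IndexReader reader = IndexReader.open(FSDirectory.open(new File("/path/to/index")));
        try {
            // maxDoc() is an exclusive upper bound on document IDs;
            // numDocs() is the number of live (non-deleted) documents
            System.out.println("maxDoc=" + reader.maxDoc() + ", numDocs=" + reader.numDocs());
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (reader.isDeleted(i)) {
                    continue; // deleted slots still count toward maxDoc()
                }
                Document doc = reader.document(i);
                // ... use the stored fields of doc here ...
            }
        } finally {
            reader.close();
        }
    }
}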

Usage

From source file:org.apache.nutch.indexer.IndexSorterArquivoWeb.java

License:Apache License

/**
 * Sort the documents by score.
 * @param reader the index reader to score documents from
 * @param searcher the searcher used to fetch document metadata
 * @return the documents and their scores, ordered by score
 * @throws IOException if the index cannot be read
 */
//private static int[] oldToNew(IndexReader reader, Searcher searcher) throws IOException {
private static DocScore[] newToOld(IndexReader reader, Searcher searcher) throws IOException {
    int readerMax = reader.maxDoc();
    DocScore[] newToOld = new DocScore[readerMax];

    // use site, an indexed, un-tokenized field to get boost
    //byte[] boosts = reader.norms("site"); TODO MC
    /* TODO MC */
    Document docMeta;
    Pattern includes = Pattern.compile("\\|");
    String value = NutchConfiguration.create().get(INCLUDE_EXTENSIONS_KEY, "");
    String[] includeExtensions = includes.split(value);
    Hashtable<String, Boolean> validExtensions = new Hashtable<String, Boolean>();
    for (int i = 0; i < includeExtensions.length; i++) {
        validExtensions.put(includeExtensions[i], true);
        System.out.println("extension boosted " + includeExtensions[i]);
    }
    /* TODO MC */

    for (int oldDoc = 0; oldDoc < readerMax; oldDoc++) {
        float score;
        if (reader.isDeleted(oldDoc)) {
            //score = 0.0f;       
            score = -1f; // TODO MC
        } else {
            //score = Similarity.decodeNorm(boosts[oldDoc]); TODO MC
            /* TODO MC */
            docMeta = searcher.doc(oldDoc);
            if (validExtensions.get(docMeta.get("subType")) == null) { // searched extensions will have higher scores 
                score = -0.5f;
            } else {
                score = Integer.parseInt(docMeta.get("inlinks"));
                /*
                if (score==0) {
                   score=0.001f; // TODO MC - to not erase
                }
                */
            }
            /* TODO MC */
            //System.out.println("Score for old document "+oldDoc+" is "+score+" and type "+docMeta.get("subType")); // TODO MC debug remove
        }
        DocScore docScore = new DocScore();
        docScore.doc = oldDoc;
        docScore.score = score;
        newToOld[oldDoc] = docScore;
    }

    System.out.println("Sorting " + newToOld.length + " documents.");
    Arrays.sort(newToOld);
    //HeapSorter.sort(newToOld); // TODO MC - due to the lack of space

    /* TODO MC
    int[] oldToNew = new int[readerMax];
    for (int newDoc = 0; newDoc < readerMax; newDoc++) {
      DocScore docScore = newToOld[newDoc];
      //oldToNew[docScore.oldDoc] = docScore.score > 0.0f ? newDoc : -1; // TODO MC
      oldToNew[docScore.oldDoc] = newDoc; // TODO MC
    } 
    */

    /* TODO MC *
    for (int newDoc = 0; newDoc < readerMax; newDoc++) {
       DocScore docScore = newToOld[newDoc];
       System.out.println("Score for new document "+newDoc+" is "+docScore.score); // TODO MC debug remove
    }
    * TODO MC */

    //return oldToNew; TODO MC
    return newToOld; // TODO MC
}

From source file:org.apache.nutch.indexer.TestDeleteDuplicates.java

License:Apache License

private void hashDuplicatesHelper(Path index, String url) throws Exception {
    DeleteDuplicates dedup = new DeleteDuplicates(conf);
    dedup.dedup(new Path[] { index });
    FsDirectory dir = new FsDirectory(fs, new Path(index, "part-0000"), false, conf);
    IndexReader reader = IndexReader.open(dir);
    assertEquals("only one doc left", reader.numDocs(), 1);
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.isDeleted(i)) {
            System.out.println("-doc " + i + " deleted");
            continue;
        }
        Document doc = reader.document(i);
        // make sure we got the right one
        assertEquals("check url", url, doc.get("url"));
        System.out.println(doc);
    }
    reader.close();
}

From source file:org.apache.nutch.indexer.TestDeleteDuplicates.java

License:Apache License

public void testUrlDuplicates() throws Exception {
    DeleteDuplicates dedup = new DeleteDuplicates(conf);
    dedup.dedup(new Path[] { index2 });
    FsDirectory dir = new FsDirectory(fs, new Path(index2, "part-0000"), false, conf);
    IndexReader reader = IndexReader.open(dir);
    assertEquals("only one doc left", reader.numDocs(), 1);
    MD5Hash hash = MD5Hash.digest("2");
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.isDeleted(i)) {
            System.out.println("-doc " + i + " deleted");
            continue;
        }
        Document doc = reader.document(i);
        // make sure we got the right one
        assertEquals("check hash", hash.toString(), doc.get("digest"));
        System.out.println(doc);
    }
    reader.close();
}

From source file:org.apache.nutch.indexer.TestDeleteDuplicates.java

License:Apache License

public void testMixedDuplicates() throws Exception {
    DeleteDuplicates dedup = new DeleteDuplicates(conf);
    dedup.dedup(new Path[] { index1, index2 });
    FsDirectory dir = new FsDirectory(fs, new Path(index1, "part-0000"), false, conf);
    IndexReader reader = IndexReader.open(dir);
    assertEquals("only one doc left", reader.numDocs(), 1);
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.isDeleted(i)) {
            System.out.println("-doc " + i + " deleted");
            continue;
        }
        Document doc = reader.document(i);
        // make sure we got the right one
        assertEquals("check url", "http://www.example.com/2", doc.get("url"));
        System.out.println(doc);
    }
    reader.close();
    dir = new FsDirectory(fs, new Path(index2, "part-0000"), false, conf);
    reader = IndexReader.open(dir);
    assertEquals("only one doc left", reader.numDocs(), 1);
    MD5Hash hash = MD5Hash.digest("2");
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.isDeleted(i)) {
            System.out.println("-doc " + i + " deleted");
            continue;
        }
        Document doc = reader.document(i);
        // make sure we got the right one
        assertEquals("check hash", hash.toString(), doc.get("digest"));
        System.out.println(doc);
    }
    reader.close();
}

From source file:org.apache.nutch.indexer.TestIndexSorter.java

License:Apache License

public void testSorting() throws Exception {
    IndexSorter sorter = new IndexSorter(conf);
    sorter.sort(testDir);

    // read back documents
    IndexReader reader = IndexReader.open(FSDirectory.open(new File(testDir, INDEX_SORTED)));
    assertEquals(NUM_DOCS, reader.numDocs());
    for (int i = 0; i < reader.maxDoc(); i++) {
        Document doc = reader.document(i);
        Field f = doc.getField("content");
        assertNull(f);
        f = doc.getField("boost");
        float boost = Similarity.decodeNorm((byte) (NUM_DOCS - i));
        String cmp = String.valueOf(boost);
        assertEquals(cmp, f.stringValue());
    }
    reader.close();
}

From source file:org.apache.nutch.tools.SegmentMergeTool.java

License:Apache License

/** Run the tool, periodically reporting progress. */
public void run() {
    start = System.currentTimeMillis();
    stage = SegmentMergeStatus.STAGE_OPENING;
    long delta;
    LOG.info("* Opening " + allsegdirs.size() + " segments:");
    try {
        segdirs = new ArrayList();
        // open all segments
        for (int i = 0; i < allsegdirs.size(); i++) {
            File dir = (File) allsegdirs.get(i);
            SegmentReader sr = null;
            try {
                // try to autofix it if corrupted...
                sr = new SegmentReader(nfs, dir, true);
            } catch (Exception e) {
                // this segment is hosed beyond repair, don't use it
                LOG.warning("* Segment " + dir.getName() + " is corrupt beyond repair; skipping it.");
                continue;
            }
            segdirs.add(dir);
            totalRecords += sr.size;
            LOG.info(" - segment " + dir.getName() + ": " + sr.size + " records.");
            readers.put(dir.getName(), sr);
        }
        long total = totalRecords;
        LOG.info("* TOTAL " + total + " input records in " + segdirs.size() + " segments.");
        LOG.info("* Creating master index...");
        stage = SegmentMergeStatus.STAGE_MASTERIDX;
        // XXX Note that Lucene indexes don't work with NutchFileSystem for now.
        // XXX For now always assume LocalFileSystem here...
        Vector masters = new Vector();
        File fsmtIndexDir = new File(output, ".fastmerge_index");
        File masterDir = new File(fsmtIndexDir, "0");
        if (!masterDir.mkdirs()) {
            LOG.severe("Could not create a master index dir: " + masterDir);
            return;
        }
        masters.add(masterDir);
        IndexWriter iw = new IndexWriter(masterDir, new WhitespaceAnalyzer(), true);
        iw.setUseCompoundFile(false);
        iw.setMergeFactor(INDEX_MERGE_FACTOR);
        iw.setRAMBufferSizeMB(INDEX_MIN_MERGE_DOCS);
        long s1 = System.currentTimeMillis();
        Iterator it = readers.values().iterator();
        processedRecords = 0L;
        delta = System.currentTimeMillis();
        while (it.hasNext()) {
            SegmentReader sr = (SegmentReader) it.next();
            String name = sr.segmentDir.getName();
            FetcherOutput fo = new FetcherOutput();
            for (long i = 0; i < sr.size; i++) {
                try {
                    if (!sr.get(i, fo, null, null, null))
                        break;

                    Document doc = new Document();

                    // compute boost
                    float boost = IndexSegment.calculateBoost(fo.getFetchListEntry().getPage().getScore(),
                            scorePower, boostByLinkCount, fo.getAnchors().length);
                    //            doc.add(new Field("sd", name + "|" + i, true, false, false));
                    //            doc.add(new Field("uh", MD5Hash.digest(fo.getUrl().toString()).toString(), true, true, false));
                    //            doc.add(new Field("ch", fo.getMD5Hash().toString(), true, true, false));
                    //            doc.add(new Field("time", DateField.timeToString(fo.getFetchDate()), true, false, false));
                    //            doc.add(new Field("score", boost + "", true, false, false));
                    //            doc.add(new Field("ul", fo.getUrl().toString().length() + "", true, false, false));
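                    // NOTE: the deduplication and writing stages below read the "uh", "ch", "time", "score", "ul" and "sd" fields of this document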
                    iw.addDocument(doc);
                    processedRecords++;
                    if (processedRecords > 0 && (processedRecords % LOG_STEP == 0)) {
                        LOG.info(" Processed " + processedRecords + " records ("
                                + (float) (LOG_STEP * 1000) / (float) (System.currentTimeMillis() - delta)
                                + " rec/s)");
                        delta = System.currentTimeMillis();
                    }
                    if (processedRecords > 0 && (processedRecords % INDEX_SIZE == 0)) {
                        iw.optimize();
                        iw.close();
                        LOG.info(" - creating next subindex...");
                        masterDir = new File(fsmtIndexDir, "" + masters.size());
                        if (!masterDir.mkdirs()) {
                            LOG.severe("Could not create a master index dir: " + masterDir);
                            return;
                        }
                        masters.add(masterDir);
                        iw = new IndexWriter(masterDir, new WhitespaceAnalyzer(), true);
                        iw.setUseCompoundFile(false);
                        iw.setMergeFactor(INDEX_MERGE_FACTOR);
                        iw.setRAMBufferSizeMB(INDEX_MIN_MERGE_DOCS);
                    }
                } catch (Throwable t) {
                    // we can assume the data is invalid from now on - break here
                    LOG.info(" - segment " + name + " truncated to " + (i + 1) + " records");
                    break;
                }
            }
        }
        iw.optimize();
        LOG.info("* Creating index took " + (System.currentTimeMillis() - s1) + " ms");
        s1 = System.currentTimeMillis();
        // merge all other indexes using the latest IndexWriter (still open):
        if (masters.size() > 1) {
            LOG.info(" - merging subindexes...");
            stage = SegmentMergeStatus.STAGE_MERGEIDX;
            IndexReader[] ireaders = new IndexReader[masters.size() - 1];
            for (int i = 0; i < masters.size() - 1; i++)
                ireaders[i] = IndexReader.open((File) masters.get(i));
            iw.addIndexes(ireaders);
            for (int i = 0; i < masters.size() - 1; i++) {
                ireaders[i].close();
                FileUtil.fullyDelete((File) masters.get(i));
            }
        }
        iw.close();
        LOG.info("* Optimizing index took " + (System.currentTimeMillis() - s1) + " ms");
        LOG.info("* Removing duplicate entries...");
        stage = SegmentMergeStatus.STAGE_DEDUP;
        IndexReader ir = IndexReader.open(masterDir);
        int i = 0;
        long cnt = 0L;
        processedRecords = 0L;
        s1 = System.currentTimeMillis();
        delta = s1;
        TermEnum te = ir.terms();
        while (te.next()) {
            Term t = te.term();
            if (t == null)
                continue;
            if (!(t.field().equals("ch") || t.field().equals("uh")))
                continue;
            cnt++;
            processedRecords = cnt / 2;
            if (cnt > 0 && (cnt % (LOG_STEP * 2) == 0)) {
                LOG.info(" Processed " + processedRecords + " records ("
                        + (float) (LOG_STEP * 1000) / (float) (System.currentTimeMillis() - delta) + " rec/s)");
                delta = System.currentTimeMillis();
            }
            // Enumerate all docs with the same URL hash or content hash
            TermDocs td = ir.termDocs(t);
            if (td == null)
                continue;
            if (t.field().equals("uh")) {
                // Keep only the latest version of the document with
                // the same url hash. Note: even if the content
                // hash is identical, other metadata may be different, so even
                // in this case it makes sense to keep the latest version.
                int id = -1;
                String time = null;
                Document doc = null;
                while (td.next()) {
                    int docid = td.doc();
                    if (!ir.isDeleted(docid)) {
                        doc = ir.document(docid);
                        if (time == null) {
                            time = doc.get("time");
                            id = docid;
                            continue;
                        }
                        String dtime = doc.get("time");
                        // "time" is a DateField, and can be compared lexicographically
                        if (dtime.compareTo(time) > 0) {
                            if (id != -1) {
                                ir.deleteDocument(id);
                            }
                            time = dtime;
                            id = docid;
                        } else {
                            ir.deleteDocument(docid);
                        }
                    }
                }
            } else if (t.field().equals("ch")) {
                // Keep only the version of the document with
                // the highest score, and then with the shortest url.
                int id = -1;
                int ul = 0;
                float score = 0.0f;
                Document doc = null;
                while (td.next()) {
                    int docid = td.doc();
                    if (!ir.isDeleted(docid)) {
                        doc = ir.document(docid);
                        if (ul == 0) {
                            try {
                                ul = Integer.parseInt(doc.get("ul"));
                                score = Float.parseFloat(doc.get("score"));
                            } catch (Exception e) {
                                // ignore malformed "ul"/"score" values and keep the defaults
                            }
                            id = docid;
                            continue;
                        }
                        int dul = 0;
                        float dscore = 0.0f;
                        try {
                            dul = Integer.parseInt(doc.get("ul"));
                            dscore = Float.parseFloat(doc.get("score"));
                        } catch (Exception e) {
                            // ignore malformed "ul"/"score" values and keep the defaults
                        }
                        int cmp = Float.compare(dscore, score);
                        if (cmp == 0) {
                            // equal scores, select the one with shortest url
                            if (dul < ul) {
                                if (id != -1) {
                                    ir.deleteDocument(id);
                                }
                                ul = dul;
                                id = docid;
                            } else {
                                ir.deleteDocument(docid);
                            }
                        } else if (cmp < 0) {
                            ir.deleteDocument(docid);
                        } else {
                            if (id != -1) {
                                ir.deleteDocument(id);
                            }
                            ul = dul;
                            id = docid;
                        }
                    }
                }
            }
        }
        //
        // keep the IndexReader open...
        //

        LOG.info("* Deduplicating took " + (System.currentTimeMillis() - s1) + " ms");
        stage = SegmentMergeStatus.STAGE_WRITING;
        processedRecords = 0L;
        Vector outDirs = new Vector();
        File outDir = new File(output, SegmentWriter.getNewSegmentName());
        outDirs.add(outDir);
        LOG.info("* Merging all segments into " + output.getName());
        s1 = System.currentTimeMillis();
        delta = s1;
        nfs.mkdirs(outDir);
        SegmentWriter sw = new SegmentWriter(nfs, outDir, true);
        LOG.fine(" - opening first output segment in " + outDir.getName());
        FetcherOutput fo = new FetcherOutput();
        Content co = new Content();
        ParseText pt = new ParseText();
        ParseData pd = new ParseData();
        int outputCnt = 0;
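        // walk every doc id slot up to maxDoc(); slots deleted during deduplication are skipped below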
        for (int n = 0; n < ir.maxDoc(); n++) {
            if (ir.isDeleted(n)) {
                //System.out.println("-del");
                continue;
            }
            Document doc = ir.document(n);
            String segDoc = doc.get("sd");
            int idx = segDoc.indexOf('|');
            String segName = segDoc.substring(0, idx);
            String docName = segDoc.substring(idx + 1);
            SegmentReader sr = (SegmentReader) readers.get(segName);
            long docid;
            try {
                docid = Long.parseLong(docName);
            } catch (Exception e) {
                continue;
            }
            try {
                // get data from the reader
                sr.get(docid, fo, co, pt, pd);
            } catch (Throwable thr) {
                // don't break the loop, because only one of the segments
                // may be corrupted...
                LOG.fine(" - corrupt record no. " + docid + " in segment " + sr.segmentDir.getName()
                        + " - skipping.");
                continue;
            }
            sw.append(fo, co, pt, pd);
            outputCnt++;
            processedRecords++;
            if (processedRecords > 0 && (processedRecords % LOG_STEP == 0)) {
                LOG.info(" Processed " + processedRecords + " records ("
                        + (float) (LOG_STEP * 1000) / (float) (System.currentTimeMillis() - delta) + " rec/s)");
                delta = System.currentTimeMillis();
            }
            if (processedRecords % maxCount == 0) {
                sw.close();
                outDir = new File(output, SegmentWriter.getNewSegmentName());
                LOG.fine(" - starting next output segment in " + outDir.getName());
                nfs.mkdirs(outDir);
                sw = new SegmentWriter(nfs, outDir, true);
                outDirs.add(outDir);
            }
        }
        LOG.info("* Merging took " + (System.currentTimeMillis() - s1) + " ms");
        ir.close();
        sw.close();
        FileUtil.fullyDelete(fsmtIndexDir);
        for (Iterator iter = readers.keySet().iterator(); iter.hasNext();) {
            SegmentReader sr = (SegmentReader) readers.get(iter.next());
            sr.close();
        }
        if (runIndexer) {
            stage = SegmentMergeStatus.STAGE_INDEXING;
            totalRecords = outDirs.size();
            processedRecords = 0L;
            LOG.info("* Creating new segment index(es)...");
            File workingDir = new File(output, "indexsegment-workingdir");
            for (int k = 0; k < outDirs.size(); k++) {
                processedRecords++;
                if (workingDir.exists()) {
                    FileUtil.fullyDelete(workingDir);
                }
                IndexSegment indexer = new IndexSegment(nfs, Integer.MAX_VALUE, (File) outDirs.get(k),
                        workingDir);
                indexer.indexPages();
                FileUtil.fullyDelete(workingDir);
            }
        }
        if (delSegs) {
            // This deletes also all corrupt segments, which are
            // unusable anyway
            stage = SegmentMergeStatus.STAGE_DELETING;
            totalRecords = allsegdirs.size();
            processedRecords = 0L;
            LOG.info("* Deleting old segments...");
            for (int k = 0; k < allsegdirs.size(); k++) {
                processedRecords++;
                FileUtil.fullyDelete((File) allsegdirs.get(k));
            }
        }
        delta = System.currentTimeMillis() - start;
        float eps = (float) total / (float) (delta / 1000);
        LOG.info("Finished SegmentMergeTool: INPUT: " + total + " -> OUTPUT: " + outputCnt + " entries in "
                + ((float) delta / 1000f) + " s (" + eps + " entries/sec).");
    } catch (Exception e) {
        e.printStackTrace();
        LOG.severe(e.getMessage());
    }
}

From source file:org.apache.solr.codecs.test.testGetStoredFields.java

License:Apache License

public static void getDoc(String searchField, String searchString) throws IOException, ParseException {

    System.out.println("Searching for '" + searchString + "'");
    Directory luceneDir = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER));
    IndexReader indexReader = DirectoryReader.open(luceneDir);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    TotalHitCountCollector hitCountCollector = new TotalHitCountCollector();
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
    QueryParser queryParser = new QueryParser(Version.LUCENE_4_10_1, searchField, analyzer);
    Query query = queryParser.parse(searchString);
    indexSearcher.search(query, hitCountCollector);
    System.out.println("Word: " + searchString + "; Number of hits: " + hitCountCollector.getTotalHits());
    System.out.println("maxdocs=" + indexReader.maxDoc());
    org.apache.lucene.search.TopDocs docs = indexSearcher.search(query, 100);
    for (int i = 0; i < docs.scoreDocs.length; i++) {
        Document doc1 = indexReader.document(docs.scoreDocs[i].doc);
        System.out.println("title=" + doc1.get("title"));
        System.out.println("content=" + doc1.get("content"));
        System.out.println("global_bu_id=" + doc1.get("global_bu_id"));
        System.out.println("omega_order_num=" + doc1.get("omega_order_num"));
        System.out.println("------");
    }
    luceneDir.close();

}

From source file:org.apache.solr.request.uninverted.UnInvertedField.java

License:Apache License

public static BitDocSet ajustBase(int times, BitDocSet baseAdvanceDocs, IndexReader reader) {
    try {
        if (baseAdvanceDocs == null) {
            return null;
        }
        int maxdoc = reader.maxDoc();
        int oversize = maxdoc / times;
        int size = baseAdvanceDocs.size();
        int maxinterval = reader.getMaxInterval();
        log.info("ajustBase " + maxinterval + ",baseAdvanceDocs=" + size + "@" + oversize + "@" + maxdoc + ","
                + reader.getClass().getCanonicalName());
        if (size >= oversize || maxinterval > 256) {
            return null;
        }
    } catch (Exception e) {
        return null;
    }

    return baseAdvanceDocs;
}

From source file:org.apache.solr.request.uninverted.UnInvertedField.java

License:Apache License

public static BitDocSet cloneBitset(DocSet baseAdvanceDocs, IndexReader reader) {
    if (baseAdvanceDocs instanceof BitDocSet) {
        BitDocSet rtn = (BitDocSet) baseAdvanceDocs;
        OpenBitSet newbits = (OpenBitSet) (rtn.getBits().clone());
        return new BitDocSet(newbits, rtn.size());
    }

    OpenBitSet bs = new OpenBitSet(reader.maxDoc());
    DocIterator iter = baseAdvanceDocs.iterator();
    int pos = 0;
    while (iter.hasNext()) {
        bs.fastSet(iter.nextDoc());
        pos++;
    }

    return new BitDocSet(bs, pos);
}

From source file:org.apache.solr.search.function.FileFloatSource.java

License:Apache License

private static float[] getFloats(FileFloatSource ffs, IndexReader reader) {
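    // one slot per possible document id; maxDoc() bounds the valid id range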
    float[] vals = new float[reader.maxDoc()];
    if (ffs.defVal != 0) {
        Arrays.fill(vals, ffs.defVal);
    }
    InputStream is;
    String fname = "external_" + ffs.field.getName();
    try {
        is = VersionedFile.getLatestFile(ffs.dataDir, fname);
    } catch (IOException e) {
        // log, use defaults
        SolrCore.log.error("Error opening external value source file: " + e);
        return vals;
    }

    BufferedReader r = new BufferedReader(new InputStreamReader(is, IOUtils.CHARSET_UTF_8));

    String idName = ffs.keyField.getName();
    FieldType idType = ffs.keyField.getType();

    // warning: lucene's termEnum.skipTo() is not optimized... it simply does a next()
    // because of this, simply ask the reader for a new termEnum rather than
    // trying to use skipTo()

    List<String> notFound = new ArrayList<String>();
    int notFoundCount = 0;
    int otherErrors = 0;

    char delimiter = '=';

    BytesRef internalKey = new BytesRef();

    try {
        TermsEnum termsEnum = MultiFields.getTerms(reader, idName).iterator(null);
        DocsEnum docsEnum = null;

        // removing deleted docs shouldn't matter
        // final Bits liveDocs = MultiFields.getLiveDocs(reader);

        for (String line; (line = r.readLine()) != null;) {
            int delimIndex = line.lastIndexOf(delimiter);
            if (delimIndex < 0)
                continue;

            int endIndex = line.length();
            String key = line.substring(0, delimIndex);
            String val = line.substring(delimIndex + 1, endIndex);

            float fval;
            try {
                idType.readableToIndexed(key, internalKey);
                fval = Float.parseFloat(val);
            } catch (Exception e) {
                if (++otherErrors <= 10) {
                    SolrCore.log.error("Error loading external value source + fileName + " + e
                            + (otherErrors < 10 ? "" : "\tSkipping future errors for this file."));
                }
                continue; // go to next line in file.. leave values as default.
            }

            if (!termsEnum.seekExact(internalKey)) {
                if (notFoundCount < 10) { // collect first 10 not found for logging
                    notFound.add(key);
                }
                notFoundCount++;
                continue;
            }

            docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
            int doc;
            while ((doc = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                vals[doc] = fval;
            }
        }

    } catch (IOException e) {
        // log, use defaults
        SolrCore.log.error("Error loading external value source: " + e);
    } finally {
        // swallow exceptions on close so we don't override any
        // exceptions that happened in the loop
        try {
            r.close();
        } catch (Exception e) {
        }
    }

    SolrCore.log.info("Loaded external value source " + fname
            + (notFoundCount == 0 ? "" : " :" + notFoundCount + " missing keys " + notFound));

    return vals;
}