Example usage for org.apache.lucene.index IndexReader getTermVector

List of usage examples for org.apache.lucene.index IndexReader getTermVector

Introduction

On this page you can find example usages of org.apache.lucene.index IndexReader getTermVector.

Prototype

public final Terms getTermVector(int docID, String field) throws IOException 

Document

Retrieve term vector for this document and field, or null if term vectors were not indexed.
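Before the project examples below, here is a minimal, self-contained sketch of the typical call pattern. It is not taken from any of the projects on this page; it assumes Lucene 4.x-style APIs (matching the examples below), a placeholder index path, and a field named "contents" that was indexed with term vectors, so adapt the names to your own index.

import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class TermVectorSketch {
    public static void main(String[] args) throws IOException {
        // Placeholder path and field name; both are assumptions for this sketch.
        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("/path/to/index")));
        try {
            // Term vector for document 0 in field "contents"; null if term vectors were not indexed.
            Terms vector = reader.getTermVector(0, "contents");
            if (vector != null) {
                TermsEnum termsEnum = vector.iterator(null);
                BytesRef text;
                while ((text = termsEnum.next()) != null) {
                    // On a single-document term vector, totalTermFreq() is the within-document frequency.
                    System.out.println(text.utf8ToString() + " -> " + termsEnum.totalTermFreq());
                }
            }
        } finally {
            reader.close();
        }
    }
}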

Usage

From source file:lucene.CosineDocumentSimilarity.java

Map<String, Integer> getTermFrequencies(IndexReader reader, int docId) throws IOException {
    Terms vector = reader.getTermVector(docId, CONTENT);
    TermsEnum termsEnum = vector.iterator(null);
    Map<String, Integer> frequencies = new HashMap<>();
    BytesRef text = null;
    while ((text = termsEnum.next()) != null) {
        String term = text.utf8ToString();
        int freq = (int) termsEnum.totalTermFreq();
        frequencies.put(term, freq);
        terms.add(term); // 'terms' is a Set<String> field on the enclosing class (not shown) that accumulates every term seen
    }
    return frequencies;
}

From source file:lucenetools.TermData.java

License:Apache License

/**
 * Main application.
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    Options opts = new Options();
    CommandLine commandLine = new CommandLine();

    // if no command line options specified, user wants help
    if (0 == args.length) {
        commandLine.showHelp();
        System.exit(0);
    }

    // extract command line args and store in opts
    if (!commandLine.parse(args, opts))
        System.exit(1);

    if (opts.showHelp) {
        commandLine.showHelp();
        System.exit(0);
    }

    // validate all command line options
    if (!commandLine.isValid(opts))
        System.exit(1);

    // report all command line options to the user
    System.out.println("\nLuceneToMtx version " + VERSION + ".");
    commandLine.printOpts(opts);

    long maxMemory = Runtime.getRuntime().maxMemory() / 1024 / 1024;
    System.out.println("Java runtime max memory: " + maxMemory + " MB.");

    // Build a map and assign a dictionary index to each term.
    // Include only those terms that survive the min term freq cutoff.
    Map<String, Integer> dictMap = new TreeMap<>();

    File file = null;
    System.out.println("Processing index...");
    try {
        file = new File(opts.indexDir);
        IndexReader reader = DirectoryReader.open(FSDirectory.open(file));
        TermsEnum te = null;
        int nnz = 0, numCols = 0, maxDocs = reader.maxDoc();
        LinkedList<FeatureVector> matrixData = new LinkedList<>();

        // add other fields
        Collection<String> fields = new ArrayList<>();
        if (opts.fields > 0) {
            fields = MultiFields.getIndexedFields(reader);
            fields.remove(CONTENTSFIELD);
            fields.remove(PATHFIELD);
        }

        if (!extractTerms(reader, dictMap, opts.minTermFreq, maxDocs - 1, opts.maxTermPercentage))
            System.exit(1);

        // set of field names to extract
        Set<String> fieldSet = new HashSet<>();
        fieldSet.add(PATHFIELD);
        for (String s : fields) {
            fieldSet.add(s);
        }

        for (int i = 0; i < maxDocs; ++i) {
            // get term vector for next document
            Terms terms = reader.getTermVector(i, CONTENTSFIELD);
            if (terms == null)
                continue;

            te = terms.iterator(te);
            FeatureVector fv = new FeatureVector(numCols);

            int numEntries = buildFeatureVector(fv, te, dictMap);
            if (numEntries > 0) {
                // extract document path and save with FeatureVector
                Document doc = reader.document(i, fieldSet);
                fv.docPath = doc.get(PATHFIELD);

                // add any additional fields
                for (String s : fields) {
                    fv.fields.put(s, doc.get(s));
                }

                //System.out.println("processing document:" + fv.docPath);

                matrixData.add(fv);
                nnz += numEntries;
                ++numCols;
            }
        }

        // Sort the feature vectors by their document path field.  Write 
        // the matrix columns in this sorted order.
        Collections.sort(matrixData, new FeatureVectorComparator());
        File outdir = new File(opts.outDir);
        writeMatrixMarketFile(new File(outdir, MATRIXFILE), matrixData, dictMap.size(), numCols, nnz);
        System.out.println("Wrote " + MATRIXFILE + ".");
        writeDictionaryFile(new File(outdir, DICTFILE), dictMap);
        System.out.println("Wrote " + DICTFILE + ".");
        writeDocumentFile(new File(outdir, DOCFILE), matrixData);
        System.out.println("Wrote " + DOCFILE + ".");
        writeFieldFiles(outdir, fields, matrixData);
    } catch (IndexNotFoundException e) {
        if (null != file) {
            System.out.println("Lucene index not found in: " + file.getAbsolutePath());
        }
    } catch (IOException e) {
        System.out.println("LuceneToMtx exception: caught a " + e.getClass() + "\nMessage: " + e.getMessage());
    }
}

From source file:org.apache.solr.handler.admin.LukeRequestHandler.java

License:Apache License

private static SimpleOrderedMap<Object> getDocumentFieldsInfo(Document doc, int docId, IndexReader reader,
        IndexSchema schema) throws IOException {
    final CharsRef spare = new CharsRef();
    SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
    for (Object o : doc.getFields()) {
        Field field = (Field) o;
        SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>();

        SchemaField sfield = schema.getFieldOrNull(field.name());
        FieldType ftype = (sfield == null) ? null : sfield.getType();

        f.add("type", (ftype == null) ? null : ftype.getTypeName());
        f.add("schema", getFieldFlags(sfield));
        f.add("flags", getFieldFlags(field));

        Term t = new Term(field.name(), ftype != null ? ftype.storedToIndexed(field) : field.stringValue());

        f.add("value", (ftype == null) ? null : ftype.toExternal(field));

        // TODO: this really should be "stored"
        f.add("internal", field.stringValue()); // may be a binary number

        BytesRef bytes = field.binaryValue();
        if (bytes != null) {
            f.add("binary", Base64.byteArrayToBase64(bytes.bytes, bytes.offset, bytes.length));
        }
        f.add("boost", field.boost());
        f.add("docFreq", t.text() == null ? 0 : reader.docFreq(t)); // this can be 0 for non-indexed fields

        // If we have a term vector, return that
        if (field.fieldType().storeTermVectors()) {
            try {
                Terms v = reader.getTermVector(docId, field.name());
                if (v != null) {
                    SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<Integer>();
                    final TermsEnum termsEnum = v.iterator(null);
                    BytesRef text;
                    while ((text = termsEnum.next()) != null) {
                        final int freq = (int) termsEnum.totalTermFreq();
                        UnicodeUtil.UTF8toUTF16(text, spare);
                        tfv.add(spare.toString(), freq);
                    }
                    f.add("termVector", tfv);
                }
            } catch (Exception ex) {
                log.warn("error writing term vector", ex);
            }
        }

        finfo.add(field.name(), f);
    }
    return finfo;
}

From source file:org.apache.solr.handler.component.AlfrescoLukeRequestHandler.java

License:Open Source License

private static SimpleOrderedMap<Object> getDocumentFieldsInfo(Document doc, int docId, IndexReader reader,
        IndexSchema schema) throws IOException {
    final CharsRefBuilder spare = new CharsRefBuilder();
    SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<>();
    for (Object o : doc.getFields()) {
        Field field = (Field) o;
        SimpleOrderedMap<Object> f = new SimpleOrderedMap<>();

        SchemaField sfield = schema.getFieldOrNull(field.name());
        FieldType ftype = (sfield == null) ? null : sfield.getType();

        f.add("type", (ftype == null) ? null : ftype.getTypeName());
        f.add("schema", getFieldFlags(sfield));
        f.add("flags", getFieldFlags(field));

        Term t = new Term(field.name(), ftype != null ? ftype.storedToIndexed(field) : field.stringValue());

        f.add("value", (ftype == null) ? null : ftype.toExternal(field));

        // TODO: this really should be "stored"
        f.add("internal", field.stringValue()); // may be a binary number

        BytesRef bytes = field.binaryValue();
        if (bytes != null) {
            f.add("binary", Base64.byteArrayToBase64(bytes.bytes, bytes.offset, bytes.length));
        }
        f.add("boost", field.boost());
        f.add("docFreq", t.text() == null ? 0 : reader.docFreq(t)); // this
        // can
        // be 0
        // for
        // non-indexed
        // fields

        // If we have a term vector, return that
        if (field.fieldType().storeTermVectors()) {
            try {
                Terms v = reader.getTermVector(docId, field.name());
                if (v != null) {
                    SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<>();
                    final TermsEnum termsEnum = v.iterator();
                    BytesRef text;
                    while ((text = termsEnum.next()) != null) {
                        final int freq = (int) termsEnum.totalTermFreq();
                        spare.copyUTF8Bytes(text);
                        tfv.add(spare.toString(), freq);
                    }
                    f.add("termVector", tfv);
                }
            } catch (Exception ex) {
                log.warn("error writing term vector", ex);
            }
        }

        finfo.add(field.name(), f);
    }
    return finfo;
}

From source file:org.apache.solr.handler.component.TermVectorComponent.java

License:Apache License

@Override
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false)) {
        return;
    }

    NamedList<Object> termVectors = new NamedList<Object>();
    rb.rsp.add(TERM_VECTORS, termVectors);

    IndexSchema schema = rb.req.getSchema();
    SchemaField keyField = schema.getUniqueKeyField();
    String uniqFieldName = null;
    if (keyField != null) {
        uniqFieldName = keyField.getName();
        termVectors.add("uniqueKeyFieldName", uniqFieldName);
    }

    FieldOptions allFields = new FieldOptions();
    //figure out what options we have, and try to get the appropriate vector
    allFields.termFreq = params.getBool(TermVectorParams.TF, false);
    allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
    allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
    allFields.docFreq = params.getBool(TermVectorParams.DF, false);
    allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
    //boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
    //short cut to all values.
    if (params.getBool(TermVectorParams.ALL, false)) {
        allFields.termFreq = true;
        allFields.positions = true;
        allFields.offsets = true;
        allFields.docFreq = true;
        allFields.tfIdf = true;
    }

    //Build up our per field mapping
    Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
    NamedList<List<String>> warnings = new NamedList<List<String>>();
    List<String> noTV = new ArrayList<String>();
    List<String> noPos = new ArrayList<String>();
    List<String> noOff = new ArrayList<String>();

    Set<String> fields = getFields(rb);
    if (null != fields) {
        //we have specific fields to retrieve, or no fields
        for (String field : fields) {

            // workaround for SOLR-3523
            if (null == field || "score".equals(field))
                continue;

            // we don't want to issue warnings about the uniqueKey field
            // since it can cause lots of confusion in distributed requests
            // where the uniqueKey field is injected into the fl for merging
            final boolean fieldIsUniqueKey = field.equals(uniqFieldName);

            SchemaField sf = schema.getFieldOrNull(field);
            if (sf != null) {
                if (sf.storeTermVector()) {
                    FieldOptions option = fieldOptions.get(field);
                    if (option == null) {
                        option = new FieldOptions();
                        option.fieldName = field;
                        fieldOptions.put(field, option);
                    }
                    //get the per field mappings
                    option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
                    option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
                    option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
                    //Validate these are even an option
                    option.positions = params.getFieldBool(field, TermVectorParams.POSITIONS,
                            allFields.positions);
                    if (option.positions && !sf.storeTermPositions() && !fieldIsUniqueKey) {
                        noPos.add(field);
                    }
                    option.offsets = params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
                    if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey) {
                        noOff.add(field);
                    }
                } else {//field doesn't have term vectors
                    if (!fieldIsUniqueKey)
                        noTV.add(field);
                }
            } else {
                //field doesn't exist
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
            }
        }
    } //else, deal with all fields

    // NOTE: currently all types of warnings are schema driven, and guaranteed
    // to be consistent across all shards - if additional types of warnings
    // are added that might be different between shards, finishStage() needs
    // to be changed to account for that.
    boolean hasWarnings = false;
    if (!noTV.isEmpty()) {
        warnings.add("noTermVectors", noTV);
        hasWarnings = true;
    }
    if (!noPos.isEmpty()) {
        warnings.add("noPositions", noPos);
        hasWarnings = true;
    }
    if (!noOff.isEmpty()) {
        warnings.add("noOffsets", noOff);
        hasWarnings = true;
    }
    if (hasWarnings) {
        termVectors.add("warnings", warnings);
    }

    DocListAndSet listAndSet = rb.getResults();
    List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
    Iterator<Integer> iter;
    if (docIds != null && !docIds.isEmpty()) {
        iter = docIds.iterator();
    } else {
        DocList list = listAndSet.docList;
        iter = list.iterator();
    }
    SolrIndexSearcher searcher = rb.req.getSearcher();

    IndexReader reader = searcher.getIndexReader();
    //the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors

    //Only load the id field to get the uniqueKey of that
    //field

    final String finalUniqFieldName = uniqFieldName;

    final List<String> uniqValues = new ArrayList<String>();

    // TODO: is this required to be single-valued? if so, we should STOP
    // once we find it...
    final StoredFieldVisitor getUniqValue = new StoredFieldVisitor() {
        @Override
        public void stringField(FieldInfo fieldInfo, String value) {
            uniqValues.add(value);
        }

        @Override
        public void intField(FieldInfo fieldInfo, int value) {
            uniqValues.add(Integer.toString(value));
        }

        @Override
        public void longField(FieldInfo fieldInfo, long value) {
            uniqValues.add(Long.toString(value));
        }

        @Override
        public Status needsField(FieldInfo fieldInfo) {
            return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO;
        }
    };

    TermsEnum termsEnum = null;

    while (iter.hasNext()) {
        Integer docId = iter.next();
        NamedList<Object> docNL = new NamedList<Object>();

        if (keyField != null) {
            reader.document(docId, getUniqValue);
            String uniqVal = null;
            if (uniqValues.size() != 0) {
                uniqVal = uniqValues.get(0);
                uniqValues.clear();
                docNL.add("uniqueKey", uniqVal);
                termVectors.add(uniqVal, docNL);
            }
        } else {
            // support for schemas w/o a unique key,
            termVectors.add("doc-" + docId, docNL);
        }

        if (null != fields) {
            for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
                final String field = entry.getKey();
                final Terms vector = reader.getTermVector(docId, field);
                if (vector != null) {
                    termsEnum = vector.iterator(termsEnum);
                    mapOneVector(docNL, entry.getValue(), reader, docId, termsEnum, field);
                }
            }
        } else {
            // extract all fields
            final Fields vectors = reader.getTermVectors(docId);
            for (String field : vectors) {
                Terms terms = vectors.terms(field);
                if (terms != null) {
                    termsEnum = terms.iterator(termsEnum);
                    mapOneVector(docNL, allFields, reader, docId, termsEnum, field);
                }
            }
        }
    }
}

From source file:org.nlp4l.lucene.LuceneDocTermVector.java

License:Apache License

/**
 * Builds a term-weight vector for a single Lucene document.
 *
 * @param reader the {@link IndexReader} for the target Lucene index
 * @param docId the ID of the Lucene document to read the term vector from
 * @param fieldName the name of the Lucene field whose term vector is used
 * @param size the maximum number of weighted terms to keep
 * @param termsReuse a {@link Terms} instance to reuse, or null to load the term vector from the reader
 * @param liveDocs live-document bits, or null to obtain them from the reader
 * @param twf the term weight factory, or null to use {@link DefaultTfIdfTermWeightFactory}
 * @param stopWords terms to exclude as candidate features, or null to keep all terms
 * @throws IOException
 */
public LuceneDocTermVector(IndexReader reader, int docId, String fieldName, int size, Terms termsReuse,
        Bits liveDocs, TermWeightFactory twf, Set<String> stopWords) throws IOException {
    liveDocs = liveDocs == null ? MultiFields.getLiveDocs(reader) : liveDocs;
    twf = twf == null ? new DefaultTfIdfTermWeightFactory(reader, docId, fieldName, liveDocs) : twf;
    queue = new TermWeightQueue(size);

    if (termsReuse == null)
        termsReuse = reader.getTermVector(docId, fieldName);
    TermsEnum termsEnum = termsReuse.iterator();
    BytesRef text;
    while ((text = termsEnum.next()) != null) {
        // candidate feature term
        final String term = text.utf8ToString();
        if (stopWords != null && stopWords.contains(term))
            continue;
        final TermWeight termWeight = twf.create(text);
        if (termWeight == null)
            continue;

        Map.Entry<String, TermWeight> entry = new Map.Entry<String, TermWeight>() {
            public String getKey() {
                return term;
            }

            public TermWeight getValue() {
                return termWeight;
            }

            public TermWeight setValue(TermWeight arg0) {
                // TODO Auto-generated method stub
                return null;
            }
        };
        queue.insertWithOverflow(entry);
    }
}

From source file:org.ohdsi.usagi.tests.TestLucene.java

License:Apache License

public static void main(String[] args) throws IOException, ParseException {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
    //Analyzer analyzer = new UsagiAnalyzer();
    FieldType textVectorField = new FieldType();
    textVectorField.setIndexed(true);
    textVectorField.setTokenized(true);
    textVectorField.setStoreTermVectors(true);
    textVectorField.setStoreTermVectorPositions(false);
    textVectorField.setStoreTermVectorPayloads(false);
    textVectorField.setStoreTermVectorOffsets(false);
    textVectorField.setStored(true);
    textVectorField.freeze();

    File indexFolder = new File(folder);
    if (indexFolder.exists())
        DirectoryUtilities.deleteDir(indexFolder);

    Directory dir = FSDirectory.open(indexFolder);

    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(256.0);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new Field("F", "word1 word2 w3 word4", textVectorField));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new Field("F", "word1 word2 w3", textVectorField));
    writer.addDocument(doc);

    writer.close();

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(folder)));
    for (int i = 0; i < reader.numDocs(); i++) {
        TermsEnum termsEnum = reader.getTermVector(i, "F").iterator(null);
        BytesRef text;
        while ((text = termsEnum.next()) != null) {
            System.out.print(text.utf8ToString() + ",");
        }
        System.out.println();
    }
    IndexSearcher searcher = new IndexSearcher(reader);

    // MoreLikeThis mlt = new MoreLikeThis(searcher.getIndexReader());
    // mlt.setMinTermFreq(0);
    // mlt.setMinDocFreq(0);
    // mlt.setMaxDocFreq(9999);
    // mlt.setMinWordLen(0);
    // mlt.setMaxWordLen(9999);
    // mlt.setMaxDocFreqPct(100);
    // mlt.setMaxNumTokensParsed(9999);
    // mlt.setMaxQueryTerms(9999);
    // mlt.setStopWords(null);
    // mlt.setFieldNames(new String[] { "F" });
    // mlt.setAnalyzer(new UsagiAnalyzer());
    // Query query = mlt.like("F", new StringReader("Systolic blood pressure"));
    QueryParser parser = new QueryParser(Version.LUCENE_4_9, "F", analyzer);
    Query query = parser.parse("word1");

    Explanation explanation = searcher.explain(query, 0);
    print(explanation);
    System.out.println();
    explanation = searcher.explain(query, 1);
    print(explanation);
    System.out.println();

    TopDocs topDocs = searcher.search(query, 99);
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        System.out.println(scoreDoc.score + "\t" + reader.document(scoreDoc.doc).get("F"));
    }
}

From source file:retriever.TermWt.java

DocVector(IndexReader reader, int docId) throws Exception {
    this.reader = reader;
    Terms terms = reader.getTermVector(docId, FIELD_ANALYZED_CONTENT);

    TermsEnum termsEnum;
    BytesRef term;
    List<TermWt> tfvec = new ArrayList<>();

    // Construct the normalized tf vector
    termsEnum = terms.iterator(null); // access the terms for this field
    while ((term = termsEnum.next()) != null) { // explore the terms for this field
        String termStr = term.utf8ToString();
        DocsEnum docsEnum = termsEnum.docs(null, null); // enumerate through documents, in this case only one
        while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            //get the term frequency in the document
            int tf = docsEnum.freq();
            tfvec.add(new TermWt(termStr, tf));
        }
    }

    Collections.sort(tfvec);

    vec = new TermWt[tfvec.size()];
    vec = tfvec.toArray(vec);
}

From source file:trustframework.evidence.github.ConversationMimicry.java

private Map<String, Integer> getFrequencyMap(IndexReader ir, Integer docIndex) throws IOException {
    Terms frequencyVector = ir.getTermVector(docIndex, "Content");
    TermsEnum termsIterator = frequencyVector.iterator();
    Map<String, Integer> frequencyMap = new HashMap<>();
    BytesRef text;
    while ((text = termsIterator.next()) != null) {
        String term = text.utf8ToString();
        int freq = (int) termsIterator.totalTermFreq();
        frequencyMap.put(term, freq);
    }
    return frequencyMap;
}

From source file:vectorizer.TermInfo.java

private DocVector buildTerms(IndexReader reader, int docId, int numDocs, Dictionary dict) throws Exception {
    DocVector wmap = new DocVector(reader.document(docId).get(ID_FIELD_NAME));
    Terms tfvector;
    TermsEnum termsEnum;
    String termText;
    BytesRef term;
    int tf;
    float idf;

    tfvector = reader.getTermVector(docId, CONTENT_FIELD_NAME);

    if (tfvector == null)
        return null;

    // Construct the normalized tf vector
    termsEnum = tfvector.iterator(); // access the terms for this field

    while ((term = termsEnum.next()) != null) { // explore the terms for this field
        tf = (int) termsEnum.totalTermFreq();
        termText = term.utf8ToString();

        float df = reader.docFreq(new Term(CONTENT_FIELD_NAME, termText));
        idf = (float) Math.log(1 + numDocs / df);

        TermInfo termInfo = new TermInfo(termText, tf, getTermId(termText), idf);
        if (dict != null) {
            Translations translations = dict.getTranslationTerms(termText);
            for (TranslationInfo tinfo : translations.getTranslationInfo()) {
                termInfo.tf *= tinfo.weight;
            }
        }

        // Update global stats
        TermInfo seenTermInfo = collFreq.get(termText);
        if (seenTermInfo == null) {
            seenTermInfo = new TermInfo(termInfo.term, termInfo.tf, termInfo.id, termInfo.idf);
            collFreq.put(termText, seenTermInfo);
        } else {
            seenTermInfo.tf += termInfo.tf; // coll freq
        }

        wmap.addTermInfo(termInfo);
    }

    return wmap;
}