Example usage for org.apache.lucene.document FieldType setTokenized

List of usage examples for org.apache.lucene.document FieldType setTokenized

Introduction

On this page you can find example usages of org.apache.lucene.document FieldType setTokenized.

Prototype

public void setTokenized(boolean value) 

Source Link

Document

Set to true to tokenize this field's contents via the configured Analyzer.

Usage

From source file:alix.lucene.Alix.java

License:Open Source License

/**
 * Builds a Lucene {@link FieldType} from a compact option string.
 *
 * <p>The option string is composed of letters, in any order, following the
 * Luke convention {@code IdfpoPSV}:
 * <ul>
 *   <li>I: Indexed</li>
 *   <li>d: docs</li>
 *   <li>f: freqs</li>
 *   <li>p: pos</li>
 *   <li>o: offset</li>
 *   <li>P: payloads</li>
 *   <li>S: Stored</li>
 *   <li>V: TermVector</li>
 * </ul>
 * Only {@code S}, {@code p}, {@code o}, {@code P} and {@code V} are inspected
 * by this method; every type other than the {@code null} and {@code "S"}
 * special cases is indexed with docs, freqs, positions and offsets.
 *
 * @param options option letters; {@code null} yields a default (empty) type,
 *                and exactly {@code "S"} yields a stored-only type
 * @return a new, unfrozen field type
 */
public static FieldType fieldType(String options) {
    if (options == null) {
        return new FieldType();
    }
    FieldType type;
    if ("S".equals(options)) {
        // Stored only: nothing is indexed.
        type = new FieldType();
        type.setStored(true);
        return type;
    }
    if (options.contains("S")) {
        type = new FieldType(TextField.TYPE_STORED);
    } else {
        type = new FieldType(TextField.TYPE_NOT_STORED);
    }
    // optimize ?
    type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    if (options.contains("p")) {
        // Term-vector positions are only legal when term vectors themselves are
        // enabled; Lucene rejects the field at index time otherwise.
        type.setStoreTermVectors(true);
        type.setStoreTermVectorPositions(true);
    }

    if (options.contains("o")) {
        type.setTokenized(true);
        type.setStoreTermVectors(true);
        type.setStoreTermVectorOffsets(true);
    }
    if (options.contains("P")) {
        // Payloads are attached to positions, so positions are enabled as well.
        type.setTokenized(true);
        type.setStoreTermVectors(true);
        type.setStoreTermVectorPositions(true);
        type.setStoreTermVectorPayloads(true);
    }
    if (options.contains("V")) {
        type.setTokenized(true);
        type.setStoreTermVectors(true);
    }
    return type;
}

From source file:api.startup.PDFIndexer.java

License:Open Source License

/**
 * Indexes a single document and writes it to the given index writer
 * @param writer - the index writer to writer
 * @param metadata - the document/*from ww w .j ava2  s .co m*/
 * @throws IOException
 */
static void indexDoc(IndexWriter writer, DocumentMetadata metadata) throws IOException {
    Path file = Paths.get(metadata.getFilename());
    try {
        Document doc = new Document();

        Field pathField = new StringField(Constants.FIELD_PATH, file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add Document metadata //
        doc.add(new StringField(Constants.FIELD_AUTHOR, metadata.getAuthor(), Field.Store.YES));
        doc.add(new StringField(Constants.FIELD_TITLE, metadata.getTitle(), Field.Store.YES));
        doc.add(new StringField(Constants.FIELD_CONFERENCE, metadata.getConference(), Field.Store.YES));
        // End of Document Metadata //

        Field modified = new LongField(Constants.FIELD_MODIFIED, Files.getLastModifiedTime(file).toMillis(),
                Field.Store.YES);
        doc.add(modified);

        PDFTextExtractor extractor = new PDFTextExtractor();
        // Get the string contents
        String textContents = extractor.extractText(file.toString());

        // Store the string contents
        FieldType contentsType = new FieldType();
        contentsType.setStored(true);
        contentsType.setTokenized(true);
        contentsType.setStoreTermVectors(true);
        contentsType.setStoreTermVectorPositions(true);
        contentsType.setStoreTermVectorPayloads(true);
        contentsType.setStoreTermVectorOffsets(true);
        contentsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        Field contents = new Field(Constants.FIELD_CONTENTS, textContents, contentsType);
        doc.add(contents);

        if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            log.info("adding " + file + " to index");
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            log.info("updating " + file + " in index");
            writer.updateDocument(new Term(Constants.FIELD_PATH, file.toString()), doc);
        }
    } catch (IOException e) {
        log.error("Failed to read file " + metadata.getFilename());
    }

}

From source file:cc.twittertools.index.IndexStatuses.java

License:Apache License

/**
 * Command-line entry point: builds a Lucene index from a collection of statuses.
 *
 * <p>Required options are the source collection directory and the index
 * location. Optional ones are: show help, merge the index into a single
 * segment, store term vectors for the text field, a bzip2 file listing
 * deleted tweet ids to skip, and a maximum status id.
 *
 * @param args command-line arguments
 * @throws Exception if setup fails before indexing starts (errors raised
 *                   while indexing are printed and the resources closed)
 */
@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(HELP_OPTION, "show help"));
    options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment"));
    options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors"));

    options.addOption(OptionBuilder.withArgName("dir").hasArg().withDescription("source collection directory")
            .create(COLLECTION_OPTION));
    options.addOption(
            OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids")
            .create(DELETES_OPTION));
    options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(COLLECTION_OPTION)
            || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(IndexStatuses.class.getName(), options);
        System.exit(-1);
    }

    String collectionPath = cmdline.getOptionValue(COLLECTION_OPTION);
    String indexPath = cmdline.getOptionValue(INDEX_OPTION);

    // Field type for the status text: indexed with positions, stored, tokenized.
    final FieldType textOptions = new FieldType();
    textOptions.setIndexed(true);
    textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    textOptions.setStored(true);
    textOptions.setTokenized(true);
    if (cmdline.hasOption(STORE_TERM_VECTORS_OPTION)) {
        textOptions.setStoreTermVectors(true);
    }

    LOG.info("collection: " + collectionPath);
    LOG.info("index: " + indexPath);

    LongOpenHashSet deletes = null;
    if (cmdline.hasOption(DELETES_OPTION)) {
        deletes = new LongOpenHashSet();
        File deletesFile = new File(cmdline.getOptionValue(DELETES_OPTION));
        if (!deletesFile.exists()) {
            System.err.println("Error: " + deletesFile + " does not exist!");
            System.exit(-1);
        }
        LOG.info("Reading deletes from " + deletesFile);

        FileInputStream fin = new FileInputStream(deletesFile);
        byte[] ignoreBytes = new byte[2];
        fin.read(ignoreBytes); // "B", "Z" bytes from commandline tools
        BufferedReader br = new BufferedReader(new InputStreamReader(new CBZip2InputStream(fin)));

        // Each line is either a bare id or "<id>\t<anything>".
        String s;
        while ((s = br.readLine()) != null) {
            if (s.contains("\t")) {
                deletes.add(Long.parseLong(s.split("\t")[0]));
            } else {
                deletes.add(Long.parseLong(s));
            }
        }
        br.close();
        fin.close();
        LOG.info("Read " + deletes.size() + " tweetids from deletes file.");
    }

    long maxId = Long.MAX_VALUE;
    if (cmdline.hasOption(MAX_ID_OPTION)) {
        maxId = Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION));
        // Previously logged under the misleading label "index: ".
        LOG.info("max id: " + maxId);
    }

    long startTime = System.currentTimeMillis();
    File file = new File(collectionPath);
    if (!file.exists()) {
        System.err.println("Error: " + file + " does not exist!");
        System.exit(-1);
    }

    StatusStream stream = new JsonStatusCorpusReader(file);

    Directory dir = FSDirectory.open(new File(indexPath));
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, IndexStatuses.ANALYZER);
    config.setOpenMode(OpenMode.CREATE);

    IndexWriter writer = new IndexWriter(dir, config);
    int cnt = 0;
    Status status;
    try {
        while ((status = stream.next()) != null) {
            if (status.getText() == null) {
                continue;
            }

            // Skip deletes tweetids.
            if (deletes != null && deletes.contains(status.getId())) {
                continue;
            }

            if (status.getId() > maxId) {
                continue;
            }

            cnt++;
            Document doc = new Document();
            doc.add(new LongField(StatusField.ID.name, status.getId(), Field.Store.YES));
            doc.add(new LongField(StatusField.EPOCH.name, status.getEpoch(), Field.Store.YES));
            doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES));

            doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions));

            // Each count goes into the field of the same name (the two getters
            // were previously swapped between FRIENDS_COUNT and FOLLOWERS_COUNT).
            doc.add(new IntField(StatusField.FRIENDS_COUNT.name, status.getFriendsCount(), Store.YES));
            doc.add(new IntField(StatusField.FOLLOWERS_COUNT.name, status.getFollowersCount(), Store.YES));
            doc.add(new IntField(StatusField.STATUSES_COUNT.name, status.getStatusesCount(), Store.YES));

            long inReplyToStatusId = status.getInReplyToStatusId();
            if (inReplyToStatusId > 0) {
                doc.add(new LongField(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId,
                        Field.Store.YES));
                doc.add(new LongField(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId(),
                        Field.Store.YES));
            }

            String lang = status.getLang();
            if (!lang.equals("unknown")) {
                doc.add(new TextField(StatusField.LANG.name, status.getLang(), Store.YES));
            }

            long retweetStatusId = status.getRetweetedStatusId();
            if (retweetStatusId > 0) {
                doc.add(new LongField(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId, Field.Store.YES));
                doc.add(new LongField(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId(),
                        Field.Store.YES));
                doc.add(new IntField(StatusField.RETWEET_COUNT.name, status.getRetweetCount(), Store.YES));
                if (status.getRetweetCount() < 0 || status.getRetweetedStatusId() < 0) {
                    LOG.warn("Error parsing retweet fields of " + status.getId());
                }
            }

            writer.addDocument(doc);
            if (cnt % 100000 == 0) {
                LOG.info(cnt + " statuses indexed");
            }
        }

        LOG.info(String.format("Total of %s statuses added", cnt));

        if (cmdline.hasOption(OPTIMIZE_OPTION)) {
            LOG.info("Merging segments...");
            writer.forceMerge(1);
            LOG.info("Done!");
        }

        LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        writer.close();
        dir.close();
        stream.close();
    }
}

From source file:ci6226.buildindex.java

/**
 * Indexes a review file into the Lucene index at {@code ./myindex}.
 *
 * <p>The input is read line by line and each line is parsed as one JSON
 * object. For every object the {@code review_id} and {@code business_id}
 * are added as stored string fields, and {@code text} as a stored, tokenized
 * field with term vectors (positions and offsets). A running document count
 * is printed per line and the elapsed time at the end.
 *
 * @param args the command line arguments (unused)
 */
public static void main(String[] args) throws FileNotFoundException, IOException, ParseException {
    String file = "/home/steven/Dropbox/workspace/ntu_coursework/ci6226/Assiment/yelpdata/yelp_training_set/yelp_training_set_review.json";

    Date start = new Date();
    String indexPath = "./myindex";

    // Field type of the review text; identical for every document, so build it once.
    FieldType ft = new FieldType();
    ft.setIndexed(true);
    ft.setStored(true);
    ft.setStoreTermVectors(true);
    ft.setTokenized(true);
    ft.setStoreTermVectorPositions(true);
    ft.setStoreTermVectorOffsets(true);

    BufferedReader in = new BufferedReader(new FileReader(file));
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");
        // TODO: try different analyzer,stop words,words steming check size
        Analyzer analyzer = new myAnalyzer();

        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);
        // Add new documents to an existing index (or create it if missing).
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);

        Directory dir = FSDirectory.open(new File(indexPath));
        try {
            IndexWriter writer = new IndexWriter(dir, iwc);
            try {
                int line = 0;
                String s;
                // readLine() returning null is the reliable end-of-stream signal;
                // ready() only says whether a read would block.
                while ((s = in.readLine()) != null) {
                    JSONObject person = (JSONObject) JSONValue.parse(s);
                    String text = (String) person.get("text");
                    String business_id = (String) person.get("business_id");
                    String review_id = (String) person.get("review_id");

                    Document doc = new Document();
                    doc.add(new StringField("review_id", review_id, Field.Store.YES));
                    doc.add(new StringField("business_id", business_id, Field.Store.YES));
                    doc.add(new Field("text", text, ft));

                    writer.addDocument(doc);

                    System.out.println(line++);
                }
            } finally {
                // Close even when a line fails to parse or index, so the index
                // write lock is released.
                writer.close();
            }
        } finally {
            dir.close();
        }
    } finally {
        in.close();
    }

    Date end = new Date();
    System.out.println(end.getTime() - start.getTime() + " total milliseconds");
}

From source file:ci6226.eval_index_writer.java

/**
 * Builds (or appends to) a Lucene index of reviews as a side effect of
 * construction.
 *
 * <p>The review file is read line by line and each line is parsed as one
 * JSON object. For every object the {@code review_id} is added as a stored
 * string field and {@code text} as a stored, tokenized field with term
 * vectors (positions and offsets). The elapsed time is printed at the end.
 *
 * @param _analyzer        analyzer used to tokenize the review text
 * @param _iReviewLocation path of the review file, one JSON object per line
 * @param _dir             index directory name, resolved relative to {@code ./}
 * @throws IOException if the review file or the index cannot be read or written
 */
public eval_index_writer(Analyzer _analyzer, String _iReviewLocation, String _dir) throws IOException {
    Date start = new Date();
    String indexPath = "./" + _dir;

    // Field type of the review text; identical for every document, so build it once.
    FieldType ft = new FieldType();
    ft.setIndexed(true);
    ft.setStored(true);
    ft.setStoreTermVectors(true);
    ft.setTokenized(true);
    ft.setStoreTermVectorPositions(true);
    ft.setStoreTermVectorOffsets(true);

    BufferedReader in = new BufferedReader(new FileReader(_iReviewLocation));
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, _analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);

        Directory dir = FSDirectory.open(new File(indexPath));
        try {
            IndexWriter writer = new IndexWriter(dir, iwc);
            try {
                String s;
                // readLine() returning null is the reliable end-of-stream signal;
                // ready() only says whether a read would block.
                while ((s = in.readLine()) != null) {
                    JSONObject person = (JSONObject) JSONValue.parse(s);
                    String text = (String) person.get("text");
                    String review_id = (String) person.get("review_id");

                    Document doc = new Document();
                    doc.add(new StringField("review_id", review_id, Field.Store.YES));
                    doc.add(new Field("text", text, ft));

                    writer.addDocument(doc);
                }
            } finally {
                // Close even when a line fails to parse or index, so the index
                // write lock is released.
                writer.close();
            }
        } finally {
            dir.close();
        }
    } finally {
        in.close();
    }

    Date end = new Date();
    System.out.println(end.getTime() - start.getTime() + " total milliseconds");
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Adds the fields shared by all feature index entries to a Lucene document:
 * feature id, chromosome id and name, start and end index (as point, stored
 * and group doc-values fields), feature type, file id, feature name, the
 * chromosome id facet, and the entry UUID with its facet.
 *
 * @param document      the document to populate
 * @param entry         the feature index entry to read values from
 * @param featureFileId id of the feature file the entry belongs to
 */
private void addCommonDocumentFields(Document document, FeatureIndexEntry entry, final Long featureFileId) {
    document.add(new SortedStringField(FeatureIndexFields.FEATURE_ID.getFieldName(), entry.getFeatureId()));

    // Chromosome id: stored, indexed as a single untokenized term, sortable via doc values.
    FieldType fieldType = new FieldType();
    fieldType.setOmitNorms(true);
    fieldType.setIndexOptions(IndexOptions.DOCS);
    fieldType.setStored(true);
    fieldType.setTokenized(false);
    fieldType.setDocValuesType(DocValuesType.SORTED);
    fieldType.freeze();
    Field field = new Field(FeatureIndexFields.CHROMOSOME_ID.getFieldName(),
            entry.getChromosome() != null ? new BytesRef(entry.getChromosome().getId().toString())
                    : new BytesRef(""),
            fieldType);
    document.add(field);
    // NOTE(review): the chromosome is null-checked above but dereferenced
    // unconditionally here and for the CHR_ID facet below; confirm whether a
    // null chromosome can actually reach this method.
    document.add(new SortedStringField(FeatureIndexFields.CHROMOSOME_NAME.getFieldName(),
            entry.getChromosome().getName(), true));

    document.add(new SortedIntPoint(FeatureIndexFields.START_INDEX.getFieldName(), entry.getStartIndex()));
    document.add(new StoredField(FeatureIndexFields.START_INDEX.getFieldName(), entry.getStartIndex()));
    document.add(new SortedDocValuesField(FeatureIndexFields.START_INDEX.getGroupName(),
            new BytesRef(entry.getStartIndex().toString())));

    document.add(new SortedIntPoint(FeatureIndexFields.END_INDEX.getFieldName(), entry.getEndIndex()));
    document.add(new StoredField(FeatureIndexFields.END_INDEX.getFieldName(), entry.getEndIndex()));
    // The END_INDEX group field must carry the end index (it previously
    // repeated the start index).
    document.add(new SortedDocValuesField(FeatureIndexFields.END_INDEX.getGroupName(),
            new BytesRef(entry.getEndIndex().toString())));

    document.add(new StringField(FeatureIndexFields.FEATURE_TYPE.getFieldName(),
            entry.getFeatureType() != null ? entry.getFeatureType().getFileValue() : "", Field.Store.YES));
    document.add(new StringField(FeatureIndexFields.FILE_ID.getFieldName(), featureFileId.toString(),
            Field.Store.YES));

    // The indexed/stored name is lower-cased; the doc-values copy keeps the original case.
    document.add(new StringField(FeatureIndexFields.FEATURE_NAME.getFieldName(),
            entry.getFeatureName() != null ? entry.getFeatureName().toLowerCase() : "", Field.Store.YES));
    document.add(new SortedDocValuesField(FeatureIndexFields.FEATURE_NAME.getFieldName(),
            new BytesRef(entry.getFeatureName() != null ? entry.getFeatureName() : "")));

    document.add(new SortedSetDocValuesFacetField(FeatureIndexFields.CHR_ID.getFieldName(),
            entry.getChromosome().getId().toString()));

    document.add(new SortedStringField(FeatureIndexFields.UID.getFieldName(), entry.getUuid().toString()));
    document.add(new SortedSetDocValuesFacetField(FeatureIndexFields.F_UID.getFieldName(),
            entry.getUuid().toString()));
}

From source file:com.github.hotware.lucene.extension.bean.field.BeanInformationCacheImpl.java

License:BEER-WARE LICENSE

/**
 * Creates the {@link FieldInformation} for one annotated bean field.
 *
 * <p>The type wrapper named by the annotation is instantiated reflectively,
 * a Lucene {@link FieldType} is populated from the annotation's settings,
 * the wrapper gets a chance to adjust that type, and the type is then frozen.
 *
 * @param bf         the annotation describing how the field is indexed
 * @param field      the bean field being described
 * @param fieldClass the class associated with the field
 * @return the assembled field information
 * @throws RuntimeException if the type wrapper cannot be instantiated
 */
private FieldInformation buildFieldInformation(BeanField bf, Field field, Class<?> fieldClass) {
    final com.github.hotware.lucene.extension.bean.type.Type wrapper;
    try {
        // TODO: maybe cache these?
        Object instance = bf.type().newInstance();
        wrapper = (com.github.hotware.lucene.extension.bean.type.Type) instance;
    } catch (InstantiationException | IllegalAccessException e) {
        throw new RuntimeException(e);
    }

    final FieldType luceneType = new FieldType();

    // Indexing and storage flags.
    luceneType.setIndexed(bf.index());
    luceneType.setIndexOptions(bf.indexOptions());
    luceneType.setStored(bf.store());
    luceneType.setTokenized(bf.tokenized());
    luceneType.setOmitNorms(bf.omitNorms());

    // Term vector flags.
    luceneType.setStoreTermVectors(bf.storeTermVectors());
    luceneType.setStoreTermVectorPositions(bf.storeTermVectorPositions());
    luceneType.setStoreTermVectorOffsets(bf.storeTermVectorOffsets());
    luceneType.setStoreTermVectorPayloads(bf.storeTermVectorPayloads());

    // Let the wrapper have the last word before the type becomes immutable.
    wrapper.configureFieldType(luceneType);
    luceneType.freeze();

    FrozenField frozen = new FrozenField(field);
    return new FieldInformation(frozen, fieldClass, luceneType, bf);
}

From source file:com.globalsight.ling.lucene.IndexDocument.java

License:Apache License

/**
 * Creates a document whose text is tokenized and indexed but not stored.
 *
 * <p>The main id and sub id are stored with the document; as configured
 * here they are neither indexed nor tokenized.
 * NOTE(review): the legacy calls this replaces ({@code Field.Keyword}) were
 * described as also searchable; confirm that leaving the ids unindexed is
 * intended.
 *
 * @param p_mainId main id (tu id, concept id), written to the "mainid" field
 * @param p_subId  sub id (tuv id, term id), written to the "subid" field
 * @param p_text   the contents to tokenize and index
 * @return the populated document
 */
static public Document IndexDocument(long p_mainId, long p_subId, String p_text) {
    final Document doc = new Document();

    // Main id: stored only.
    final FieldType mainIdType = new FieldType();
    mainIdType.setStored(true);
    mainIdType.setIndexed(false);
    mainIdType.setTokenized(false);
    doc.add(new Field(MAINID, String.valueOf(p_mainId), mainIdType));

    // Sub id: stored only.
    final FieldType subIdType = new FieldType();
    subIdType.setStored(true);
    subIdType.setIndexed(false);
    subIdType.setTokenized(false);
    doc.add(new Field(SUBID, String.valueOf(p_subId), subIdType));

    // Contents: tokenized and indexed, but not stored
    // (the equivalent of the legacy Field.UnStored).
    final FieldType textType = new FieldType();
    textType.setStored(false);
    textType.setIndexed(true);
    textType.setTokenized(true);
    doc.add(new Field(TEXT, p_text, textType));

    return doc;
}

From source file:com.globalsight.ling.lucene.IndexDocument.java

License:Apache License

/**
 * Creates a document whose text is tokenized, indexed and stored (useful
 * for short text but not for long documents).
 *
 * <p>The main id and sub id are stored with the document; as configured
 * here they are neither indexed nor tokenized.
 * NOTE(review): the legacy calls this replaces ({@code Field.Keyword}) were
 * described as also searchable; confirm that leaving the ids unindexed is
 * intended.
 *
 * @param p_mainId main id (tu id, concept id), written to the "mainid" field
 * @param p_subId  sub id (tuv id, term id), written to the "subid" field
 * @param p_text   the contents to tokenize, index and store
 * @return the populated document
 */
static public Document DataDocument(long p_mainId, long p_subId, String p_text) {
    final Document doc = new Document();

    // Main id: stored only.
    final FieldType mainIdType = new FieldType();
    mainIdType.setStored(true);
    mainIdType.setIndexed(false);
    mainIdType.setTokenized(false);
    doc.add(new Field(MAINID, String.valueOf(p_mainId), mainIdType));

    // Sub id: stored only.
    final FieldType subIdType = new FieldType();
    subIdType.setStored(true);
    subIdType.setIndexed(false);
    subIdType.setTokenized(false);
    doc.add(new Field(SUBID, String.valueOf(p_subId), subIdType));

    // Contents: tokenized, indexed and stored
    // (the equivalent of the legacy Field.Text).
    final FieldType textType = new FieldType();
    textType.setStored(true);
    textType.setIndexed(true);
    textType.setTokenized(true);
    doc.add(new Field(TEXT, p_text, textType));

    return doc;
}

From source file:com.globalsight.ling.tm2.lucene.TuvDocument.java

License:Apache License

/**
 * Builds the Lucene document for this TUV.
 *
 * <p>Fields, in order: the text (indexed, tokenized, not stored); the TUV id
 * (stored, indexed, not tokenized); the TU id, TM id, token count and
 * is-source flag (each stored only); and, when target locales are present,
 * a space-separated list of them (indexed, tokenized, not stored).
 *
 * @return the populated document
 */
private Document createDocument() {
    final Document result = new Document();

    // Not stored, indexed, tokenized: used for searchable free text.
    final FieldType textType = new FieldType();
    textType.setStored(false);
    textType.setIndexed(true);
    textType.setTokenized(true);
    result.add(new Field(TEXT_FIELD, m_text, textType));

    // Stored, indexed, not tokenized: the TUV id is kept as a single term.
    final FieldType tuvIdType = new FieldType();
    tuvIdType.setStored(true);
    tuvIdType.setIndexed(true);
    tuvIdType.setTokenized(false);
    result.add(new Field(TUV_ID_FIELD, m_tuvId.toString(), tuvIdType));

    // Stored, not indexed, not tokenized: the same settings serve all four
    // retrieve-only fields below.
    final FieldType storedOnly = new FieldType();
    storedOnly.setStored(true);
    storedOnly.setIndexed(false);
    storedOnly.setTokenized(false);
    result.add(new Field(TU_ID_FIELD, m_tuId.toString(), storedOnly));
    result.add(new Field(TM_ID_FIELD, m_tmId.toString(), storedOnly));
    result.add(new Field(TOKEN_COUNT_FIELD, m_totalTokenCount.toString(), storedOnly));
    result.add(new Field(IS_SOURCE_FIELD, m_isSourceLocale.toString(), storedOnly));

    if (m_targetLocales == null) {
        return result;
    }

    // Target locales: every locale is followed by a single space.
    final StringBuilder joined = new StringBuilder();
    for (String targetLocale : m_targetLocales) {
        joined.append(targetLocale).append(' ');
    }
    // Not stored, indexed, tokenized.
    final FieldType localesType = new FieldType();
    localesType.setStored(false);
    localesType.setIndexed(true);
    localesType.setTokenized(true);
    result.add(new Field(TARGET_LOCALES_FIELD, joined.toString(), localesType));

    return result;
}