Example usage for org.apache.lucene.document StringField TYPE_NOT_STORED

List of usage examples for org.apache.lucene.document StringField TYPE_NOT_STORED

Introduction

In this page you can find the example usage for org.apache.lucene.document StringField TYPE_NOT_STORED.

Prototype

FieldType TYPE_NOT_STORED

To view the source code for org.apache.lucene.document StringField TYPE_NOT_STORED.

Click Source Link

Document

Indexed, not tokenized, omits norms, indexes DOCS_ONLY, not stored.

Usage

From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java

License:Apache License

/**
 * Adds one placeholder document to the index. Its only field is named
 * {@code "_" + boostField} and carries the constant value {@code "d"}.
 *
 * @param iw writer that receives the placeholder document
 * @throws IOException propagated from {@code IndexWriter#addDocument}
 */
private void addDummy(IndexWriter iw) throws IOException {
    final String placeholderName = "_" + boostField;
    final Document placeholder = new Document();
    // TYPE_NOT_STORED: indexed as a single untokenized term, value not stored.
    placeholder.add(new Field(placeholderName, "d", StringField.TYPE_NOT_STORED));
    iw.addDocument(placeholder);
}

From source file:di.uniba.it.nlpita.index.BuildSeoDwarfIndex.java

/**
 * Indexes one ontology element as a single Lucene document.
 *
 * <p>The document gets a tokenized {@code label}, a stored integer {@code id},
 * an untokenized {@code type}, and one untokenized field per entry of each
 * non-null collection ({@code domainOfProperty}, {@code rangeOfProperty},
 * {@code propertyDomain}).
 *
 * @param writer         writer that receives the document
 * @param e              element providing label, id and type
 * @param domainOf       values for {@code domainOfProperty}; may be {@code null}
 * @param rangeOf        values for {@code rangeOfProperty}; may be {@code null}
 * @param extendedDomain values for {@code propertyDomain}; may be {@code null}
 * @throws Exception propagated from the writer
 */
private static void indexOntologyElement(IndexWriter writer, OntologyElementToken e,
        Collection<String> domainOf, Collection<String> rangeOf, Collection<String> extendedDomain)
        throws Exception {
    final Document elementDoc = new Document();
    elementDoc.add(new Field("label", e.getLabel(), TextField.TYPE_NOT_STORED));
    elementDoc.add(new IntField("id", e.getId(), IntField.TYPE_STORED));
    elementDoc.add(new Field("type", e.getType(), StringField.TYPE_NOT_STORED));

    // NOTE(review): the original code remarks that the first element of each
    // collection is the URI; nothing in this method depends on that.
    addUntokenizedValues(elementDoc, "domainOfProperty", domainOf);
    addUntokenizedValues(elementDoc, "rangeOfProperty", rangeOf);
    addUntokenizedValues(elementDoc, "propertyDomain", extendedDomain);

    writer.addDocument(elementDoc);
}

/** Adds every value as its own untokenized, non-stored field; a {@code null} collection adds nothing. */
private static void addUntokenizedValues(Document target, String fieldName, Collection<String> values) {
    if (values == null) {
        return;
    }
    for (String value : values) {
        target.add(new Field(fieldName, value, StringField.TYPE_NOT_STORED));
    }
}

From source file:edu.ucla.cs.scai.linkedspending.index.IndexBuilder.java

/**
 * Indexes one element as a single Lucene document.
 *
 * <p>Each normalized keyword becomes an untokenized {@code label} field, the
 * URI is stored in {@code uri}, and the element type goes into {@code type}.
 * Elements of type {@code "dataset"} are additionally reported on standard output.
 *
 * @param writer   writer that receives the document
 * @param uri      URI of the element (stored)
 * @param keyWords raw keywords, normalized through {@code keywordsExtractor}
 * @param type     element type
 * @throws IOException propagated from the writer
 */
private void indexElement(IndexWriter writer, String uri, String keyWords, String type) throws IOException {
    final Document elementDoc = new Document();

    for (String normalizedKeyword : keywordsExtractor.normalizeWords(keyWords)) {
        elementDoc.add(new Field("label", normalizedKeyword, StringField.TYPE_NOT_STORED));
    }
    elementDoc.add(new Field("uri", uri, StringField.TYPE_STORED));
    elementDoc.add(new Field("type", type, StringField.TYPE_NOT_STORED));

    final boolean isDataset = type.equals("dataset");
    if (isDataset) {
        System.out.println("Indexed dataset " + uri);
    }

    writer.addDocument(elementDoc);
}

From source file:fr.paris.lutece.plugins.calendar.service.search.CalendarIndexer.java

License:Open Source License

/**
 * Builds the Lucene document used to index a single calendar event occurrence.
 *
 * <p>Fields are added in this order: calendar id, categories, role, url, uid,
 * date, summary, HTML summary, contents, title and type.
 *
 * @param occurrence The occurrence event to index
 * @param strRoleKey The role key
 * @param strUrl the url of the event
 * @param strAgenda the calendar id
 * @return A Lucene {@link org.apache.lucene.document.Document} holding the event data
 * @throws IOException The IO Exception
 * @throws InterruptedException The InterruptedException
 */
public static org.apache.lucene.document.Document getDocument(OccurrenceEvent occurrence, String strRoleKey,
        String strUrl, String strAgenda) throws IOException, InterruptedException {
    // make a new, empty document
    org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();

    // Stored, untokenized string type with norms switched on.
    FieldType ft = new FieldType(StringField.TYPE_STORED);
    ft.setOmitNorms(false);

    // Non-stored, untokenized string type with norms switched on.
    // (The previous code configured ft a second time and left this type untouched.)
    FieldType ftNotStored = new FieldType(StringField.TYPE_NOT_STORED);
    ftNotStored.setOmitNorms(false);

    //add the id of the calendar
    doc.add(new Field(Constants.FIELD_CALENDAR_ID, strAgenda + "_" + Constants.CALENDAR_SHORT_NAME,
            ftNotStored));

    //add the categories of the event as a BLANK-separated list of ids
    Collection<Category> arrayCategories = occurrence.getListCategories();
    StringBuilder sbCategories = new StringBuilder(Constants.EMPTY_STRING);

    if (arrayCategories != null) {
        for (Category category : arrayCategories) {
            sbCategories.append(category.getId() + BLANK);
        }
    }

    doc.add(new Field(Constants.FIELD_CATEGORY, sbCategories.toString(), TextField.TYPE_NOT_STORED));

    doc.add(new Field(SearchItem.FIELD_ROLE, strRoleKey, ft));

    // Add the url as a stored field; with this field type it is also indexed
    // as a single, untokenized term.
    doc.add(new Field(SearchItem.FIELD_URL, strUrl, ft));

    // Add the uid as a field, so that index can be incrementally maintained.
    // The value is stored and indexed without being tokenized.
    String strIdEvent = String.valueOf(occurrence.getId());
    doc.add(new Field(SearchItem.FIELD_UID, strIdEvent + "_" + Constants.CALENDAR_SHORT_NAME, ft));

    // Add the date of the occurrence. The field is indexed (i.e. searchable)
    // but not tokenized into words.
    String strDate = Utils.getDate(occurrence.getDate());
    doc.add(new Field(SearchItem.FIELD_DATE, strDate, ft));

    String strContentToIndex = getContentToIndex(occurrence);
    ContentHandler handler = new BodyContentHandler();
    Metadata metadata = new Metadata();

    try {
        // NOTE(review): getBytes() uses the platform default charset - confirm this
        // matches what the HTML parser expects.
        new HtmlParser().parse(new ByteArrayInputStream(strContentToIndex.getBytes()), handler, metadata,
                new ParseContext());
    } catch (SAXException e) {
        // keep the cause so the parsing failure can be diagnosed
        throw new AppException("Error during page parsing.", e);
    } catch (TikaException e) {
        throw new AppException("Error during page parsing.", e);
    }

    // The text is taken from the parser's handler because the parser has replaced
    // encoded characters (such as &eacute;) by the corresponding special characters.
    String strContents = occurrence.getTitle() + " - " + handler.toString();

    // Add the description as a summary field, truncated to the configured
    // maximum number of characters (200 by default). The field is stored and tokenized.
    int length = AppPropertiesService.getPropertyInt(PROPERTY_DESCRIPTION_MAX_CHARACTERS, 200);
    String strDescription = Utils.parseHtmlToPlainTextString(occurrence.getDescription());

    if (strDescription.length() > length) {
        strDescription = strDescription.substring(0, length) + PROPERTY_DESCRIPTION_ETC;
    }

    doc.add(new Field(SearchItem.FIELD_SUMMARY, strDescription, TextField.TYPE_STORED));
    doc.add(new Field(CalendarSearchItem.FIELD_HTML_SUMMARY, occurrence.getDescription(),
            TextField.TYPE_STORED));

    // Add the tag-stripped contents as a tokenized, non-stored text field.
    doc.add(new Field(SearchItem.FIELD_CONTENTS, strContents, TextField.TYPE_NOT_STORED));

    // Add the event title as a separate Text field, so that it can be searched
    // separately.
    doc.add(new Field(SearchItem.FIELD_TITLE, occurrence.getTitle(), TextField.TYPE_STORED));

    doc.add(new Field(SearchItem.FIELD_TYPE, CalendarPlugin.PLUGIN_NAME, ft));

    // return the document
    return doc;
}

From source file:fr.paris.lutece.plugins.directory.service.search.DirectorySearchIndexer.java

License:Open Source License

/**
 * Converts a directory record into the Lucene document used for indexing.
 *
 * <p>When no usable title can be built from {@code listTitleEntry}, the first
 * entry of {@code listContentEntry} is used as the title instead; in that case
 * {@code listTitleEntry} is modified in place. If the title is still blank,
 * no document is produced.
 *
 * @param record the record to convert into a document
 * @param listContentEntry the entries in this record that are marked as
 *            is_indexed
 * @param listTitleEntry the entries in this record that are marked as
 *            is_indexed_as_title (may be modified by this method)
 * @param listSummaryEntry the entries in this record that are marked as
 *            is_indexed_as_summary
 * @param plugin the plugin object
 * @return a lucene document filled with the record data, or {@code null}
 *         when no non-blank title could be determined
 */
public Document getDocument(Record record, List<IEntry> listContentEntry, List<IEntry> listTitleEntry,
        List<IEntry> listSummaryEntry, Plugin plugin) {
    // First fallback: nothing is flagged as title, so borrow the first content entry.
    final boolean usedFirstContentEntry = listTitleEntry.isEmpty() && !listContentEntry.isEmpty();
    if (usedFirstContentEntry) {
        listTitleEntry.add(listContentEntry.get(0));
    }

    String title = getContentToIndex(record, listTitleEntry, plugin);

    // Second fallback: the title entries produced nothing, retry with the first content entry.
    if (StringUtils.isBlank(title) && !usedFirstContentEntry && !listContentEntry.isEmpty()) {
        listTitleEntry.clear();
        listTitleEntry.add(listContentEntry.get(0));
        title = getContentToIndex(record, listTitleEntry, plugin);
    }

    // Still nothing to use as a title: this record is not indexed.
    if (StringUtils.isBlank(title)) {
        return null;
    }

    // Stored / non-stored untokenized string types, both with norms enabled.
    final FieldType storedType = new FieldType(StringField.TYPE_STORED);
    storedType.setOmitNorms(false);

    final FieldType unstoredType = new FieldType(StringField.TYPE_NOT_STORED);
    unstoredType.setOmitNorms(false);
    unstoredType.setTokenized(false);

    final Document luceneDoc = new Document();
    luceneDoc.add(new Field(SearchItem.FIELD_TITLE, title, storedType));

    if (!listContentEntry.isEmpty()) {
        final String content = getContentToIndex(record, listContentEntry, plugin);
        if (StringUtils.isNotBlank(content)) {
            luceneDoc.add(new Field(SearchItem.FIELD_CONTENTS, content, TextField.TYPE_NOT_STORED));
        }
    }

    if (!listSummaryEntry.isEmpty()) {
        final String summary = getContentToIndex(record, listSummaryEntry, plugin);
        if (StringUtils.isNotBlank(summary)) {
            luceneDoc.add(new StoredField(SearchItem.FIELD_SUMMARY, summary));
        }
    }

    // Records without a role are indexed under ROLE_NONE.
    final String recordRole = record.getRoleKey();
    final String roleKey = StringUtils.isBlank(recordRole) ? ROLE_NONE : recordRole;
    luceneDoc.add(new Field(SearchItem.FIELD_ROLE, roleKey, storedType));

    // NOTE(review): creation and modification dates are both written to FIELD_DATE.
    final String creationDay = DateTools.dateToString(record.getDateCreation(), DateTools.Resolution.DAY);
    luceneDoc.add(new Field(SearchItem.FIELD_DATE, creationDay, storedType));

    final String modificationDay = DateTools.dateToString(record.getDateModification(),
            DateTools.Resolution.DAY);
    luceneDoc.add(new Field(SearchItem.FIELD_DATE, modificationDay, storedType));

    luceneDoc.add(new Field(SearchItem.FIELD_TYPE, DIRECTORY, storedType));

    // Link back to the record view page of the directory application.
    final UrlItem recordUrl = new UrlItem(AppPathService.getPortalUrl());
    recordUrl.addParameter(XPageAppService.PARAM_XPAGE_APP, DIRECTORY);
    recordUrl.addParameter(PARAMETER_ID_DIRECTORY_RECORD, record.getIdRecord());
    recordUrl.addParameter(PARAMETER_VIEW_DIRECTORY_RECORD, "");
    luceneDoc.add(new Field(SearchItem.FIELD_URL, recordUrl.getUrl(), storedType));

    // The uid lets the index be maintained incrementally. It is indexed as a
    // single untokenized term and is not stored.
    final String uid = Integer.toString(record.getIdRecord()) + "_" + SHORT_NAME;
    luceneDoc.add(new Field(SearchItem.FIELD_UID, uid, unstoredType));

    return luceneDoc;
}

From source file:fr.paris.lutece.plugins.helpdesk.service.search.HelpdeskIndexer.java

License:Open Source License

/**
 * Builds the Lucene document used to index one helpdesk subject.
 *
 * @param subject the {@link Subject} to index
 * @param strRoleKey The role key
 * @param strUrl the url of the subject
 * @param plugin The {@link Plugin}
 * @return The Lucene {@link org.apache.lucene.document.Document} containing Subject data
 * @throws IOException The IO Exception
 * @throws InterruptedException The InterruptedException
 */
public static org.apache.lucene.document.Document getDocument(Subject subject, String strRoleKey, String strUrl,
        Plugin plugin) throws IOException, InterruptedException {
    // make a new, empty document
    org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();

    // Stored, untokenized string type with norms switched on.
    FieldType ft = new FieldType(StringField.TYPE_STORED);
    ft.setOmitNorms(false);

    // Non-stored, untokenized string type with norms switched on.
    FieldType ftNotStored = new FieldType(StringField.TYPE_NOT_STORED);
    ftNotStored.setOmitNorms(false);

    // Add the url as a stored field; with this field type it is also indexed
    // as a single, untokenized term.
    doc.add(new Field(SearchItem.FIELD_URL, strUrl, ft));

    // Add the uid as a field, so that index can be incrementally maintained.
    // This field is not stored; it is indexed, but it is not
    // tokenized prior to indexing.
    String strIdSubject = String.valueOf(subject.getId());
    doc.add(new Field(SearchItem.FIELD_UID, strIdSubject + "_" + SHORT_NAME_SUBJECT, ftNotStored));

    // The subject text is free text: index it tokenized so individual words are
    // searchable. An untokenized string type would index the whole text as one
    // term, which cannot match word queries and fails for very long values.
    doc.add(new Field(SearchItem.FIELD_CONTENTS, subject.getText(),
            org.apache.lucene.document.TextField.TYPE_NOT_STORED));

    // Add the subject text as the title as well (stored, untokenized).
    doc.add(new Field(SearchItem.FIELD_TITLE, subject.getText(), ft));

    doc.add(new Field(SearchItem.FIELD_TYPE, HelpdeskPlugin.PLUGIN_NAME, ft));

    doc.add(new Field(SearchItem.FIELD_ROLE, strRoleKey, ft));

    // return the document
    return doc;
}

From source file:indexing.IndexFiles.java

License:Apache License

/**
 * Adds the indexable fields of one wiki page to the given Lucene document:
 * title, spellcheck copy of the title, most frequent words, links and categories.
 *
 * @param doc
 *            The Document to which the fields are added
 * @param page
 *            The wiki page which is being processed
 * @return The same Document instance that was passed in, with all fields added
 * @throws IOException
 *             declared by the signature; propagated from the helpers called here, if any throws it
 */
private static Document parseWikipage(Document doc, WikiPage page) throws IOException {
    final String title = page.getTitle().trim();

    doc.add(new TextField(Fieldname.TITLE.toString(), title, Field.Store.YES));

    // Field to allow spellchecking against the full title: untokenized,
    // not stored, with norms enabled.
    FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
    ft.setOmitNorms(false);
    doc.add(new Field(Fieldname.SPELLCHECK.toString(), title, ft));

    String fre = Utils.getMostFrequentWords(page.getWikiText(), Config.numOfFreqWords);
    doc.add(new TextField(Fieldname.FREQWORDS.toString(), fre, Field.Store.YES));

    // Each link is collapsed to one word and the words are joined with a
    // trailing space after every entry.
    StringBuilder links = new StringBuilder();
    for (String link : page.getLinks()) {
        links.append(collapseToSingleToken(link)).append(' ');
    }
    doc.add(new TextField(Fieldname.LINKS.toString(), links.toString(), Field.Store.YES));

    // Categories are handled exactly like links.
    StringBuilder categories = new StringBuilder();
    for (String category : page.getCategories()) {
        categories.append(collapseToSingleToken(category)).append(' ');
    }
    doc.add(new TextField(Fieldname.CATEGORIES.toString(), categories.toString(), Field.Store.YES));

    return doc;
}

/**
 * Removes spaces and the characters {@code ( ) . , ' -} from a name and
 * lower-cases it, so that tokenization is intended to treat it as one token.
 * Lower-casing uses the root locale so the result does not depend on the
 * JVM's default locale.
 */
private static String collapseToSingleToken(String name) {
    return name.replaceAll("[() .,'-]", "").toLowerCase(java.util.Locale.ROOT);
}

From source file:org.apache.blur.analysis.type.StringFieldTypeDefinition.java

License:Apache License

/**
 * Creates the untokenized, non-stored field for a sub column and, when
 * sorting is enabled, passes it through {@code addSort}.
 */
@Override
public Iterable<? extends Field> getFieldsForSubColumn(String family, Column column, String subName) {
    final String fieldName = getName(family, column.getName(), subName);
    final Field indexedField = new Field(fieldName, column.getValue(), StringField.TYPE_NOT_STORED);
    if (!isSortEnable()) {
        return makeIterable(indexedField);
    }
    return addSort(column, fieldName, indexedField);
}

From source file:org.apache.blur.mapreduce.lib.v2.DocumentWritable.java

License:Apache License

/**
 * Serializes a {@code StringField}: first a boolean telling whether its type
 * is {@code StringField.TYPE_STORED} ({@code true}) or
 * {@code StringField.TYPE_NOT_STORED} ({@code false}), then its string value.
 *
 * @param out         destination of the serialized form
 * @param stringField field to serialize
 * @throws IOException if the field uses any other field type, or if writing fails
 */
private void writeStringField(DataOutput out, StringField stringField) throws IOException {
    final FieldType type = stringField.fieldType();
    final boolean stored = type.equals(StringField.TYPE_STORED);

    // Only the two predefined StringField types can be represented by the flag.
    if (!stored && !type.equals(StringField.TYPE_NOT_STORED)) {
        throw new IOException("Non default FieldTypes for StringField not supported.");
    }

    out.writeBoolean(stored);
    writeString(out, stringField.stringValue());
}

From source file:org.apache.solr.legacy.BBoxStrategy.java

License:Apache License

/**
 * Creates this strategy.
 * {@code fieldType} is used to customize the indexing options of the 4 number fields, and to a lesser degree the XDL
 * field too. Search requires pointValues (or legacy numerics), and relevancy requires docValues. If these features
 * aren't needed then disable them.
 *
 * <p>The given {@code fieldType} is frozen by this constructor.
 *
 * @throws IllegalArgumentException if point values and a legacy numeric type are both requested, or if the
 *         legacy numeric type is not {@code DOUBLE}
 */
public BBoxStrategy(SpatialContext ctx, String fieldNamePrefix, FieldType fieldType) {
    super(ctx, fieldNamePrefix);

    // All field names share the prefix and differ only by suffix.
    field_bbox = fieldNamePrefix;
    field_minX = fieldNamePrefix + SUFFIX_MINX;
    field_maxX = fieldNamePrefix + SUFFIX_MAXX;
    field_minY = fieldNamePrefix + SUFFIX_MINY;
    field_maxY = fieldNamePrefix + SUFFIX_MAXY;
    field_xdl = fieldNamePrefix + SUFFIX_XDL;

    fieldType.freeze();
    this.optionsFieldType = fieldType;

    this.hasStored = fieldType.stored();
    this.hasDocVals = fieldType.docValuesType() != DocValuesType.NONE;
    this.hasPointVals = fieldType.pointDimensionCount() > 0;

    // Every enabled representation contributes one group of four number fields.
    int quadCount = 0;
    if (hasStored) {
        quadCount++;
    }
    if (hasDocVals) {
        quadCount++;
    }
    if (hasPointVals) {
        quadCount++;
    }

    final boolean legacyNumericsRequested = fieldType.indexOptions() != IndexOptions.NONE
            && fieldType instanceof LegacyFieldType
            && ((LegacyFieldType) fieldType).numericType() != null;

    if (!legacyNumericsRequested) {
        legacyNumericFieldType = null;
    } else {
        if (hasPointVals) {
            throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive");
        }
        final LegacyFieldType requestedType = (LegacyFieldType) fieldType;
        if (requestedType.numericType() != LegacyNumericType.DOUBLE) {
            throw new IllegalArgumentException(getClass() + " does not support " + requestedType.numericType());
        }
        quadCount++;

        // Non-stored double type that inherits the caller's precision step.
        final LegacyFieldType doubleType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED);
        doubleType.setNumericPrecisionStep(requestedType.numericPrecisionStep());
        doubleType.freeze();
        legacyNumericFieldType = doubleType;
    }

    // The XDL field only exists when something is indexed.
    final boolean hasIndex = hasPointVals || legacyNumericFieldType != null;
    if (!hasIndex) {
        xdlFieldType = null;
    } else {
        final FieldType xdlType = new FieldType(StringField.TYPE_NOT_STORED);
        xdlType.setIndexOptions(IndexOptions.DOCS);
        xdlType.freeze();
        xdlFieldType = xdlType;
    }

    final int xdlFieldCount = (xdlFieldType == null) ? 0 : 1;
    this.fieldsLen = quadCount * 4 + xdlFieldCount;
}