List of usage examples for org.apache.lucene.document StringField TYPE_NOT_STORED
FieldType TYPE_NOT_STORED
To view the source code for org.apache.lucene.document StringField TYPE_NOT_STORED, click the Source Link.
From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java
License:Apache License
private void addDummy(IndexWriter iw) throws IOException { Document dummy = new Document(); Field f = new Field("_" + boostField, "d", StringField.TYPE_NOT_STORED); dummy.add(f);// www . j av a2 s .c o m iw.addDocument(dummy); }
From source file:di.uniba.it.nlpita.index.BuildSeoDwarfIndex.java
private static void indexOntologyElement(IndexWriter writer, OntologyElementToken e, Collection<String> domainOf, Collection<String> rangeOf, Collection<String> extendedDomain) throws Exception { Document doc = new Document(); doc.add(new Field("label", e.getLabel(), TextField.TYPE_NOT_STORED)); doc.add(new IntField("id", e.getId(), IntField.TYPE_STORED)); doc.add(new Field("type", e.getType(), StringField.TYPE_NOT_STORED)); if (domainOf != null) { for (String d : domainOf) { //the first element is the URI doc.add(new Field("domainOfProperty", d, StringField.TYPE_NOT_STORED)); }//from w w w.j a v a 2 s . c o m } if (rangeOf != null) { for (String r : rangeOf) { //the first element is the URI doc.add(new Field("rangeOfProperty", r, StringField.TYPE_NOT_STORED)); } } if (extendedDomain != null) { for (String d : extendedDomain) { //the first element is the URI doc.add(new Field("propertyDomain", d, StringField.TYPE_NOT_STORED)); } } writer.addDocument(doc); }
From source file:edu.ucla.cs.scai.linkedspending.index.IndexBuilder.java
private void indexElement(IndexWriter writer, String uri, String keyWords, String type) throws IOException { Document doc = new Document(); for (String k : keywordsExtractor.normalizeWords(keyWords)) { doc.add(new Field("label", k, StringField.TYPE_NOT_STORED)); }//from w w w.ja va 2 s .c o m doc.add(new Field("uri", uri, StringField.TYPE_STORED)); doc.add(new Field("type", type, StringField.TYPE_NOT_STORED)); if (type.equals("dataset")) { System.out.println("Indexed dataset " + uri); } writer.addDocument(doc); }
From source file:fr.paris.lutece.plugins.calendar.service.search.CalendarIndexer.java
License:Open Source License
/** * Builds a document which will be used by Lucene during the indexing of the * calendar list//from www . j a v a 2 s .c o m * @param occurrence The occurence event * @param strUrl the url of the subject * @param strRoleKey The role key * @param strAgenda the calendar id * @return A Lucene {@link Document} containing QuestionAnswer Data * @throws IOException The IO Exception * @throws InterruptedException The InterruptedException */ public static org.apache.lucene.document.Document getDocument(OccurrenceEvent occurrence, String strRoleKey, String strUrl, String strAgenda) throws IOException, InterruptedException { // make a new, empty document org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document(); FieldType ft = new FieldType(StringField.TYPE_STORED); ft.setOmitNorms(false); FieldType ftNotStored = new FieldType(StringField.TYPE_NOT_STORED); ft.setOmitNorms(false); //add the id of the calendar doc.add(new Field(Constants.FIELD_CALENDAR_ID, strAgenda + "_" + Constants.CALENDAR_SHORT_NAME, ftNotStored)); //add the category of the event Collection<Category> arrayCategories = occurrence.getListCategories(); String strCategories = Constants.EMPTY_STRING; if (arrayCategories != null) { Iterator<Category> i = arrayCategories.iterator(); while (i.hasNext()) { strCategories += (i.next().getId() + BLANK); } } doc.add(new Field(Constants.FIELD_CATEGORY, strCategories, TextField.TYPE_NOT_STORED)); doc.add(new Field(SearchItem.FIELD_ROLE, strRoleKey, ft)); // Add the url as a field named "url". Use an UnIndexed field, so // that the url is just stored with the question/answer, but is not searchable. doc.add(new Field(SearchItem.FIELD_URL, strUrl, ft)); // Add the uid as a field, so that index can be incrementally maintained. // This field is not stored with question/answer, it is indexed, but it is not // tokenized prior to indexing. 
String strIdEvent = String.valueOf(occurrence.getId()); doc.add(new Field(SearchItem.FIELD_UID, strIdEvent + "_" + Constants.CALENDAR_SHORT_NAME, ft)); // Add the last modified date of the file a field named "modified". // Use a field that is indexed (i.e. searchable), but don't tokenize // the field into words. String strDate = Utils.getDate(occurrence.getDate()); doc.add(new Field(SearchItem.FIELD_DATE, strDate, ft)); String strContentToIndex = getContentToIndex(occurrence); ContentHandler handler = new BodyContentHandler(); Metadata metadata = new Metadata(); try { new HtmlParser().parse(new ByteArrayInputStream(strContentToIndex.getBytes()), handler, metadata, new ParseContext()); } catch (SAXException e) { throw new AppException("Error during page parsing."); } catch (TikaException e) { throw new AppException("Error during page parsing."); } //the content of the article is recovered in the parser because this one //had replaced the encoded caracters (as é) by the corresponding special caracter (as ?) StringBuilder sb = new StringBuilder(occurrence.getTitle() + " - " + handler.toString()); // Add the description as a summary field, so that index can be incrementally maintained. // This field is stored, but it is not indexed int length = AppPropertiesService.getPropertyInt(PROPERTY_DESCRIPTION_MAX_CHARACTERS, 200); String strDescription = Utils.parseHtmlToPlainTextString(occurrence.getDescription()); if (strDescription.length() > length) { strDescription = strDescription.substring(0, length) + PROPERTY_DESCRIPTION_ETC; } doc.add(new Field(SearchItem.FIELD_SUMMARY, strDescription, TextField.TYPE_STORED)); doc.add(new Field(CalendarSearchItem.FIELD_HTML_SUMMARY, occurrence.getDescription(), TextField.TYPE_STORED)); // Add the tag-stripped contents as a Reader-valued Text field so it will // get tokenized and indexed. 
doc.add(new Field(SearchItem.FIELD_CONTENTS, sb.toString(), TextField.TYPE_NOT_STORED)); // Add the subject name as a separate Text field, so that it can be searched // separately. doc.add(new Field(SearchItem.FIELD_TITLE, occurrence.getTitle(), TextField.TYPE_STORED)); doc.add(new Field(SearchItem.FIELD_TYPE, CalendarPlugin.PLUGIN_NAME, ft)); // return the document return doc; }
From source file:fr.paris.lutece.plugins.directory.service.search.DirectorySearchIndexer.java
License:Open Source License
/** * Builds a document which will be used by Lucene during the indexing of * this record//from ww w . j a v a 2 s .co m * @param record the record to convert into a document * @param listContentEntry the entries in this record that are marked as * is_indexed * @param listTitleEntry the entries in this record that are marked as * is_indexed_as_title * @param listSummaryEntry the entries in this record that are marked as * is_indexed_as_summary * @param plugin the plugin object * @return a lucene document filled with the record data */ public Document getDocument(Record record, List<IEntry> listContentEntry, List<IEntry> listTitleEntry, List<IEntry> listSummaryEntry, Plugin plugin) { Document doc = new Document(); FieldType ft = new FieldType(StringField.TYPE_STORED); ft.setOmitNorms(false); FieldType ftNotStored = new FieldType(StringField.TYPE_NOT_STORED); ftNotStored.setOmitNorms(false); ftNotStored.setTokenized(false); boolean bFallback = false; //Fallback if there is no entry marker as indexed_as_title //Uses the first indexed field instead if (listTitleEntry.isEmpty() && !listContentEntry.isEmpty()) { listTitleEntry.add(listContentEntry.get(0)); bFallback = true; } String strTitle = getContentToIndex(record, listTitleEntry, plugin); //Fallback if fields were empty //Uses the first indexed field instead if (StringUtils.isBlank(strTitle) && !bFallback && !listContentEntry.isEmpty()) { listTitleEntry.clear(); listTitleEntry.add(listContentEntry.get(0)); strTitle = getContentToIndex(record, listTitleEntry, plugin); } //No more fallback. 
Giving up if (StringUtils.isBlank(strTitle)) { return null; } doc.add(new Field(SearchItem.FIELD_TITLE, strTitle, ft)); if (!listContentEntry.isEmpty()) { String strContent = getContentToIndex(record, listContentEntry, plugin); if (StringUtils.isNotBlank(strContent)) { doc.add(new Field(SearchItem.FIELD_CONTENTS, strContent, TextField.TYPE_NOT_STORED)); } } if (!listSummaryEntry.isEmpty()) { String strSummary = getContentToIndex(record, listSummaryEntry, plugin); if (StringUtils.isNotBlank(strSummary)) { doc.add(new StoredField(SearchItem.FIELD_SUMMARY, strSummary)); } } String strRoleKey = record.getRoleKey(); if (StringUtils.isBlank(strRoleKey)) { strRoleKey = ROLE_NONE; } doc.add(new Field(SearchItem.FIELD_ROLE, strRoleKey, ft)); String strDate = DateTools.dateToString(record.getDateCreation(), DateTools.Resolution.DAY); doc.add(new Field(SearchItem.FIELD_DATE, strDate, ft)); String strDateModification = DateTools.dateToString(record.getDateModification(), DateTools.Resolution.DAY); doc.add(new Field(SearchItem.FIELD_DATE, strDateModification, ft)); doc.add(new Field(SearchItem.FIELD_TYPE, DIRECTORY, ft)); UrlItem url = new UrlItem(AppPathService.getPortalUrl()); url.addParameter(XPageAppService.PARAM_XPAGE_APP, DIRECTORY); url.addParameter(PARAMETER_ID_DIRECTORY_RECORD, record.getIdRecord()); url.addParameter(PARAMETER_VIEW_DIRECTORY_RECORD, ""); doc.add(new Field(SearchItem.FIELD_URL, url.getUrl(), ft)); //Add the uid as a field, so that index can be incrementally maintained. // This field is not stored with question/answer, it is indexed, but it is not // tokenized prior to indexing. String strUID = Integer.toString(record.getIdRecord()) + "_" + SHORT_NAME; doc.add(new Field(SearchItem.FIELD_UID, strUID, ftNotStored)); return doc; }
From source file:fr.paris.lutece.plugins.helpdesk.service.search.HelpdeskIndexer.java
License:Open Source License
/** * Builds a document which will be used by Lucene during the indexing of the * subject list//from w w w. ja v a 2s . co m * * @param subject the {@link Subject} to index * @param strUrl the url of the subject * @param strRoleKey The role key * @param plugin The {@link Plugin} * @return The Lucene {@link Document} containing Subject data * @throws IOException The IO Exception * @throws InterruptedException The InterruptedException */ public static org.apache.lucene.document.Document getDocument(Subject subject, String strRoleKey, String strUrl, Plugin plugin) throws IOException, InterruptedException { // make a new, empty document org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document(); FieldType ft = new FieldType(StringField.TYPE_STORED); ft.setOmitNorms(false); FieldType ftNotStored = new FieldType(StringField.TYPE_NOT_STORED); ftNotStored.setOmitNorms(false); // Add the url as a field named "url". Use an UnIndexed field, so // that the url is just stored with the question/answer, but is not searchable. doc.add(new Field(SearchItem.FIELD_URL, strUrl, ft)); // Add the uid as a field, so that index can be incrementally maintained. // This field is not stored with question/answer, it is indexed, but it is not // tokenized prior to indexing. String strIdSubject = String.valueOf(subject.getId()); doc.add(new Field(SearchItem.FIELD_UID, strIdSubject + "_" + SHORT_NAME_SUBJECT, ftNotStored)); doc.add(new Field(SearchItem.FIELD_CONTENTS, subject.getText(), ftNotStored)); // Add the subject name as a separate Text field, so that it can be searched // separately. doc.add(new Field(SearchItem.FIELD_TITLE, subject.getText(), ft)); doc.add(new Field(SearchItem.FIELD_TYPE, HelpdeskPlugin.PLUGIN_NAME, ft)); doc.add(new Field(SearchItem.FIELD_ROLE, strRoleKey, ft)); // return the document return doc; }
From source file:indexing.IndexFiles.java
License:Apache License
/** * Parses and stores the content of each Document to the fields * //from w w w . j a v a 2 s . co m * @param doc * The Document to which data is stored * @param file * The input file which is being processed * @return The Document with all fields or null if relevant fields could not be found in the datasets * @throws IOException */ private static Document parseWikipage(Document doc, WikiPage page) throws IOException { doc.add(new TextField(Fieldname.TITLE.toString(), page.getTitle().trim(), Field.Store.YES)); // Field to allow spellchecking against the full title FieldType ft = new FieldType(StringField.TYPE_NOT_STORED); ft.setOmitNorms(false); doc.add(new Field(Fieldname.SPELLCHECK.toString(), page.getTitle().trim(), ft)); String fre = Utils.getMostFrequentWords(page.getWikiText(), Config.numOfFreqWords); // System.out.println(fre); doc.add(new TextField(Fieldname.FREQWORDS.toString(), fre, Field.Store.YES)); // The body is only stored, not indexed. If it should be indexed use TextField // doc.add(new StoredField(Fieldname.BODY.toString(), Utils.wiki2text(page.getWikiText()))); // doc.add(new IntField(Fieldname.ID.toString(), Integer.parseInt(page.getID()), Field.Store.YES)); /* * Remove whitespace and parenthesis within the link, such that in tokenization each link is handled as one * token. */ String links = ""; for (String link : page.getLinks()) { links += link.replaceAll("[() .,'-]", "").toLowerCase() + " "; } // System.out.println(links); doc.add(new TextField(Fieldname.LINKS.toString(), links, Field.Store.YES)); /* * Remove whitespace and parenthesis within the category, such that in tokenization each category is handled as * one token. */ String categories = ""; for (String category : page.getCategories()) { categories += category.replaceAll("[() .,'-]", "").toLowerCase() + " "; } doc.add(new TextField(Fieldname.CATEGORIES.toString(), categories, Field.Store.YES)); return doc; }
From source file:org.apache.blur.analysis.type.StringFieldTypeDefinition.java
License:Apache License
/**
 * Produces the index fields for one sub-column.
 *
 * <p>Creates a single {@code StringField.TYPE_NOT_STORED} field holding the column value;
 * when sorting is enabled the result of {@code addSort} for that field is returned instead.
 */
@Override
public Iterable<? extends Field> getFieldsForSubColumn(String family, Column column, String subName) {
    final String fieldName = getName(family, column.getName(), subName);
    final Field keywordField = new Field(fieldName, column.getValue(), StringField.TYPE_NOT_STORED);
    if (!isSortEnable()) {
        return makeIterable(keywordField);
    }
    return addSort(column, fieldName, keywordField);
}
From source file:org.apache.blur.mapreduce.lib.v2.DocumentWritable.java
License:Apache License
private void writeStringField(DataOutput out, StringField stringField) throws IOException { FieldType fieldType = stringField.fieldType(); if (fieldType.equals(StringField.TYPE_STORED)) { out.writeBoolean(true);//from w w w .j a v a 2 s.com } else if (fieldType.equals(StringField.TYPE_NOT_STORED)) { out.writeBoolean(false); } else { throw new IOException("Non default FieldTypes for StringField not supported."); } writeString(out, stringField.stringValue()); }
From source file:org.apache.solr.legacy.BBoxStrategy.java
License:Apache License
/** * Creates this strategy.//from ww w.j av a 2 s .c om * {@code fieldType} is used to customize the indexing options of the 4 number fields, and to a lesser degree the XDL * field too. Search requires pointValues (or legacy numerics), and relevancy requires docValues. If these features * aren't needed then disable them. */ public BBoxStrategy(SpatialContext ctx, String fieldNamePrefix, FieldType fieldType) { super(ctx, fieldNamePrefix); field_bbox = fieldNamePrefix; field_minX = fieldNamePrefix + SUFFIX_MINX; field_maxX = fieldNamePrefix + SUFFIX_MAXX; field_minY = fieldNamePrefix + SUFFIX_MINY; field_maxY = fieldNamePrefix + SUFFIX_MAXY; field_xdl = fieldNamePrefix + SUFFIX_XDL; fieldType.freeze(); this.optionsFieldType = fieldType; int numQuads = 0; if ((this.hasStored = fieldType.stored())) { numQuads++; } if ((this.hasDocVals = fieldType.docValuesType() != DocValuesType.NONE)) { numQuads++; } if ((this.hasPointVals = fieldType.pointDimensionCount() > 0)) { numQuads++; } if (fieldType.indexOptions() != IndexOptions.NONE && fieldType instanceof LegacyFieldType && ((LegacyFieldType) fieldType).numericType() != null) { if (hasPointVals) { throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive"); } final LegacyFieldType legacyType = (LegacyFieldType) fieldType; if (legacyType.numericType() != LegacyNumericType.DOUBLE) { throw new IllegalArgumentException(getClass() + " does not support " + legacyType.numericType()); } numQuads++; legacyNumericFieldType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED); legacyNumericFieldType.setNumericPrecisionStep(legacyType.numericPrecisionStep()); legacyNumericFieldType.freeze(); } else { legacyNumericFieldType = null; } if (hasPointVals || legacyNumericFieldType != null) { // if we have an index... 
xdlFieldType = new FieldType(StringField.TYPE_NOT_STORED); xdlFieldType.setIndexOptions(IndexOptions.DOCS); xdlFieldType.freeze(); } else { xdlFieldType = null; } this.fieldsLen = numQuads * 4 + (xdlFieldType != null ? 1 : 0); }