List of usage examples for org.apache.lucene.document.Field#setStringValue(String)
public void setStringValue(String value)
Expert: change the value of this field.
From source file:biospectra.index.Indexer.java
License:Apache License
public synchronized void index(File fastaDoc, File taxonDoc) throws Exception { if (fastaDoc == null) { throw new IllegalArgumentException("fastaDoc is null"); }/* w w w . j a va 2 s . c om*/ String taxonTree = ""; if (taxonDoc != null && taxonDoc.exists()) { FileReader reader = new FileReader(taxonDoc); taxonTree = IOUtils.toString(reader); IOUtils.closeQuietly(reader); } FASTAReader reader = FastaFileReader.getFASTAReader(fastaDoc); FASTAEntry read = null; while ((read = reader.readNext()) != null) { String headerLine = read.getHeaderLine(); if (headerLine.startsWith(">")) { headerLine = headerLine.substring(1); } final String f_filename = fastaDoc.getName(); final String sequence = read.getSequence(); final String header = headerLine; final String f_taxonTree = taxonTree; final boolean f_minStrandKmer = this.minStrandKmer; Runnable worker = new Runnable() { @Override public void run() { try { Document doc = freeQueue.poll(); if (doc == null) { doc = new Document(); Field filenameField = new StringField(IndexConstants.FIELD_FILENAME, "", Field.Store.YES); Field headerField = new StringField(IndexConstants.FIELD_HEADER, "", Field.Store.YES); Field sequenceDirectionField = new StringField(IndexConstants.FIELD_SEQUENCE_DIRECTION, "", Field.Store.YES); Field taxonTreeField = new StringField(IndexConstants.FIELD_TAXONOMY_TREE, "", Field.Store.YES); Field sequenceField = new TextField(IndexConstants.FIELD_SEQUENCE, "", Field.Store.NO); doc.add(filenameField); doc.add(headerField); doc.add(sequenceDirectionField); doc.add(taxonTreeField); doc.add(sequenceField); } StringField filenameField = (StringField) doc.getField(IndexConstants.FIELD_FILENAME); StringField headerField = (StringField) doc.getField(IndexConstants.FIELD_HEADER); StringField sequenceDirectionField = (StringField) doc .getField(IndexConstants.FIELD_SEQUENCE_DIRECTION); StringField taxonTreeField = (StringField) doc.getField(IndexConstants.FIELD_TAXONOMY_TREE); TextField sequenceField = (TextField) 
doc.getField(IndexConstants.FIELD_SEQUENCE); filenameField.setStringValue(f_filename); headerField.setStringValue(header); taxonTreeField.setStringValue(f_taxonTree); if (f_minStrandKmer) { // min-strand sequenceDirectionField.setStringValue("min_strand"); sequenceField.setStringValue(sequence); indexWriter.addDocument(doc); } else { // forward-strand sequenceDirectionField.setStringValue("forward"); sequenceField.setStringValue(sequence); indexWriter.addDocument(doc); // reverse-strand sequenceDirectionField.setStringValue("reverse"); sequenceField.setStringValue(SequenceHelper.getReverseComplement(sequence)); indexWriter.addDocument(doc); } freeQueue.offer(doc); } catch (Exception ex) { LOG.error("Exception occurred during index construction", ex); } } }; this.executor.execute(worker); } reader.close(); }
From source file:brightsolid.solr.plugins.TestTargetPositionQueryFuzzy.java
License:Apache License
@Override public void setUp() throws Exception { super.setUp(); directory = newDirectory();//from w ww. j a v a2 s . co m RandomIndexWriter iw = new RandomIndexWriter(random(), directory); Document doc = new Document(); FieldType newType = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED); newType.setOmitNorms(true); Field field = newField("field", "", newType); field.fieldType().setOmitNorms(true); doc.add(field); field.setStringValue("one two threx"); iw.addDocument(doc); field.setStringValue("two threx one"); iw.addDocument(doc); field.setStringValue("threx one twp"); iw.addDocument(doc); reader = iw.getReader(); iw.close(); searcher = newSearcher(reader); }
From source file:brightsolid.solr.plugins.TestTargetPositionQuerySimple.java
License:Apache License
/**
 * Builds a seven-document in-memory index over a single norms-free text
 * field, then opens the reader/searcher used by the simple
 * target-position-query tests.
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

    FieldType noNormsType = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED);
    noNormsType.setOmitNorms(true);

    Field field = newField("field", "", noNormsType);
    field.fieldType().setOmitNorms(true);

    Document doc = new Document();
    doc.add(field);

    // Reuse one Document/Field pair, changing only the value per document.
    String[] values = { "two", "one", "one two", "two one", "one two three", "three two one", "one one two" };
    for (String value : values) {
        field.setStringValue(value);
        writer.addDocument(doc);
    }

    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
}
From source file:brightsolid.solr.plugins.TestTargetPositionQuerySynonyms.java
License:Apache License
@Override public void setUp() throws Exception { super.setUp(); String testFile = "one, uno, un\n" + "two, dos, too\n" + "three, free, tres"; SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random())); parser.parse(new StringReader(testFile)); final SynonymMap map = parser.build(); Analyzer analyzer = new Analyzer() { @Override//from w w w . j a v a 2s . co m protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false)); } }; directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), directory, analyzer); Document doc = new Document(); FieldType newType = new FieldType(org.apache.lucene.document.TextField.TYPE_STORED); newType.setOmitNorms(true); Field field = newField("field", "", newType); field.fieldType().setOmitNorms(true); doc.add(field); field.setStringValue("one two three"); iw.addDocument(doc); field.setStringValue("two three one"); iw.addDocument(doc); field.setStringValue("three one two"); iw.addDocument(doc); reader = iw.getReader(); iw.close(); searcher = newSearcher(reader); }
From source file:com.chimpler.example.FacetLuceneIndexer.java
License:Apache License
public static void main(String args[]) throws Exception { // if (args.length != 3) { // System.err.println("Parameters: [index directory] [taxonomy directory] [json file]"); // System.exit(1); // }//from w w w . j av a 2 s.com String indexDirectory = "index"; String taxonomyDirectory = "taxonomy"; String jsonFileName = "/home/qiuqiang/workspace/facet-lucene-example/books.json"; IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION, new WhitespaceAnalyzer(LUCENE_VERSION)); writerConfig.setOpenMode(OpenMode.APPEND); IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexDirectory)), writerConfig); TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(MMapDirectory.open(new File(taxonomyDirectory)), OpenMode.APPEND); TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.open(new File(taxonomyDirectory))); String content = IOUtils.toString(new FileInputStream(jsonFileName)); JSONArray bookArray = new JSONArray(content); Field idField = new IntField("id", 0, Store.YES); Field titleField = new TextField("title", "", Store.YES); Field authorsField = new TextField("authors", "", Store.YES); Field bookCategoryField = new TextField("book_category", "", Store.YES); indexWriter.deleteAll(); FacetFields facetFields = new FacetFields(taxonomyWriter); for (int i = 0; i < bookArray.length(); i++) { Document document = new Document(); JSONObject book = bookArray.getJSONObject(i); int id = book.getInt("id"); String title = book.getString("title"); String bookCategory = book.getString("book_category"); List<CategoryPath> categoryPaths = new ArrayList<CategoryPath>(); String authorsString = ""; JSONArray authors = book.getJSONArray("authors"); for (int j = 0; j < authors.length(); j++) { String author = authors.getString(j); if (j > 0) { authorsString += ", "; } categoryPaths.add(new CategoryPath("author", author)); authorsString += author; } categoryPaths.add(new CategoryPath("book_category" + bookCategory, '/')); 
idField.setIntValue(id); titleField.setStringValue(title); authorsField.setStringValue(authorsString); bookCategoryField.setStringValue(bookCategory); facetFields.addFields(document, categoryPaths); document.add(idField); document.add(titleField); document.add(authorsField); document.add(bookCategoryField); indexWriter.addDocument(document); System.out.printf("Book: id=%d, title=%s, book_category=%s, authors=%s\n", id, title, bookCategory, authors); } taxonomyWriter.prepareCommit(); try { taxonomyWriter.commit(); } catch (Exception e) { taxonomyWriter.rollback(); } // taxonomyWriter.close(); // // indexWriter.commit(); // indexWriter.close(); String query = "story"; IndexReader indexReader = DirectoryReader.open(indexWriter, false); IndexReader indexReader2 = DirectoryReader.open(indexWriter, false); System.out.println(indexReader == indexReader2); IndexSearcher indexSearcher = new IndexSearcher(indexReader); TaxonomyReader newTaxonomyReader = DirectoryTaxonomyReader.openIfChanged(taxonomyReader); if (newTaxonomyReader != null) { TaxonomyReader tmp = taxonomyReader; taxonomyReader = newTaxonomyReader; tmp.close(); } else { System.out.println("null"); } ArrayList<FacetRequest> facetRequests = new ArrayList<FacetRequest>(); facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100)); facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100)); FacetSearchParams searchParams = new FacetSearchParams(facetRequests); ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title", new StandardAnalyzer(LUCENE_VERSION)); Query luceneQuery = queryParser.parse(query); // Collectors to get top results and facets TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true); FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader); indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector)); 
System.out.println("Found:"); for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) { Document document = indexReader.document(scoreDoc.doc); System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n", document.get("id"), document.get("title"), document.get("book_category"), document.get("authors"), scoreDoc.score); } System.out.println("Facets:"); for (FacetResult facetResult : facetsCollector.getFacetResults()) { System.out.println("- " + facetResult.getFacetResultNode().label); for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) { System.out.printf(" - %s (%f)\n", facetResultNode.label.toString(), facetResultNode.value); for (FacetResultNode subFacetResultNode : facetResultNode.subResults) { System.out.printf(" - %s (%f)\n", subFacetResultNode.label.toString(), subFacetResultNode.value); } } } taxonomyReader.close(); indexReader.close(); taxonomyWriter.commit(); taxonomyWriter.close(); indexWriter.commit(); indexWriter.close(); }
From source file:com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java
License:Open Source License
/**
 * Adds the given object to the index, failing if a document for it already
 * exists. Empty field values are replaced with NULL_VALUE so every index
 * field is always populated.
 *
 * @param object      the domain object to index
 * @param indexWriter writer to add the resulting document to
 * @throws RuntimeException if the object is already indexed or on I/O failure
 */
protected synchronized void add(T object, IndexWriter indexWriter) {
    // Guard clause: refuse to add an object that is already indexed.
    int existingDocNum = getDocNum(object);
    if (existingDocNum != -1) {
        throw new RuntimeException("Document dj existant! (docNum:" + existingDocNum + ")");
    }

    Document document = new Document();
    for (Field field : createIndexFields()) {
        populateIndexField(object, field, document);
        // Blank values are stored as the NULL_VALUE sentinel.
        if (StringUtils.isEmpty(field.stringValue())) {
            field.setStringValue(NULL_VALUE);
        }
        document.add(field);
    }

    try {
        indexWriter.addDocument(document);
    } catch (CorruptIndexException e) {
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
From source file:com.doculibre.constellio.lucene.impl.FreeTextTagIndexHelperImpl.java
License:Open Source License
/**
 * Fills the given index field from the FreeTextTag: the ID field gets the
 * tag's id, the FREE_TEXT field gets its text. Other fields are untouched.
 */
@Override
protected void populateIndexField(FreeTextTag object, Field indexField, Document doc) {
    String fieldName = indexField.name();
    if (ID.equals(fieldName)) {
        indexField.setStringValue(object.getId().toString());
        return;
    }
    if (FREE_TEXT.equals(fieldName)) {
        indexField.setStringValue(object.getFreeText());
    }
}
From source file:com.doculibre.constellio.lucene.impl.SkosIndexHelperImpl.java
License:Open Source License
/**
 * Fills the given index field from the SkosConcept, dispatching on the
 * field name. For multi-valued labels (PREF_LABEL / ALT_LABEL) the first
 * value goes into the supplied field and each additional value is added to
 * the document in a freshly created field of the same name, so all values
 * are searchable.
 *
 * Branch order matters: the exact-equals cases must precede the
 * startsWith cases, since e.g. "prefLabel_fr" startsWith PREF_LABEL.
 */
@Override
protected void populateIndexField(SkosConcept skosConcept, Field indexField, Document doc) {
    String indexFieldName = indexField.name();
    List<Locale> supportedLocales = ConstellioSpringUtils.getSupportedLocales();
    if (indexFieldName.equals(ID)) {
        indexField.setStringValue(skosConcept.getId().toString());
    } else if (indexFieldName.equals(THESAURUS_ID)) {
        indexField.setStringValue(skosConcept.getThesaurus().getId().toString());
    } else if (indexFieldName.equals(PREF_LABEL)) {
        // All preferred labels across supported locales: first value fills
        // indexField, subsequent ones become extra PREF_LABEL fields.
        boolean first = true;
        for (I18NLabel prefLabel : skosConcept.getPrefLabels()) {
            for (Locale locale : prefLabel.getValues().keySet()) {
                if (supportedLocales.contains(locale)) {
                    if (first) {
                        indexField.setStringValue(prefLabel.getValue(locale));
                        first = false;
                    } else {
                        Field extraField = createDefaultIndexField(PREF_LABEL);
                        extraField.setStringValue(prefLabel.getValue(locale));
                        doc.add(extraField);
                    }
                }
            }
        }
    } else if (indexFieldName.equals(ALT_LABEL)) {
        // Same first/extra pattern for alternate labels in supported locales.
        boolean first = true;
        for (SkosConceptAltLabel altLabel : skosConcept.getAltLabels()) {
            Locale locale = altLabel.getLocale();
            if (supportedLocales.contains(locale)) {
                for (String altLabelValue : skosConcept.getAltLabels(locale)) {
                    if (first) {
                        indexField.setStringValue(altLabelValue);
                        first = false;
                    } else {
                        Field extraField = createDefaultIndexField(ALT_LABEL);
                        extraField.setStringValue(altLabelValue);
                        doc.add(extraField);
                    }
                }
            }
        }
    } else if (indexFieldName.startsWith(PREF_LABEL)) {
        // Locale-suffixed field, e.g. PREF_LABEL + "_fr": single value for
        // the locale parsed from the suffix after '_'.
        if (indexFieldName.indexOf("_") != -1) {
            String language = StringUtils.substringAfter(indexFieldName, "_");
            Locale locale = new Locale(language);
            String prefLabel = skosConcept.getPrefLabel(locale);
            if (StringUtils.isNotBlank(prefLabel)) {
                indexField.setStringValue(prefLabel);
            }
        }
    } else if (indexFieldName.startsWith(ALT_LABEL)) {
        String language = StringUtils.substringAfter(indexFieldName, "_");
        Locale locale = new Locale(language);
        // NOTE(review): this branch indexes ALT labels but gates on the
        // PREF label being non-blank — looks like a copy-paste from the
        // branch above; confirm whether getAltLabels(locale) emptiness was
        // the intended condition before changing it.
        String prefLabel = skosConcept.getPrefLabel(locale);
        if (StringUtils.isNotBlank(prefLabel)) {
            boolean first = true;
            for (String altLabelValue : skosConcept.getAltLabels(locale)) {
                if (first) {
                    indexField.setStringValue(altLabelValue);
                    first = false;
                } else {
                    Field extraField = createDefaultIndexField(indexFieldName);
                    extraField.setStringValue(altLabelValue);
                    doc.add(extraField);
                }
            }
        }
    }
}
From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java
License:Apache License
public void test() throws Exception { Directory dir = newDirectory();/* w w w. ja v a2 s . c o m*/ Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(); if (fieldName.contains("payloadsFixed")) { TokenFilter filter = new MockFixedLengthPayloadFilter(new Random(0), tokenizer, 1); return new TokenStreamComponents(tokenizer, filter); } else if (fieldName.contains("payloadsVariable")) { TokenFilter filter = new MockVariableLengthPayloadFilter(new Random(0), tokenizer); return new TokenStreamComponents(tokenizer, filter); } else { return new TokenStreamComponents(tokenizer); } } }; IndexWriterConfig iwc = newIndexWriterConfig(analyzer); iwc.setCodec(TestUtil.alwaysPostingsFormat(new RocanaLucene50PostingsFormat())); // TODO we could actually add more fields implemented with different PFs // or, just put this test into the usual rotation? RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); Document doc = new Document(); FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED); // turn this on for a cross-check docsOnlyType.setStoreTermVectors(true); docsOnlyType.setIndexOptions(IndexOptions.DOCS); FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED); // turn this on for a cross-check docsAndFreqsType.setStoreTermVectors(true); docsAndFreqsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS); FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED); // turn these on for a cross-check positionsType.setStoreTermVectors(true); positionsType.setStoreTermVectorPositions(true); positionsType.setStoreTermVectorOffsets(true); positionsType.setStoreTermVectorPayloads(true); FieldType offsetsType = new FieldType(positionsType); offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); Field field1 = new Field("field1docs", "", docsOnlyType); Field field2 = new 
Field("field2freqs", "", docsAndFreqsType); Field field3 = new Field("field3positions", "", positionsType); Field field4 = new Field("field4offsets", "", offsetsType); Field field5 = new Field("field5payloadsFixed", "", positionsType); Field field6 = new Field("field6payloadsVariable", "", positionsType); Field field7 = new Field("field7payloadsFixedOffsets", "", offsetsType); Field field8 = new Field("field8payloadsVariableOffsets", "", offsetsType); doc.add(field1); doc.add(field2); doc.add(field3); doc.add(field4); doc.add(field5); doc.add(field6); doc.add(field7); doc.add(field8); for (int i = 0; i < MAXDOC; i++) { String stringValue = Integer.toString(i) + " verycommon " + English.intToEnglish(i).replace('-', ' ') + " " + TestUtil.randomSimpleString(random()); field1.setStringValue(stringValue); field2.setStringValue(stringValue); field3.setStringValue(stringValue); field4.setStringValue(stringValue); field5.setStringValue(stringValue); field6.setStringValue(stringValue); field7.setStringValue(stringValue); field8.setStringValue(stringValue); iw.addDocument(doc); } iw.close(); verify(dir); TestUtil.checkIndex(dir); // for some extra coverage, checkIndex before we forceMerge iwc = newIndexWriterConfig(analyzer); iwc.setCodec(TestUtil.alwaysPostingsFormat(new RocanaLucene50PostingsFormat())); iwc.setOpenMode(OpenMode.APPEND); IndexWriter iw2 = new IndexWriter(dir, iwc); iw2.forceMerge(1); iw2.close(); verify(dir); dir.close(); }
From source file:com.rocana.lucene.codec.v1.TestRocanaPerFieldPostingsFormat2.java
License:Apache License
private void doTestMixedPostings(Codec codec) throws Exception { Directory dir = newDirectory();/*from w ww . java 2s . c o m*/ IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); iwc.setCodec(codec); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); // turn on vectors for the checkindex cross-check ft.setStoreTermVectors(true); ft.setStoreTermVectorOffsets(true); ft.setStoreTermVectorPositions(true); Field idField = new Field("id", "", ft); Field dateField = new Field("date", "", ft); doc.add(idField); doc.add(dateField); for (int i = 0; i < 100; i++) { idField.setStringValue(Integer.toString(random().nextInt(50))); dateField.setStringValue(Integer.toString(random().nextInt(100))); iw.addDocument(doc); } iw.close(); dir.close(); // checkindex }