List of usage examples for the org.apache.lucene.document.Field constructor Field
public Field(String name, CharSequence value, IndexableFieldType type)
From source file:alix.lucene.Alix.java
License:Open Source License
private static void addField(String name, String value, String options, float boost) { // do not add field for null ? if (value == null) return;/*from w w w . java 2 s. c om*/ if (doc == null) { System.err.println("Please call docNew() before field()!"); return; } if (name == Alix.FILENAME) { System.err.println(name + " is a reserved field name for Alix."); return; } Field field; if (options == null || "".equals(options)) { field = new StringField(name, value, Store.YES); } else if (options.contains("#")) { field = new IntField(name, Integer.parseInt(value), Field.Store.YES); } else if (options.contains(".")) { field = new FloatField(name, Float.parseFloat(value), Field.Store.YES); } else { field = new Field(name, value, fieldType(options)); } if (boost > 0) field.setBoost(boost); doc.add(field); }
From source file:api.startup.PDFIndexer.java
License:Open Source License
/** * Indexes a single document and writes it to the given index writer * @param writer - the index writer to writer * @param metadata - the document/*from ww w . jav a 2 s . co m*/ * @throws IOException */ static void indexDoc(IndexWriter writer, DocumentMetadata metadata) throws IOException { Path file = Paths.get(metadata.getFilename()); try { Document doc = new Document(); Field pathField = new StringField(Constants.FIELD_PATH, file.toString(), Field.Store.YES); doc.add(pathField); // Add Document metadata // doc.add(new StringField(Constants.FIELD_AUTHOR, metadata.getAuthor(), Field.Store.YES)); doc.add(new StringField(Constants.FIELD_TITLE, metadata.getTitle(), Field.Store.YES)); doc.add(new StringField(Constants.FIELD_CONFERENCE, metadata.getConference(), Field.Store.YES)); // End of Document Metadata // Field modified = new LongField(Constants.FIELD_MODIFIED, Files.getLastModifiedTime(file).toMillis(), Field.Store.YES); doc.add(modified); PDFTextExtractor extractor = new PDFTextExtractor(); // Get the string contents String textContents = extractor.extractText(file.toString()); // Store the string contents FieldType contentsType = new FieldType(); contentsType.setStored(true); contentsType.setTokenized(true); contentsType.setStoreTermVectors(true); contentsType.setStoreTermVectorPositions(true); contentsType.setStoreTermVectorPayloads(true); contentsType.setStoreTermVectorOffsets(true); contentsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); Field contents = new Field(Constants.FIELD_CONTENTS, textContents, contentsType); doc.add(contents); if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): log.info("adding " + file + " to index"); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if 
present: log.info("updating " + file + " in index"); writer.updateDocument(new Term(Constants.FIELD_PATH, file.toString()), doc); } } catch (IOException e) { log.error("Failed to read file " + metadata.getFilename()); } }
From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java
License:Apache License
/** * Creates lucene documents from SKOS concept. In order to allow language * restrictions, one document per language is created. *//*from www . j a v a2 s . c o m*/ private Document createDocumentsFromConcept(Resource skos_concept) { Document conceptDoc = new Document(); String conceptURI = skos_concept.getURI(); Field uriField = new Field(FIELD_URI, conceptURI, StringField.TYPE_STORED); conceptDoc.add(uriField); // store the preferred lexical labels indexAnnotation(skos_concept, conceptDoc, SKOS.prefLabel, FIELD_PREF_LABEL); // store the alternative lexical labels indexAnnotation(skos_concept, conceptDoc, SKOS.altLabel, FIELD_ALT_LABEL); // store the hidden lexical labels indexAnnotation(skos_concept, conceptDoc, SKOS.hiddenLabel, FIELD_HIDDEN_LABEL); // store the URIs of the broader concepts indexObject(skos_concept, conceptDoc, SKOS.broader, FIELD_BROADER); // store the URIs of the broader transitive concepts indexObject(skos_concept, conceptDoc, SKOS.broaderTransitive, FIELD_BROADER_TRANSITIVE); // store the URIs of the narrower concepts indexObject(skos_concept, conceptDoc, SKOS.narrower, FIELD_NARROWER); // store the URIs of the narrower transitive concepts indexObject(skos_concept, conceptDoc, SKOS.narrowerTransitive, FIELD_NARROWER_TRANSITIVE); // store the URIs of the related concepts indexObject(skos_concept, conceptDoc, SKOS.related, FIELD_RELATED); return conceptDoc; }
From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java
License:Apache License
private void indexAnnotation(Resource skos_concept, Document conceptDoc, AnnotationProperty property, String field) {/*from w ww .ja v a2 s . c om*/ StmtIterator stmt_iter = skos_concept.listProperties(property); while (stmt_iter.hasNext()) { Literal labelLiteral = stmt_iter.nextStatement().getObject().as(Literal.class); String label = labelLiteral.getLexicalForm(); String labelLang = labelLiteral.getLanguage(); if (this.languages != null && !this.languages.isEmpty() && !this.languages.contains(labelLang)) { continue; } // converting label to lower-case label = label.toLowerCase(Locale.ROOT); Field labelField = new Field(field, label, StringField.TYPE_STORED); conceptDoc.add(labelField); } }
From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java
License:Apache License
private void indexObject(Resource skos_concept, Document conceptDoc, ObjectProperty property, String field) { StmtIterator stmt_iter = skos_concept.listProperties(property); while (stmt_iter.hasNext()) { RDFNode concept = stmt_iter.nextStatement().getObject(); if (!concept.canAs(Resource.class)) { logger.warn("Error when indexing relationship of concept " + skos_concept.getURI() + " ."); continue; }//www . j av a 2s . co m Resource resource = concept.as(Resource.class); Field conceptField = new Field(field, resource.getURI(), TextField.TYPE_STORED); conceptDoc.add(conceptField); } }
From source file:at.ac.univie.mminf.luceneSKOS.queryparser.flexible.standard.SKOSStandardQueryParserTest.java
License:Apache License
@Test public void queryParserSearch() throws IOException, QueryNodeException { Document doc = new Document(); doc.add(new Field("content", "The quick brown fox jumps over the lazy dog", TextField.TYPE_STORED)); writer.addDocument(doc);//from www . j a va2 s . co m searcher = new IndexSearcher(DirectoryReader.open(writer, false)); Query query = new SKOSStandardQueryParser(skosAnalyzer).parse("\"fox jumps\"", "content"); Assert.assertEquals(1, TestUtil.hitCount(searcher, query)); Assert.assertEquals("content:\"fox (jumps hops leaps)\"", query.toString()); Assert.assertEquals("org.apache.lucene.search.MultiPhraseQuery", query.getClass().getName()); query = new StandardQueryParser(new StandardAnalyzer(matchVersion)).parse("\"fox jumps\"", "content"); Assert.assertEquals(1, TestUtil.hitCount(searcher, query)); Assert.assertEquals("content:\"fox jumps\"", query.toString()); Assert.assertEquals("org.apache.lucene.search.PhraseQuery", query.getClass().getName()); }
From source file:at.ac.univie.mminf.luceneSKOS.queryparser.flexible.standard.SKOSStandardQueryParserTest.java
License:Apache License
@Test public void queryParserSearchWithBoosts() throws IOException, QueryNodeException { Document doc = new Document(); doc.add(new Field("content", "The quick brown fox jumps over the lazy dog", TextField.TYPE_STORED)); writer.addDocument(doc);//w w w.java 2 s.com searcher = new IndexSearcher(DirectoryReader.open(writer, false)); SKOSStandardQueryParser parser = new SKOSStandardQueryParser(skosAnalyzer); parser.setBoost(SKOSType.ALT, 0.5f); Query query = parser.parse("\"fox jumps\"", "content"); Assert.assertEquals(1, TestUtil.hitCount(searcher, query)); // boosts do not work in phrase queries Assert.assertEquals("content:\"fox (jumps hops leaps)\"", query.toString()); Assert.assertEquals("org.apache.lucene.search.MultiPhraseQuery", query.getClass().getName()); query = parser.parse("fox jumps", "content"); Assert.assertEquals(1, TestUtil.hitCount(searcher, query)); Assert.assertEquals("content:fox (content:jumps content:hops^0.5 content:leaps^0.5)", query.toString()); Assert.assertEquals("org.apache.lucene.search.BooleanQuery", query.getClass().getName()); query = new SKOSStandardQueryParser(new StandardAnalyzer(matchVersion)).parse("fox jumps", "content"); Assert.assertEquals(1, TestUtil.hitCount(searcher, query)); Assert.assertEquals("content:fox content:jumps", query.toString()); Assert.assertEquals("org.apache.lucene.search.BooleanQuery", query.getClass().getName()); }
From source file:at.ac.univie.mminf.luceneSKOS.test.SKOSLabelFilterTest.java
License:Apache License
/**
 * Indexes one sentence and expects a term query for "hops", a word that is absent
 * from the sentence text, to report exactly one hit.
 */
@Test
public void termQuerySearch() throws IOException {
    Document sentence = new Document();
    Field content = new Field("content", "The quick brown fox jumps over the lazy dog", TextField.TYPE_STORED);
    sentence.add(content);
    writer.addDocument(sentence);

    searcher = new IndexSearcher(DirectoryReader.open(writer, false));

    Term hops = new Term("content", "hops");
    assertEquals(1, searcher.search(new TermQuery(hops), 1).totalHits);
}
From source file:at.ac.univie.mminf.luceneSKOS.test.SKOSLabelFilterTest.java
License:Apache License
@Test public void phraseQuerySearch() throws IOException { Document doc = new Document(); doc.add(new Field("content", "The quick brown fox jumps over the lazy dog", TextField.TYPE_STORED)); writer.addDocument(doc);//from ww w . j a v a2s . c o m searcher = new IndexSearcher(DirectoryReader.open(writer, false)); PhraseQuery.Builder builder = new PhraseQuery.Builder(); builder.add(new Term("content", "fox")).add(new Term("content", "hops")); assertEquals(1, searcher.search(builder.build(), 1).totalHits); }
From source file:at.ac.univie.mminf.luceneSKOS.test.SKOSLabelFilterTest.java
License:Apache License
/**
 * Parses the same phrase through a standard query parser twice, once with the SKOS
 * analyzer and once with a plain standard analyzer: the first is expected to yield a
 * multi-phrase query, the second a simple phrase query, and both must match the
 * single indexed sentence.
 */
@Test
public void queryParserSearch() throws IOException, QueryNodeException {
    final String fieldName = "content";
    final String phrase = "\"fox jumps\"";

    Document sentence = new Document();
    sentence.add(new Field(fieldName, "The quick brown fox jumps over the lazy dog", TextField.TYPE_STORED));
    writer.addDocument(sentence);
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));

    // parsing through the SKOS analyzer
    Query skosQuery = new StandardQueryParser(skosAnalyzer).parse(phrase, fieldName);
    assertEquals(1, searcher.search(skosQuery, 1).totalHits);
    assertEquals("content:\"fox (jumps hops leaps)\"", skosQuery.toString());
    assertEquals("org.apache.lucene.search.MultiPhraseQuery", skosQuery.getClass().getName());

    // parsing through the plain standard analyzer
    Query plainQuery = new StandardQueryParser(new StandardAnalyzer()).parse(phrase, fieldName);
    assertEquals(1, searcher.search(plainQuery, 1).totalHits);
    assertEquals("content:\"fox jumps\"", plainQuery.toString());
    assertEquals("org.apache.lucene.search.PhraseQuery", plainQuery.getClass().getName());
}