Example usage for the org.apache.lucene.document.Field constructor

List of usage examples for the org.apache.lucene.document.Field constructor

Introduction

On this page you can find usage examples for the org.apache.lucene.document.Field constructor.

Prototype

public Field(String name, CharSequence value, IndexableFieldType type) 

Source Link

Document

Create field with String value.

Usage

From source file:alix.lucene.Alix.java

License:Open Source License

/**
 * Adds a field to the current document, picking the Lucene field class from {@code options}.
 *
 * <p>Nothing is added (and the method returns silently or with a message on
 * {@code System.err}) when {@code value} is null, when there is no current
 * document, or when {@code name} equals the reserved {@code Alix.FILENAME}.
 *
 * @param name    field name; must not be the reserved {@code Alix.FILENAME}
 * @param value   field value; parsed as an int when {@code options} contains "#",
 *                as a float when it contains "."
 * @param options null or empty for a stored {@code StringField}; otherwise "#" selects
 *                an {@code IntField}, "." a {@code FloatField}, and anything else is
 *                handed to {@code fieldType(options)}
 * @param boost   applied with {@code setBoost} only when strictly positive
 * @throws NumberFormatException if a numeric field is requested and {@code value}
 *                               cannot be parsed
 */
private static void addField(String name, String value, String options, float boost) {
    // A null value is skipped without any message.
    if (value == null) {
        return;
    }
    if (doc == null) {
        System.err.println("Please call docNew() before field()!");
        return;
    }
    // Compare by content: '==' only matches the very same String instance, so a
    // name built at runtime (parsed, concatenated, ...) would slip past the guard.
    if (name != null && name.equals(Alix.FILENAME)) {
        System.err.println(name + " is a reserved field name for Alix.");
        return;
    }
    Field field;
    if (options == null || options.isEmpty()) {
        field = new StringField(name, value, Field.Store.YES);
    } else if (options.contains("#")) {
        field = new IntField(name, Integer.parseInt(value), Field.Store.YES);
    } else if (options.contains(".")) {
        field = new FloatField(name, Float.parseFloat(value), Field.Store.YES);
    } else {
        field = new Field(name, value, fieldType(options));
    }
    if (boost > 0) {
        field.setBoost(boost);
    }
    doc.add(field);
}

From source file:api.startup.PDFIndexer.java

License:Open Source License

/**
 * Indexes a single document and writes it to the given index writer
 * @param writer - the index writer to writer
 * @param metadata - the document/*from  ww w . jav  a 2  s  . co m*/
 * @throws IOException
 */
static void indexDoc(IndexWriter writer, DocumentMetadata metadata) throws IOException {
    Path file = Paths.get(metadata.getFilename());
    try {
        Document doc = new Document();

        Field pathField = new StringField(Constants.FIELD_PATH, file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add Document metadata //
        doc.add(new StringField(Constants.FIELD_AUTHOR, metadata.getAuthor(), Field.Store.YES));
        doc.add(new StringField(Constants.FIELD_TITLE, metadata.getTitle(), Field.Store.YES));
        doc.add(new StringField(Constants.FIELD_CONFERENCE, metadata.getConference(), Field.Store.YES));
        // End of Document Metadata //

        Field modified = new LongField(Constants.FIELD_MODIFIED, Files.getLastModifiedTime(file).toMillis(),
                Field.Store.YES);
        doc.add(modified);

        PDFTextExtractor extractor = new PDFTextExtractor();
        // Get the string contents
        String textContents = extractor.extractText(file.toString());

        // Store the string contents
        FieldType contentsType = new FieldType();
        contentsType.setStored(true);
        contentsType.setTokenized(true);
        contentsType.setStoreTermVectors(true);
        contentsType.setStoreTermVectorPositions(true);
        contentsType.setStoreTermVectorPayloads(true);
        contentsType.setStoreTermVectorOffsets(true);
        contentsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        Field contents = new Field(Constants.FIELD_CONTENTS, textContents, contentsType);
        doc.add(contents);

        if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            log.info("adding " + file + " to index");
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            log.info("updating " + file + " in index");
            writer.updateDocument(new Term(Constants.FIELD_PATH, file.toString()), doc);
        }
    } catch (IOException e) {
        log.error("Failed to read file " + metadata.getFilename());
    }

}

From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java

License:Apache License

/**
 * Builds the Lucene document for one SKOS concept.
 *
 * <p>The document holds the concept URI plus one field per label
 * (preferred, alternative, hidden) and per related concept URI
 * (broader, narrower, their transitive variants, and related).
 *
 * @param skos_concept the concept resource to convert
 * @return the populated document
 */
private Document createDocumentsFromConcept(Resource skos_concept) {
    final Document luceneDoc = new Document();

    // the concept's own URI
    luceneDoc.add(new Field(FIELD_URI, skos_concept.getURI(), StringField.TYPE_STORED));

    // lexical labels: preferred, alternative, hidden
    indexAnnotation(skos_concept, luceneDoc, SKOS.prefLabel, FIELD_PREF_LABEL);
    indexAnnotation(skos_concept, luceneDoc, SKOS.altLabel, FIELD_ALT_LABEL);
    indexAnnotation(skos_concept, luceneDoc, SKOS.hiddenLabel, FIELD_HIDDEN_LABEL);

    // URIs of broader concepts, direct then transitive
    indexObject(skos_concept, luceneDoc, SKOS.broader, FIELD_BROADER);
    indexObject(skos_concept, luceneDoc, SKOS.broaderTransitive, FIELD_BROADER_TRANSITIVE);

    // URIs of narrower concepts, direct then transitive
    indexObject(skos_concept, luceneDoc, SKOS.narrower, FIELD_NARROWER);
    indexObject(skos_concept, luceneDoc, SKOS.narrowerTransitive, FIELD_NARROWER_TRANSITIVE);

    // URIs of related concepts
    indexObject(skos_concept, luceneDoc, SKOS.related, FIELD_RELATED);

    return luceneDoc;
}

From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java

License:Apache License

/**
 * Adds one stored field to {@code conceptDoc} for every literal value of
 * {@code property} on the given concept.
 *
 * <p>Labels are lower-cased with {@link Locale#ROOT} before being stored. When
 * {@code this.languages} is non-null and non-empty, literals whose language is
 * not contained in it are skipped.
 *
 * @param skos_concept the concept whose annotations are read
 * @param conceptDoc   the document receiving the label fields
 * @param property     the annotation property to list
 * @param field        the name of the field to create for each label
 */
private void indexAnnotation(Resource skos_concept, Document conceptDoc, AnnotationProperty property,
        String field) {
    for (StmtIterator statements = skos_concept.listProperties(property); statements.hasNext();) {
        Literal literal = statements.nextStatement().getObject().as(Literal.class);
        String text = literal.getLexicalForm();
        String lang = literal.getLanguage();

        // a language filter applies only when a non-empty set of languages is configured
        boolean restricted = this.languages != null && !this.languages.isEmpty();
        if (!restricted || this.languages.contains(lang)) {
            String lowered = text.toLowerCase(Locale.ROOT);
            conceptDoc.add(new Field(field, lowered, StringField.TYPE_STORED));
        }
    }
}

From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java

License:Apache License

/**
 * Adds one stored text field to {@code conceptDoc} for every object of
 * {@code property} on the given concept, using the object's URI as the value.
 *
 * <p>Objects that cannot be viewed as a {@code Resource} are skipped with a
 * warning.
 *
 * @param skos_concept the concept whose relationships are read
 * @param conceptDoc   the document receiving the URI fields
 * @param property     the object property to list
 * @param field        the name of the field to create for each related URI
 */
private void indexObject(Resource skos_concept, Document conceptDoc, ObjectProperty property, String field) {
    for (StmtIterator statements = skos_concept.listProperties(property); statements.hasNext();) {
        RDFNode target = statements.nextStatement().getObject();
        if (target.canAs(Resource.class)) {
            String targetUri = target.as(Resource.class).getURI();
            conceptDoc.add(new Field(field, targetUri, TextField.TYPE_STORED));
        } else {
            logger.warn("Error when indexing relationship of concept " + skos_concept.getURI() + " .");
        }
    }
}

From source file:at.ac.univie.mminf.luceneSKOS.queryparser.flexible.standard.SKOSStandardQueryParserTest.java

License:Apache License

/**
 * Parses the phrase {@code "fox jumps"} with the SKOS-aware parser and with a
 * plain standard parser, and checks the hit count, string form and class of
 * each resulting query against a one-sentence index.
 */
@Test
public void queryParserSearch() throws IOException, QueryNodeException {
    final String field = "content";
    final String phrase = "\"fox jumps\"";

    // index one sentence and open a searcher on top of the writer
    Document sentence = new Document();
    sentence.add(new Field(field, "The quick brown fox jumps over the lazy dog", TextField.TYPE_STORED));
    writer.addDocument(sentence);
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));

    // SKOS analyzer: the phrase is expected to become a multi-phrase query
    Query skosQuery = new SKOSStandardQueryParser(skosAnalyzer).parse(phrase, field);
    Assert.assertEquals(1, TestUtil.hitCount(searcher, skosQuery));
    Assert.assertEquals("content:\"fox (jumps hops leaps)\"", skosQuery.toString());
    Assert.assertEquals("org.apache.lucene.search.MultiPhraseQuery", skosQuery.getClass().getName());

    // standard analyzer: the phrase is expected to stay a plain phrase query
    Query plainQuery = new StandardQueryParser(new StandardAnalyzer(matchVersion)).parse(phrase, field);
    Assert.assertEquals(1, TestUtil.hitCount(searcher, plainQuery));
    Assert.assertEquals("content:\"fox jumps\"", plainQuery.toString());
    Assert.assertEquals("org.apache.lucene.search.PhraseQuery", plainQuery.getClass().getName());
}

From source file:at.ac.univie.mminf.luceneSKOS.queryparser.flexible.standard.SKOSStandardQueryParserTest.java

License:Apache License

/**
 * Sets a 0.5 boost for {@code SKOSType.ALT} on the SKOS parser and checks the
 * resulting queries for a phrase and for separate terms, then compares with a
 * SKOS parser built on a plain standard analyzer.
 */
@Test
public void queryParserSearchWithBoosts() throws IOException, QueryNodeException {
    final String field = "content";

    // index one sentence and open a searcher on top of the writer
    Document sentence = new Document();
    sentence.add(new Field(field, "The quick brown fox jumps over the lazy dog", TextField.TYPE_STORED));
    writer.addDocument(sentence);
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));

    SKOSStandardQueryParser boostingParser = new SKOSStandardQueryParser(skosAnalyzer);
    boostingParser.setBoost(SKOSType.ALT, 0.5f);

    // phrase query: boosts do not work in phrase queries
    Query phraseQuery = boostingParser.parse("\"fox jumps\"", field);
    Assert.assertEquals(1, TestUtil.hitCount(searcher, phraseQuery));
    Assert.assertEquals("content:\"fox (jumps hops leaps)\"", phraseQuery.toString());
    Assert.assertEquals("org.apache.lucene.search.MultiPhraseQuery", phraseQuery.getClass().getName());

    // separate terms: the expanded alternatives carry the 0.5 boost
    Query termsQuery = boostingParser.parse("fox jumps", field);
    Assert.assertEquals(1, TestUtil.hitCount(searcher, termsQuery));
    Assert.assertEquals("content:fox (content:jumps content:hops^0.5 content:leaps^0.5)",
            termsQuery.toString());
    Assert.assertEquals("org.apache.lucene.search.BooleanQuery", termsQuery.getClass().getName());

    // same parser class over a standard analyzer: no expansion expected
    Query plainQuery = new SKOSStandardQueryParser(new StandardAnalyzer(matchVersion)).parse("fox jumps",
            field);
    Assert.assertEquals(1, TestUtil.hitCount(searcher, plainQuery));
    Assert.assertEquals("content:fox content:jumps", plainQuery.toString());
    Assert.assertEquals("org.apache.lucene.search.BooleanQuery", plainQuery.getClass().getName());
}

From source file:at.ac.univie.mminf.luceneSKOS.test.SKOSLabelFilterTest.java

License:Apache License

/**
 * Indexes one sentence containing "jumps" and checks that a term query for
 * "hops" on the same field yields exactly one hit.
 */
@Test
public void termQuerySearch() throws IOException {
    final String field = "content";

    Document sentence = new Document();
    sentence.add(new Field(field, "The quick brown fox jumps over the lazy dog", TextField.TYPE_STORED));
    writer.addDocument(sentence);

    searcher = new IndexSearcher(DirectoryReader.open(writer, false));

    // NOTE(review): "hops" is not in the text; presumably the writer's analyzer
    // adds it at index time - confirm in the test setup.
    Term hopsTerm = new Term(field, "hops");
    assertEquals(1, searcher.search(new TermQuery(hopsTerm), 1).totalHits);
}

From source file:at.ac.univie.mminf.luceneSKOS.test.SKOSLabelFilterTest.java

License:Apache License

/**
 * Indexes one sentence containing "fox jumps" and checks that the phrase
 * query "fox hops" on the same field yields exactly one hit.
 */
@Test
public void phraseQuerySearch() throws IOException {
    final String field = "content";

    Document sentence = new Document();
    sentence.add(new Field(field, "The quick brown fox jumps over the lazy dog", TextField.TYPE_STORED));
    writer.addDocument(sentence);

    searcher = new IndexSearcher(DirectoryReader.open(writer, false));

    // build the phrase term by term
    PhraseQuery.Builder phrase = new PhraseQuery.Builder();
    phrase.add(new Term(field, "fox"));
    phrase.add(new Term(field, "hops"));

    assertEquals(1, searcher.search(phrase.build(), 1).totalHits);
}

From source file:at.ac.univie.mminf.luceneSKOS.test.SKOSLabelFilterTest.java

License:Apache License

/**
 * Parses the phrase {@code "fox jumps"} with a standard query parser over the
 * SKOS analyzer and over a plain standard analyzer, and checks the hit count,
 * string form and class of each resulting query.
 */
@Test
public void queryParserSearch() throws IOException, QueryNodeException {
    final String field = "content";
    final String phrase = "\"fox jumps\"";

    // index one sentence and open a searcher on top of the writer
    Document sentence = new Document();
    sentence.add(new Field(field, "The quick brown fox jumps over the lazy dog", TextField.TYPE_STORED));
    writer.addDocument(sentence);
    searcher = new IndexSearcher(DirectoryReader.open(writer, false));

    // SKOS analyzer: the phrase is expected to become a multi-phrase query
    Query skosQuery = new StandardQueryParser(skosAnalyzer).parse(phrase, field);
    assertEquals(1, searcher.search(skosQuery, 1).totalHits);
    assertEquals("content:\"fox (jumps hops leaps)\"", skosQuery.toString());
    assertEquals("org.apache.lucene.search.MultiPhraseQuery", skosQuery.getClass().getName());

    // standard analyzer: the phrase is expected to stay a plain phrase query
    Query plainQuery = new StandardQueryParser(new StandardAnalyzer()).parse(phrase, field);
    assertEquals(1, searcher.search(plainQuery, 1).totalHits);
    assertEquals("content:\"fox jumps\"", plainQuery.toString());
    assertEquals("org.apache.lucene.search.PhraseQuery", plainQuery.getClass().getName());
}