Example usage for org.apache.lucene.document StoredField StoredField

Introduction

On this page you can find example usage for the org.apache.lucene.document.StoredField constructor.

Prototype

public StoredField(String name, double value) 

Document

Create a stored-only field with the given double value. The examples below also exercise the String, int, long, float, and BytesRef overloads of the same constructor.
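
Below is a minimal sketch of the constructor in action. It assumes a Lucene 4.x classpath (as in the examples that follow); the field name "price" and the in-memory directory are illustrative, not taken from any of the sources below.

import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class StoredFieldDoubleSketch {
    public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir,
                new IndexWriterConfig(Version.LUCENE_4_9, new KeywordAnalyzer()));

        Document doc = new Document();
        // Stored-only: the value comes back with search results but carries no
        // indexed terms, so it cannot be queried directly.
        doc.add(new StoredField("price", 9.99));
        writer.addDocument(doc);
        writer.close();

        DirectoryReader reader = DirectoryReader.open(dir);
        double price = reader.document(0).getField("price").numericValue().doubleValue();
        System.out.println(price); // 9.99
        reader.close();
    }
}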

Usage

From source file:de.ids_mannheim.korap.index.FieldDocument.java

public void addStored(String key, String value) {
    doc.add(new StoredField(key, value));
}

From source file:de.ids_mannheim.korap.index.FieldDocument.java

public void addStored(String key, int value) {
    doc.add(new StoredField(key, value));
}

From source file:de.qaware.chronix.lucene.client.add.LuceneAddingService.java

License:Apache License

/**
 * Tries to cast the field value (object) to a String or byte[].
 * If the field value is neither a String nor a byte[], the method ignores the field.
 * <p>
 * If the value is a String or byte[], the value is wrapped into a matching lucene field (Field for String,
 * StoredField for byte[]) and added to the lucene document.
 *
 * @param document   the lucene document to add the value to
 * @param fieldName  the field name
 * @param fieldValue the field value
 */
private static void handleStringsAndBytes(Document document, String fieldName, Object fieldValue) {
    if (fieldValue instanceof String) {
        document.add(new Field(fieldName, fieldValue.toString(), TextField.TYPE_STORED));
    } else if (fieldValue instanceof byte[]) {
        document.add(new StoredField(fieldName, new BytesRef((byte[]) fieldValue)));
    }
}
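
As a hedged counterpart to the byte[] branch above, the stored bytes can be read back from the stored document with Document.getBinaryValue. A minimal sketch, assuming the document was written by the method above (the helper class and method names are made up here):

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.util.BytesRef;

class BinaryReadBackSketch {
    /** Returns the stored bytes of the given field, or null if the field is absent. */
    static byte[] readBytes(DirectoryReader reader, int docId, String fieldName) throws Exception {
        Document stored = reader.document(docId);
        BytesRef ref = stored.getBinaryValue(fieldName);
        if (ref == null) {
            return null;
        }
        // Copy only the valid slice; BytesRef.bytes may be padded beyond ref.length.
        byte[] out = new byte[ref.length];
        System.arraycopy(ref.bytes, ref.offset, out, 0, ref.length);
        return out;
    }
}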

From source file:de.qaware.chronix.lucene.client.add.LuceneAddingService.java

License:Apache License

/**
 * Tries to cast the field value (object) to a number (double, integer, float, long).
 * If the field value is not a number, the method ignores the field.
 * <p>
 * If the value is a number, the value is wrapped into the matching numeric StoredField overload
 * and added to the lucene document.
 *
 * @param document   the lucene document to add the number to
 * @param fieldName  the field name
 * @param fieldValue the field value
 */
private static void handleNumbers(Document document, String fieldName, Object fieldValue) {
    if (fieldValue instanceof Double) {
        document.add(new StoredField(fieldName, Double.parseDouble(fieldValue.toString())));
    } else if (fieldValue instanceof Integer) {
        document.add(new StoredField(fieldName, Integer.parseInt(fieldValue.toString())));
    } else if (fieldValue instanceof Float) {
        document.add(new StoredField(fieldName, Float.parseFloat(fieldValue.toString())));
    } else if (fieldValue instanceof Long) {
        document.add(new StoredField(fieldName, Long.parseLong(fieldValue.toString())));
    } else {
        LOGGER.warn("Could not extract value from field {} with value {}", fieldName, fieldValue);
    }
}
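
Reading these values back is symmetric: a stored field's numericValue() returns the Number subtype that was stored (Double, Integer, Float, or Long). A minimal sketch, assuming the document was written by handleNumbers above:

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexableField;

class NumericReadBackSketch {
    /** Returns the stored number, or null if the field is absent or non-numeric. */
    static Number readNumber(DirectoryReader reader, int docId, String fieldName) throws Exception {
        Document stored = reader.document(docId);
        IndexableField field = stored.getField(fieldName);
        return field == null ? null : field.numericValue();
    }
}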

From source file:de.tudarmstadt.lt.lm.app.GenerateNgramIndex.java

License:Apache License

public void create_ngram_index(File ngram_joined_counts_file) throws IOException {
    File index_dir = new File(_index_dir, "ngram");
    if (index_dir.exists()) {
        LOG.info("Ngram index already exists in directory '{}'.", index_dir.getAbsolutePath());
        if (_overwrite) {
            LOG.info("Overwriting index '{}',", index_dir);
            index_dir.delete();/*w w  w.j a va2s. c  o m*/
        } else
            return;
    }
    index_dir.mkdirs();

    Analyzer analyzer = new KeywordAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    // use 80 percent of the available total memory
    double total_mem_mb = (double) Runtime.getRuntime().maxMemory() / 1e6;
    double percentage_ram_buffer = Properties.ramBufferPercentage();
    if (percentage_ram_buffer > 0) {
        double percentage_ram_buffer_mb = total_mem_mb * percentage_ram_buffer;
        LOG.info(String.format("Setting ram buffer size to %.2f MB (%.2f%% from %.2f MB)",
                percentage_ram_buffer_mb, percentage_ram_buffer * 100, total_mem_mb));
        iwc.setRAMBufferSizeMB(percentage_ram_buffer_mb);
    }

    Directory directory = new MMapDirectory(index_dir);
    IndexWriter writer_ngram = new IndexWriter(directory, iwc);

    InputStream in = new FileInputStream(ngram_joined_counts_file);
    if (ngram_joined_counts_file.getName().endsWith(".gz"))
        in = new GZIPInputStream(in);
    LineIterator iter = new LineIterator(new BufferedReader(new InputStreamReader(in, "UTF-8")));

    Document doc = new Document();
    Field f_ngram = new StringField("ngram", "", Store.YES);
    doc.add(f_ngram);
    Field f_n = new IntField("cardinality", 0, Store.YES);
    doc.add(f_n);
    Field f_word = new StringField("word", "", Store.YES);
    doc.add(f_word);
    Field f_hist = new StringField("history", "", Store.YES);
    doc.add(f_hist);
    Field f_lower = new StringField("lower", "", Store.YES);
    doc.add(f_lower);
    Field f_count = new StoredField("num", 0L);
    doc.add(f_count);

    Field[] f_follow = new Field[4];
    f_follow[0] = new StoredField("nf_s", 0L);
    doc.add(f_follow[0]);
    f_follow[1] = new StoredField("nf_N1", 0L);
    doc.add(f_follow[1]);
    f_follow[2] = new StoredField("nf_N2", 0L);
    doc.add(f_follow[2]);
    f_follow[3] = new StoredField("nf_N3", 0L);
    doc.add(f_follow[3]);
    Field[] f_precede = new Field[4];
    f_precede[0] = new StoredField("np_s", 0L);
    doc.add(f_precede[0]);
    f_precede[1] = new StoredField("np_N1", 0L);
    doc.add(f_precede[1]);
    f_precede[2] = new StoredField("np_N2", 0L);
    doc.add(f_precede[2]);
    f_precede[3] = new StoredField("np_N3", 0L);
    doc.add(f_precede[3]);
    Field[] f_followerprecede = new Field[4];
    f_followerprecede[0] = new StoredField("nfp_s", 0L);
    doc.add(f_followerprecede[0]);
    f_followerprecede[1] = new StoredField("nfp_N1", 0L);
    doc.add(f_followerprecede[1]);
    f_followerprecede[2] = new StoredField("nfp_N2", 0L);
    doc.add(f_followerprecede[2]);
    f_followerprecede[3] = new StoredField("nfp_N3", 0L);
    doc.add(f_followerprecede[3]);

    Long[][] N = new Long[][] { { 0L, 0L, 0L, 0L, 0L, 0L } };
    Long[] S = new Long[] { 0L };
    long c = 0;
    while (iter.hasNext()) {
        if (++c % 100000 == 0)
            LOG.info("Adding {}'th ngram.", c);
        String line = iter.next();
        try {
            String[] splits = de.tudarmstadt.lt.utilities.StringUtils.rtrim(line).split("\t");
            String ngram_str = splits[0];
            if (de.tudarmstadt.lt.utilities.StringUtils.trim(ngram_str).isEmpty()) {
                LOG.warn("Ngram is empty, skipping line {}: '{}' (file '{}').", c, line,
                        ngram_joined_counts_file);
                continue;
            }

            List<String> ngram = Arrays.asList(ngram_str.split(" "));
            long num = Long.parseLong(splits[1]);
            int n = ngram.size();

            f_ngram.setStringValue(ngram_str);
            f_n.setIntValue(n);
            f_word.setStringValue(ngram.get(ngram.size() - 1));
            f_hist.setStringValue(StringUtils.join(ngram.subList(0, ngram.size() - 1), " "));
            f_lower.setStringValue(StringUtils.join(ngram.subList(1, ngram.size()), " "));
            f_count.setLongValue(num);

            for (int j = 0; j < f_follow.length; j++) {
                f_follow[j].setLongValue(0L);
                f_precede[j].setLongValue(0L);
                f_followerprecede[j].setLongValue(0L);
            }

            if (splits.length > 2 && !splits[2].isEmpty()) {
                // precede or follow or followerprecede
                String[] splits_ = splits[2].split(":");
                String type = splits_[0];
                String[] count_values = splits_[1].split(",");
                if (count_values.length > 0) {
                    if ("n_f".equals(type))
                        f_follow[0].setLongValue(Long.parseLong(count_values[0]));
                    else if ("n_p".equals(type))
                        f_precede[0].setLongValue(Long.parseLong(count_values[0]));
                    else if ("n_fp".equals(type))
                        f_followerprecede[0].setLongValue(Long.parseLong(count_values[0]));
                }
                for (int i = 1; i < count_values.length; i++) {
                    if ("n_f".equals(type))
                        f_follow[i].setLongValue(Long.parseLong(count_values[i]));
                    else if ("n_p".equals(type))
                        f_precede[i].setLongValue(Long.parseLong(count_values[i]));
                    else if ("n_fp".equals(type))
                        f_followerprecede[i].setLongValue(Long.parseLong(count_values[i]));
                }
            }
            if (splits.length > 3 && !splits[3].isEmpty()) {
                // should be follow or followerprecede
                String[] splits_ = splits[3].split(":");
                String type = splits_[0];
                String[] count_values = splits_[1].split(",");
                if (count_values.length > 0) {
                    if ("n_f".equals(type))
                        f_follow[0].setLongValue(Long.parseLong(count_values[0]));
                    else if ("n_p".equals(type))
                        f_precede[0].setLongValue(Long.parseLong(count_values[0]));
                    else if ("n_fp".equals(type))
                        f_followerprecede[0].setLongValue(Long.parseLong(count_values[0]));
                }
                for (int i = 1; i < count_values.length; i++) {
                    if ("n_f".equals(type))
                        f_follow[i].setLongValue(Long.parseLong(count_values[i]));
                    else if ("n_p".equals(type))
                        f_precede[i].setLongValue(Long.parseLong(count_values[i]));
                    else if ("n_fp".equals(type))
                        f_followerprecede[i].setLongValue(Long.parseLong(count_values[i]));
                }
            }
            if (splits.length > 4 && !splits[4].isEmpty()) {
                // should be followerprecede
                String[] splits_ = splits[4].split(":");
                String type = splits_[0];
                String[] count_values = splits_[1].split(",");
                if (count_values.length > 0) {
                    if ("n_f".equals(type))
                        f_follow[0].setLongValue(Long.parseLong(count_values[0]));
                    else if ("n_p".equals(type))
                        f_precede[0].setLongValue(Long.parseLong(count_values[0]));
                    else if ("n_fp".equals(type))
                        f_followerprecede[0].setLongValue(Long.parseLong(count_values[0]));
                }
                for (int i = 1; i < count_values.length; i++) {
                    if ("n_f".equals(type))
                        f_follow[i].setLongValue(Long.parseLong(count_values[i]));
                    else if ("n_p".equals(type))
                        f_precede[i].setLongValue(Long.parseLong(count_values[i]));
                    else if ("n_fp".equals(type))
                        f_followerprecede[i].setLongValue(Long.parseLong(count_values[i]));
                }
            }

            writer_ngram.addDocument(doc);

            while (N.length <= n) {
                N = ArrayUtils.getConcatinatedArray(N, new Long[][] { { 0L, 0L, 0L, 0L, 0L, 0L } });
                S = ArrayUtils.getConcatinatedArray(S, new Long[] { 0L });
            }

            if (num == 1L)
                N[n][1]++;
            else if (num == 2L)
                N[n][2]++;
            else if (num == 3L)
                N[n][3]++;
            else if (num == 4L)
                N[n][4]++;
            else
                N[n][5]++;
            N[n][0]++;
            S[n] += num;

        } catch (Exception e) {
            LOG.error("Could not process line '{}' in file '{}:{}', malformed line.", line,
                    ngram_joined_counts_file, c, e);
        }
    }

    writer_ngram.forceMergeDeletes();
    writer_ngram.commit();
    writer_ngram.close();

    StringBuilder b = new StringBuilder(String.format(
            "#%n# Number of times where an ngram occurred: %n#  at_least_once, exactly_once, exactly_twice, exactly_three_times, exactly_four_times, five_times_or_more.%n#%nmax_n=%d%nmax_c=6%n",
            N.length - 1));
    for (int n = 1; n < N.length; n++)
        b.append(String.format("n%d=%s%n", n, StringUtils.join(N[n], ',')));
    for (int n = 1; n < S.length; n++)
        b.append(String.format("s%d=%d%n", n, S[n]));
    FileUtils.writeStringToFile(new File(_index_dir, "__sum_ngrams__"), b.toString());

}
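
The counts written above are StoredFields, so they are retrievable but not searchable; lookups go through the indexed StringField "ngram" instead. A hedged query sketch, assuming the field names from create_ngram_index (the directory path and the example ngram are illustrative):

import java.io.File;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.MMapDirectory;

class NgramLookupSketch {
    public static void main(String[] args) throws Exception {
        DirectoryReader reader = DirectoryReader.open(new MMapDirectory(new File(args[0], "ngram")));
        IndexSearcher searcher = new IndexSearcher(reader);
        // "ngram" was indexed as a StringField, so an exact TermQuery matches it.
        TopDocs hits = searcher.search(new TermQuery(new Term("ngram", "the quick fox")), 1);
        if (hits.totalHits > 0) {
            Document doc = searcher.doc(hits.scoreDocs[0].doc);
            // "num" and the nf_*/np_*/nfp_* counts are stored-only longs.
            long num = doc.getField("num").numericValue().longValue();
            long nfS = doc.getField("nf_s").numericValue().longValue();
            System.out.printf("num=%d, nf_s=%d%n", num, nfS);
        }
        reader.close();
    }
}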

From source file:di.uniba.it.tee2.index.TemporalEventIndexing.java

License:Open Source License

/**
 * Creates and stores an XML document from the given input string after
 * tagging it with HeidelTime.
 *
 * @param title
 * @param content
 * @param fileName
 * @param docID
 * @param wikiID
 * @param revisionID
 * @throws java.lang.Exception
 */
public void add(String title, String content, String fileName, String docID, int wikiID, int revisionID)
        throws Exception {
    TaggedText tt = null;
    try {
        tt = tempExtractor.process(content);
    } catch (Exception ex) {
        logger.log(Level.WARNING, "Error to process doc " + docID + " (skip doc)", ex);
    }
    if (tt != null) {

        //stores id and text (not tagged) in docrep_index (document repository)
        Document docrep_doc = new Document();
        docrep_doc.add(new StringField("id", docID, Field.Store.YES));
        docrep_doc.add(new IntField("wikiID", wikiID, Field.Store.YES));
        docrep_doc.add(new IntField("revisionID", revisionID, Field.Store.YES));
        docrep_doc.add(new StringField("title", title, Field.Store.YES));
        docrep_doc.add(new StoredField("content", tt.getText()));
        docrep_doc.add(new StringField("filename", fileName, Field.Store.YES));
        docrep_writer.addDocument(docrep_doc);

        //stores id and text (not tagged) in doc_index for search
        Document doc_doc = new Document();
        doc_doc.add(new StringField("id", docID, Field.Store.YES));
        doc_doc.add(new IntField("wikiID", wikiID, Field.Store.YES));
        doc_doc.add(new IntField("revisionID", revisionID, Field.Store.YES));
        doc_doc.add(new TextField("title", title, Field.Store.NO));
        doc_doc.add(new TextField("content", tt.getText(), Field.Store.NO));
        doc_writer.addDocument(doc_doc);

        logger.log(Level.FINE, "Found {0} temporal events", tt.getEvents().size());

        for (TimeEvent event : tt.getEvents()) { //for each TIMEX3 store info time index
            //stores id, file name and text (TimeML tagged) in time_index
            Document time_doc = new Document();
            time_doc.add(new StringField("id", docID, Field.Store.YES));
            //time_doc.add(new StringField("file", fileName, Field.Store.YES));
            //time_doc.add(new TextField("content", tt.getTaggedText(), Field.Store.NO));
            /*FieldType ft = new FieldType();
             ft.setStoreTermVectors(true);
             ft.setTokenized(true);
             ft.setStored(true);
             ft.setIndexed(true);
             ft.setStoreTermVectorPositions(true);
             ft.setOmitNorms(false);*/

            time_doc.add(new StringField("time", event.getDateString(), Field.Store.YES));
            time_doc.add(new IntField("offset_start", event.getStartOffset(), Field.Store.YES));
            time_doc.add(new IntField("offset_end", event.getEndOffset(), Field.Store.YES));
            time_doc.add(new TextField("context",
                    getTimeContext(tt.getText(), event.getStartOffset(), event.getEndOffset()),
                    Field.Store.NO));
            time_writer.addDocument(time_doc);
        }
    }
}
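
Since "content" is stored in docrep_index but indexed only in doc_index, retrieving the raw text by id goes through the repository index. A hedged sketch (the reader argument is an assumption; this helper is not part of the class above):

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;

class DocRepLookupSketch {
    /** Returns the stored (untagged) text for a docID, or null if not found. */
    static String fetchContent(DirectoryReader docrepReader, String docID) throws Exception {
        IndexSearcher searcher = new IndexSearcher(docrepReader);
        TopDocs hits = searcher.search(new TermQuery(new Term("id", docID)), 1);
        if (hits.totalHits == 0) {
            return null;
        }
        // "content" is a StoredField: retrievable here, searchable only via doc_index.
        return searcher.doc(hits.scoreDocs[0].doc).get("content");
    }
}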

From source file:di.uniba.it.tee2.index.TemporalEventIndexingTS.java

License:Open Source License

/**
 * Creates and stores an XML document from the given input string after
 * tagging it with HeidelTime.
 *
 * @param title
 * @param content
 * @param fileName
 * @param docID
 */
public void add(String title, String content, String fileName, String docID, int wikiID, int revisionID)
        throws Exception {
    TaggedText tt = null;
    try {
        TemporalExtractor tempExtractor = new TemporalExtractor(lang);
        tempExtractor.init();
        tt = tempExtractor.process(content);
    } catch (Exception ex) {
        logger.log(Level.WARNING, "Error to process doc " + docID + " (skip doc)", ex);
    }
    if (tt != null) {

        //stores id and text (not tagged) in docrep_index (document repository)
        Document docrep_doc = new Document();
        docrep_doc.add(new StringField("id", docID, Field.Store.YES));
        docrep_doc.add(new IntField("wikiID", wikiID, Field.Store.YES));
        docrep_doc.add(new IntField("revisionID", revisionID, Field.Store.YES));
        docrep_doc.add(new StringField("title", title, Field.Store.YES));
        docrep_doc.add(new StoredField("content", tt.getText()));
        docrep_doc.add(new StringField("filename", fileName, Field.Store.YES));
        docrep_writer.addDocument(docrep_doc);

        //stores id and text (not tagged) in doc_index for search
        Document doc_doc = new Document();
        doc_doc.add(new StringField("id", docID, Field.Store.YES));
        doc_doc.add(new IntField("wikiID", wikiID, Field.Store.YES));
        doc_doc.add(new IntField("revisionID", revisionID, Field.Store.YES));
        doc_doc.add(new TextField("title", title, Field.Store.NO));
        doc_doc.add(new TextField("content", tt.getText(), Field.Store.NO));
        doc_writer.addDocument(doc_doc);

        logger.log(Level.FINE, "Found {0} temporal events", tt.getEvents().size());

        for (TimeEvent event : tt.getEvents()) { //for each TIMEX3 store info time index
            //stores id, file name and text (TimeML tagged) in time_index
            Document time_doc = new Document();
            time_doc.add(new StringField("id", docID, Field.Store.YES));
            //time_doc.add(new StringField("file", fileName, Field.Store.YES));
            //time_doc.add(new TextField("content", tt.getTaggedText(), Field.Store.NO));
            /*FieldType ft = new FieldType();
             ft.setStoreTermVectors(true);
             ft.setTokenized(true);
             ft.setStored(true);
             ft.setIndexed(true);
             ft.setStoreTermVectorPositions(true);
             ft.setOmitNorms(false);*/

            time_doc.add(new StringField("time", event.getDateString(), Field.Store.YES));
            time_doc.add(new IntField("offset_start", event.getStartOffset(), Field.Store.YES));
            time_doc.add(new IntField("offset_end", event.getEndOffset(), Field.Store.YES));
            time_doc.add(new TextField("context",
                    getTimeContext(tt.getText(), event.getStartOffset(), event.getEndOffset()),
                    Field.Store.NO));
            time_writer.addDocument(time_doc);
        }
    }
}

From source file:dk.dma.msinm.lucene.SpatialLuceneTest.java

License:Open Source License

private Document newSampleDocument(int id, Shape... shapes) {
    Document doc = new Document();
    doc.add(new IntField("id", id, Field.Store.YES));
    for (Shape shape : shapes) {
        for (IndexableField f : strategy.createIndexableFields(shape)) {
            doc.add(f);
        }

        doc.add(new StoredField(strategy.getFieldName(), shape.toString()));
    }
    return doc;
}
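
The strategy's own indexable fields are index-only, so the extra StoredField is what makes the shape retrievable from a hit. A hedged read-back sketch (searcher and strategy are assumed to be set up as in the test class):

import org.apache.lucene.document.Document;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.spatial.SpatialStrategy;

class ShapeReadBackSketch {
    /** Returns the stored string form of the shape for a search hit. */
    static String storedShape(IndexSearcher searcher, SpatialStrategy strategy, ScoreDoc hit)
            throws Exception {
        Document doc = searcher.doc(hit.doc);
        return doc.get(strategy.getFieldName());
    }
}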

From source file:dk.dma.msinm.service.MessageSearchService.java

License:Open Source License

/**
 * Adds a shape to the document
 * @param doc the Lucene document
 * @param shape the shape to add
 * @return the updated document
 */
private Document addShapeSearchFields(Document doc, Shape shape) {
    for (IndexableField f : strategy.createIndexableFields(shape)) {
        doc.add(f);
    }
    doc.add(new StoredField(strategy.getFieldName(), shape.toString()));
    return doc;
}

From source file:edu.cmu.lti.oaqa.baseqa.concept.rerank.LuceneInMemoryConceptReranker.java

License:Apache License

private static Document toLuceneDocument(ConceptSearchResult result) {
    Document entry = new Document();
    entry.add(new StoredField("uri", result.getUri()));
    String names = String.join(", ", TypeUtil.getConceptNames(result.getConcept()));
    entry.add(new TextField("text", names, Field.Store.NO));
    return entry;
}
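
On the retrieval side, the indexed "text" field is what a query matches, and the stored-only "uri" is read back from each hit. A hedged sketch (the searcher and query construction are assumptions, not taken from the reranker itself):

import org.apache.lucene.document.Document;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;

class ConceptUriSketch {
    /** Prints the stored concept URI and score for the top hits of a query. */
    static void printUris(IndexSearcher searcher, Query query) throws Exception {
        TopDocs hits = searcher.search(query, 10);
        for (ScoreDoc sd : hits.scoreDocs) {
            Document doc = searcher.doc(sd.doc);
            System.out.println(doc.get("uri") + "\t" + sd.score);
        }
    }
}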