List of usage examples for the org.apache.lucene.document.StoredField constructor
public StoredField(String name, double value)
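Note: a StoredField is kept verbatim with the document and returned with search hits, but it is not indexed, so it cannot be searched or sorted on by itself. A minimal sketch of this constructor in isolation (the "price" field name and value are illustrative, not taken from the sources below):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;

public class StoredFieldExample {
    public static Document priceDocument() {
        Document doc = new Document();
        // Stored with the document and retrievable at search time,
        // but not indexed: this field cannot be queried on its own.
        doc.add(new StoredField("price", 9.99d));
        return doc;
    }
}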
From source file:de.ids_mannheim.korap.index.FieldDocument.java
public void addStored(String key, String value) {
    doc.add(new StoredField(key, value));
}
From source file:de.ids_mannheim.korap.index.FieldDocument.java
public void addStored(String key, int value) {
    doc.add(new StoredField(key, value));
}
From source file:de.qaware.chronix.lucene.client.add.LuceneAddingService.java
License:Apache License
/**
 * Tries to cast the field value (object) to a String or byte[].
 * If the field value is neither a String nor a byte[], the method ignores the field.
 * <p>
 * If the value is a String or byte[], it is wrapped in a matching Lucene field
 * (Field for String, StoredField for byte[]) and added to the Lucene document.
 *
 * @param document   the Lucene document to add the field to
 * @param fieldName  the field name
 * @param fieldValue the field value
 */
private static void handleStringsAndBytes(Document document, String fieldName, Object fieldValue) {
    if (fieldValue instanceof String) {
        document.add(new Field(fieldName, fieldValue.toString(), TextField.TYPE_STORED));
    } else if (fieldValue instanceof byte[]) {
        document.add(new StoredField(fieldName, new BytesRef((byte[]) fieldValue)));
    }
}
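Note: a byte[] stored this way comes back as a BytesRef when the document is retrieved. A minimal retrieval sketch, assuming a hypothetical "payload" field name (not part of the source above):

import java.util.Arrays;
import org.apache.lucene.document.Document;
import org.apache.lucene.util.BytesRef;

public class StoredBytesExample {
    // Returns the stored bytes, or null if the field is absent.
    // The field name "payload" is an assumption for illustration.
    public static byte[] readStoredBytes(Document hit) {
        BytesRef ref = hit.getBinaryValue("payload");
        return ref == null ? null
                : Arrays.copyOfRange(ref.bytes, ref.offset, ref.offset + ref.length);
    }
}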
From source file:de.qaware.chronix.lucene.client.add.LuceneAddingService.java
License:Apache License
/**
 * Tries to cast the field value (object) to a number (double, integer, float, long).
 * If the field value is not a number, the method ignores the field.
 * <p>
 * If the value is a number, it is wrapped in a Lucene StoredField of the
 * matching type and added to the Lucene document.
 *
 * @param document   the Lucene document to add the number to
 * @param fieldName  the field name
 * @param fieldValue the field value
 */
private static void handleNumbers(Document document, String fieldName, Object fieldValue) {
    if (fieldValue instanceof Double) {
        document.add(new StoredField(fieldName, Double.parseDouble(fieldValue.toString())));
    } else if (fieldValue instanceof Integer) {
        document.add(new StoredField(fieldName, Integer.parseInt(fieldValue.toString())));
    } else if (fieldValue instanceof Float) {
        document.add(new StoredField(fieldName, Float.parseFloat(fieldValue.toString())));
    } else if (fieldValue instanceof Long) {
        document.add(new StoredField(fieldName, Long.parseLong(fieldValue.toString())));
    } else {
        LOGGER.warn("Could not extract value from field {} with value {}", fieldName, fieldValue);
    }
}
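Note: because these numeric values go into plain StoredFields rather than indexed numeric fields, they are retrievable with search hits but not searchable or sortable. A minimal sketch of reading such a number back (the method name is illustrative):

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;

public class StoredNumberExample {
    // Returns the stored number, or null if the field is absent.
    public static Number readStoredNumber(Document hit, String fieldName) {
        IndexableField f = hit.getField(fieldName);
        return f == null ? null : f.numericValue();
    }
}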
From source file:de.tudarmstadt.lt.lm.app.GenerateNgramIndex.java
License:Apache License
public void create_ngram_index(File ngram_joined_counts_file) throws IOException {
    File index_dir = new File(_index_dir, "ngram");
    if (index_dir.exists()) {
        LOG.info("Ngram index already exists in directory '{}'.", index_dir.getAbsolutePath());
        if (_overwrite) {
            LOG.info("Overwriting index '{}',", index_dir);
            index_dir.delete();
        } else
            return;
    }
    index_dir.mkdirs();

    Analyzer analyzer = new KeywordAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    // use 80 percent of the available total memory
    double total_mem_mb = (double) Runtime.getRuntime().maxMemory() / 1e6;
    double percentage_ram_buffer = Properties.ramBufferPercentage();
    if (percentage_ram_buffer > 0) {
        double percentage_ram_buffer_mb = total_mem_mb * percentage_ram_buffer;
        LOG.info(String.format("Setting ram buffer size to %.2f MB (%.2f%% from %.2f MB)",
                percentage_ram_buffer_mb, percentage_ram_buffer * 100, total_mem_mb));
        iwc.setRAMBufferSizeMB(percentage_ram_buffer_mb);
    }
    Directory directory = new MMapDirectory(index_dir);
    IndexWriter writer_ngram = new IndexWriter(directory, iwc);

    InputStream in = new FileInputStream(ngram_joined_counts_file);
    if (ngram_joined_counts_file.getName().endsWith(".gz"))
        in = new GZIPInputStream(in);
    LineIterator iter = new LineIterator(new BufferedReader(new InputStreamReader(in, "UTF-8")));

    Document doc = new Document();
    Field f_ngram = new StringField("ngram", "", Store.YES);
    doc.add(f_ngram);
    Field f_n = new IntField("cardinality", 0, Store.YES);
    doc.add(f_n);
    Field f_word = new StringField("word", "", Store.YES);
    doc.add(f_word);
    Field f_hist = new StringField("history", "", Store.YES);
    doc.add(f_hist);
    Field f_lower = new StringField("lower", "", Store.YES);
    doc.add(f_lower);
    Field f_count = new StoredField("num", 0L);
    doc.add(f_count);

    Field[] f_follow = new Field[4];
    f_follow[0] = new StoredField("nf_s", 0L);
    doc.add(f_follow[0]);
    f_follow[1] = new StoredField("nf_N1", 0L);
    doc.add(f_follow[1]);
    f_follow[2] = new StoredField("nf_N2", 0L);
    doc.add(f_follow[2]);
    f_follow[3] = new StoredField("nf_N3", 0L);
    doc.add(f_follow[3]);
    Field[] f_precede = new Field[4];
    f_precede[0] = new StoredField("np_s", 0L);
    doc.add(f_precede[0]);
    f_precede[1] = new StoredField("np_N1", 0L);
    doc.add(f_precede[1]);
    f_precede[2] = new StoredField("np_N2", 0L);
    doc.add(f_precede[2]);
    f_precede[3] = new StoredField("np_N3", 0L);
    doc.add(f_precede[3]);
    Field[] f_followerprecede = new Field[4];
    f_followerprecede[0] = new StoredField("nfp_s", 0L);
    doc.add(f_followerprecede[0]);
    f_followerprecede[1] = new StoredField("nfp_N1", 0L);
    doc.add(f_followerprecede[1]);
    f_followerprecede[2] = new StoredField("nfp_N2", 0L);
    doc.add(f_followerprecede[2]);
    f_followerprecede[3] = new StoredField("nfp_N3", 0L);
    doc.add(f_followerprecede[3]);

    Long[][] N = new Long[][] { { 0L, 0L, 0L, 0L, 0L, 0L } };
    Long[] S = new Long[] { 0L };
    long c = 0;
    while (iter.hasNext()) {
        if (++c % 100000 == 0)
            LOG.info("Adding {}'th ngram.", c);
        String line = iter.next();
        try {
            String[] splits = de.tudarmstadt.lt.utilities.StringUtils.rtrim(line).split("\t");
            String ngram_str = splits[0];
            if (de.tudarmstadt.lt.utilities.StringUtils.trim(ngram_str).isEmpty()) {
                LOG.warn("Ngram is empty, skipping line {}: '{}' (file '{}').", c, line,
                        ngram_joined_counts_file);
                continue;
            }
            List<String> ngram = Arrays.asList(ngram_str.split(" "));
            long num = Long.parseLong(splits[1]);
            int n = ngram.size();

            f_ngram.setStringValue(ngram_str);
            f_n.setIntValue(n);
            f_word.setStringValue(ngram.get(ngram.size() - 1));
            f_hist.setStringValue(StringUtils.join(ngram.subList(0, ngram.size() - 1), " "));
            f_lower.setStringValue(StringUtils.join(ngram.subList(1, ngram.size()), " "));
            f_count.setLongValue(num);

            for (int j = 0; j < f_follow.length; j++) {
                f_follow[j].setLongValue(0L);
                f_precede[j].setLongValue(0L);
                f_followerprecede[j].setLongValue(0L);
            }

            if (splits.length > 2 && !splits[2].isEmpty()) { // precede or follow or followerprecede
                String[] splits_ = splits[2].split(":");
                String type = splits_[0];
                String[] count_values = splits_[1].split(",");
                if (count_values.length > 0) {
                    if ("n_f".equals(type))
                        f_follow[0].setLongValue(Long.parseLong(count_values[0]));
                    else if ("n_p".equals(type))
                        f_precede[0].setLongValue(Long.parseLong(count_values[0]));
                    else if ("n_fp".equals(type))
                        f_followerprecede[0].setLongValue(Long.parseLong(count_values[0]));
                }
                for (int i = 1; i < count_values.length; i++) {
                    if ("n_f".equals(type))
                        f_follow[i].setLongValue(Long.parseLong(count_values[i]));
                    else if ("n_p".equals(type))
                        f_precede[i].setLongValue(Long.parseLong(count_values[i]));
                    else if ("n_fp".equals(type))
                        f_followerprecede[i].setLongValue(Long.parseLong(count_values[i]));
                }
            }
            if (splits.length > 3 && !splits[3].isEmpty()) { // should be follow or followerprecede
                String[] splits_ = splits[3].split(":");
                String type = splits_[0];
                String[] count_values = splits_[1].split(",");
                if (count_values.length > 0) {
                    if ("n_f".equals(type))
                        f_follow[0].setLongValue(Long.parseLong(count_values[0]));
                    else if ("n_p".equals(type))
                        f_precede[0].setLongValue(Long.parseLong(count_values[0]));
                    else if ("n_fp".equals(type))
                        f_followerprecede[0].setLongValue(Long.parseLong(count_values[0]));
                }
                for (int i = 1; i < count_values.length; i++) {
                    if ("n_f".equals(type))
                        f_follow[i].setLongValue(Long.parseLong(count_values[i]));
                    else if ("n_p".equals(type))
                        f_precede[i].setLongValue(Long.parseLong(count_values[i]));
                    else if ("n_fp".equals(type))
                        f_followerprecede[i].setLongValue(Long.parseLong(count_values[i]));
                }
            }
            if (splits.length > 4 && !splits[4].isEmpty()) { // should be followerprecede
                String[] splits_ = splits[4].split(":");
                String type = splits_[0];
                String[] count_values = splits_[1].split(",");
                if (count_values.length > 0) {
                    if ("n_f".equals(type))
                        f_follow[0].setLongValue(Long.parseLong(count_values[0]));
                    else if ("n_p".equals(type))
                        f_precede[0].setLongValue(Long.parseLong(count_values[0]));
                    else if ("n_fp".equals(type))
                        f_followerprecede[0].setLongValue(Long.parseLong(count_values[0]));
                }
                for (int i = 1; i < count_values.length; i++) {
                    if ("n_f".equals(type))
                        f_follow[i].setLongValue(Long.parseLong(count_values[i]));
                    else if ("n_p".equals(type))
                        f_precede[i].setLongValue(Long.parseLong(count_values[i]));
                    else if ("n_fp".equals(type))
                        f_followerprecede[i].setLongValue(Long.parseLong(count_values[i]));
                }
            }

            writer_ngram.addDocument(doc);

            while (N.length <= n) {
                N = ArrayUtils.getConcatinatedArray(N, new Long[][] { { 0L, 0L, 0L, 0L, 0L, 0L } });
                S = ArrayUtils.getConcatinatedArray(S, new Long[] { 0L });
            }
            if (num == 1L)
                N[n][1]++;
            else if (num == 2L)
                N[n][2]++;
            else if (num == 3L)
                N[n][3]++;
            else if (num == 4L)
                N[n][4]++;
            else
                N[n][5]++;
            N[n][0]++;
            S[n] += num;
        } catch (Exception e) {
            LOG.error("Could not process line '{}' in file '{}:{}', malformed line.", line,
                    ngram_joined_counts_file, c, e);
        }
    }
    writer_ngram.forceMergeDeletes();
    writer_ngram.commit();
    writer_ngram.close();

    StringBuilder b = new StringBuilder(String.format(
            "#%n# Number of times where an ngram occurred: %n# at_least_once, exactly_once, exactly_twice, exactly_three_times, exactly_four_times, five_times_or_more.%n#%nmax_n=%d%nmax_c=6%n",
            N.length - 1));
    for (int n = 1; n < N.length; n++)
        b.append(String.format("n%d=%s%n", n, StringUtils.join(N[n], ',')));
    for (int n = 1; n < S.length; n++)
        b.append(String.format("s%d=%d%n", n, S[n]));
    FileUtils.writeStringToFile(new File(_index_dir, "__sum_ngrams__"), b.toString());
}
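Note: the method above reuses a single Document and mutates its field instances via setStringValue/setLongValue before each addDocument call, a common Lucene idiom that avoids re-allocating fields for every input line. A minimal sketch of the same idiom, assuming a simple word-to-count map (all names are illustrative):

import java.io.IOException;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;

public class ReusedDocumentExample {
    public static void indexCounts(IndexWriter writer, Map<String, Long> counts) throws IOException {
        Document doc = new Document();
        StringField word = new StringField("word", "", Field.Store.YES);
        StoredField num = new StoredField("num", 0L);
        doc.add(word);
        doc.add(num);
        for (Map.Entry<String, Long> e : counts.entrySet()) {
            // mutate the shared field instances, then re-add the same document
            word.setStringValue(e.getKey());
            num.setLongValue(e.getValue());
            writer.addDocument(doc);
        }
    }
}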
From source file:di.uniba.it.tee2.index.TemporalEventIndexing.java
License:Open Source License
/**
 * Creates and stores an XML document from the given input string after
 * tagging it with HeidelTime.
 *
 * @param title
 * @param content
 * @param fileName
 * @param docID
 * @param wikiID
 * @param revisionID
 * @throws java.lang.Exception
 */
public void add(String title, String content, String fileName, String docID, int wikiID, int revisionID)
        throws Exception {
    TaggedText tt = null;
    try {
        tt = tempExtractor.process(content);
    } catch (Exception ex) {
        logger.log(Level.WARNING, "Error processing doc " + docID + " (skip doc)", ex);
    }
    if (tt != null) {
        // stores id and text (not tagged) in docrep_index (document repository)
        Document docrep_doc = new Document();
        docrep_doc.add(new StringField("id", docID, Field.Store.YES));
        docrep_doc.add(new IntField("wikiID", wikiID, Field.Store.YES));
        docrep_doc.add(new IntField("revisionID", revisionID, Field.Store.YES));
        docrep_doc.add(new StringField("title", title, Field.Store.YES));
        docrep_doc.add(new StoredField("content", tt.getText()));
        docrep_doc.add(new StringField("filename", fileName, Field.Store.YES));
        docrep_writer.addDocument(docrep_doc);
        // stores id and text (not tagged) in doc_index for search
        Document doc_doc = new Document();
        doc_doc.add(new StringField("id", docID, Field.Store.YES));
        doc_doc.add(new IntField("wikiID", wikiID, Field.Store.YES));
        doc_doc.add(new IntField("revisionID", revisionID, Field.Store.YES));
        doc_doc.add(new TextField("title", title, Field.Store.NO));
        doc_doc.add(new TextField("content", tt.getText(), Field.Store.NO));
        doc_writer.addDocument(doc_doc);
        logger.log(Level.FINE, "Found {0} temporal events", tt.getEvents().size());
        for (TimeEvent event : tt.getEvents()) {
            // for each TIMEX3, store id, date string, offsets and context in time_index
            Document time_doc = new Document();
            time_doc.add(new StringField("id", docID, Field.Store.YES));
            //time_doc.add(new StringField("file", fileName, Field.Store.YES));
            //time_doc.add(new TextField("content", tt.getTaggedText(), Field.Store.NO));
            /*FieldType ft = new FieldType();
            ft.setStoreTermVectors(true);
            ft.setTokenized(true);
            ft.setStored(true);
            ft.setIndexed(true);
            ft.setStoreTermVectorPositions(true);
            ft.setOmitNorms(false);*/
            time_doc.add(new StringField("time", event.getDateString(), Field.Store.YES));
            time_doc.add(new IntField("offset_start", event.getStartOffset(), Field.Store.YES));
            time_doc.add(new IntField("offset_end", event.getEndOffset(), Field.Store.YES));
            time_doc.add(new TextField("context",
                    getTimeContext(tt.getText(), event.getStartOffset(), event.getEndOffset()),
                    Field.Store.NO));
            time_writer.addDocument(time_doc);
        }
    }
}
From source file:di.uniba.it.tee2.index.TemporalEventIndexingTS.java
License:Open Source License
/**
 * Creates and stores an XML document from the given input string after
 * tagging it with HeidelTime.
 *
 * @param title
 * @param content
 * @param fileName
 * @param docID
 * @param wikiID
 * @param revisionID
 */
public void add(String title, String content, String fileName, String docID, int wikiID, int revisionID)
        throws Exception {
    TaggedText tt = null;
    try {
        TemporalExtractor tempExtractor = new TemporalExtractor(lang);
        tempExtractor.init();
        tt = tempExtractor.process(content);
    } catch (Exception ex) {
        logger.log(Level.WARNING, "Error processing doc " + docID + " (skip doc)", ex);
    }
    if (tt != null) {
        // stores id and text (not tagged) in docrep_index (document repository)
        Document docrep_doc = new Document();
        docrep_doc.add(new StringField("id", docID, Field.Store.YES));
        docrep_doc.add(new IntField("wikiID", wikiID, Field.Store.YES));
        docrep_doc.add(new IntField("revisionID", revisionID, Field.Store.YES));
        docrep_doc.add(new StringField("title", title, Field.Store.YES));
        docrep_doc.add(new StoredField("content", tt.getText()));
        docrep_doc.add(new StringField("filename", fileName, Field.Store.YES));
        docrep_writer.addDocument(docrep_doc);
        // stores id and text (not tagged) in doc_index for search
        Document doc_doc = new Document();
        doc_doc.add(new StringField("id", docID, Field.Store.YES));
        doc_doc.add(new IntField("wikiID", wikiID, Field.Store.YES));
        doc_doc.add(new IntField("revisionID", revisionID, Field.Store.YES));
        doc_doc.add(new TextField("title", title, Field.Store.NO));
        doc_doc.add(new TextField("content", tt.getText(), Field.Store.NO));
        doc_writer.addDocument(doc_doc);
        logger.log(Level.FINE, "Found {0} temporal events", tt.getEvents().size());
        for (TimeEvent event : tt.getEvents()) {
            // for each TIMEX3, store id, date string, offsets and context in time_index
            Document time_doc = new Document();
            time_doc.add(new StringField("id", docID, Field.Store.YES));
            //time_doc.add(new StringField("file", fileName, Field.Store.YES));
            //time_doc.add(new TextField("content", tt.getTaggedText(), Field.Store.NO));
            /*FieldType ft = new FieldType();
            ft.setStoreTermVectors(true);
            ft.setTokenized(true);
            ft.setStored(true);
            ft.setIndexed(true);
            ft.setStoreTermVectorPositions(true);
            ft.setOmitNorms(false);*/
            time_doc.add(new StringField("time", event.getDateString(), Field.Store.YES));
            time_doc.add(new IntField("offset_start", event.getStartOffset(), Field.Store.YES));
            time_doc.add(new IntField("offset_end", event.getEndOffset(), Field.Store.YES));
            time_doc.add(new TextField("context",
                    getTimeContext(tt.getText(), event.getStartOffset(), event.getEndOffset()),
                    Field.Store.NO));
            time_writer.addDocument(time_doc);
        }
    }
}
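Note: in both temporal indexers the untagged text is written twice: as a StoredField in the repository index (retrievable with the document but not searchable) and as an unstored TextField in the search index (searchable but not retrievable). A minimal sketch of fetching the stored text back from the repository index, with illustrative names:

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.IndexSearcher;

public class RepositoryTextExample {
    // Returns the stored (untagged) text for a repository-index hit.
    public static String repositoryText(IndexSearcher docrepSearcher, int luceneDocId) throws IOException {
        Document d = docrepSearcher.doc(luceneDocId);
        return d.get("content"); // added via new StoredField("content", ...)
    }
}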
From source file:dk.dma.msinm.lucene.SpatialLuceneTest.java
License:Open Source License
private Document newSampleDocument(int id, Shape... shapes) {
    Document doc = new Document();
    doc.add(new IntField("id", id, Field.Store.YES));
    for (Shape shape : shapes) {
        for (IndexableField f : strategy.createIndexableFields(shape)) {
            doc.add(f);
        }
        doc.add(new StoredField(strategy.getFieldName(), shape.toString()));
    }
    return doc;
}
From source file:dk.dma.msinm.service.MessageSearchService.java
License:Open Source License
/**
 * Adds a shape to the document
 *
 * @param doc   the Lucene document
 * @param shape the shape to add
 * @return the updated document
 */
private Document addShapeSearchFields(Document doc, Shape shape) {
    for (IndexableField f : strategy.createIndexableFields(shape)) {
        doc.add(f);
    }
    doc.add(new StoredField(strategy.getFieldName(), shape.toString()));
    return doc;
}
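Note: both spatial examples follow the same pattern: SpatialStrategy.createIndexableFields produces index-only fields, so the shape's string form is stored separately under the strategy's field name for later retrieval. A minimal sketch of reading it back (the method name is illustrative):

import org.apache.lucene.document.Document;
import org.apache.lucene.spatial.SpatialStrategy;

public class StoredShapeExample {
    // Returns the shape's stored string form, or null if absent.
    public static String storedShape(Document hit, SpatialStrategy strategy) {
        return hit.get(strategy.getFieldName());
    }
}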
From source file:edu.cmu.lti.oaqa.baseqa.concept.rerank.LuceneInMemoryConceptReranker.java
License:Apache License
private static Document toLuceneDocument(ConceptSearchResult result) {
    Document entry = new Document();
    entry.add(new StoredField("uri", result.getUri()));
    String names = String.join(", ", TypeUtil.getConceptNames(result.getConcept()));
    entry.add(new TextField("text", names, Field.Store.NO));
    return entry;
}
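Note: this last example pairs a searchable-but-unstored TextField with a stored-but-unsearched StoredField, so a match on the concept names returns the URI. A minimal retrieval sketch, assuming a searcher over this index and some query against the "text" field:

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;

public class UriLookupExample {
    // Returns the stored "uri" of the best-matching concept, or null if no hit.
    public static String topUri(IndexSearcher searcher, Query query) throws IOException {
        TopDocs top = searcher.search(query, 1);
        if (top.scoreDocs.length == 0)
            return null;
        return searcher.doc(top.scoreDocs[0].doc).get("uri");
    }
}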