Usage examples for org.apache.lucene.document.Field#setIntValue(int)
public void setIntValue(int value)
From source file:com.bewsia.script.LuceneHandler.java
License:Open Source License
protected void write(SEntity entity, Document doc) { String schema = entity.getSchema(); if (schema == null) schema = ""; String[] fields = schema.split("\\|"); for (int i = 0; i < fields.length && i + 1 < fields.length; i += 2) { String kind = fields[i];//from w w w . jav a 2s .com String fname = fields[i + 1]; if (SEntity.STRING.equalsIgnoreCase(kind)) { Field field = new Field(fname, entity.getString(fname), Store.YES, Index.NOT_ANALYZED_NO_NORMS); doc.add(field); } else if (SEntity.DOUBLE.equalsIgnoreCase(kind)) { NumericField field = new NumericField(fname, Store.YES, true); field.setDoubleValue(entity.getDouble(fname)); doc.add(field); } else if (SEntity.FLOAT.equalsIgnoreCase(kind)) { NumericField field = new NumericField(fname, Store.YES, true); field.setFloatValue(entity.getFloat(fname)); doc.add(field); } else if (SEntity.INTEGER.equalsIgnoreCase(kind)) { NumericField field = new NumericField(fname, Store.YES, true); field.setIntValue(entity.getInteger(fname)); doc.add(field); } else if (SEntity.LONG.equalsIgnoreCase(kind)) { NumericField field = new NumericField(fname, Store.YES, true); field.setLongValue(entity.getLong(fname)); doc.add(field); } else if (SEntity.ANALYZED.equalsIgnoreCase(kind)) { Field field = new Field(fname, entity.getString(fname), Store.YES, Index.ANALYZED); doc.add(field); } } }
From source file:com.chimpler.example.FacetLuceneIndexer.java
License:Apache License
/**
 * Demo driver: indexes books from a JSON file into a Lucene index plus a
 * facet taxonomy, then runs a facet search for "story" and prints the hits
 * and facet counts.
 *
 * NOTE(review): directories and the JSON path are hard-coded below; both
 * writers are opened in APPEND mode, so the index/taxonomy directories must
 * already exist — TODO confirm intended for this example.
 */
public static void main(String args[]) throws Exception {
    // Original CLI argument check, kept disabled because paths are hard-coded:
    // if (args.length != 3) {
    //     System.err.println("Parameters: [index directory] [taxonomy directory] [json file]");
    //     System.exit(1);
    // }
    String indexDirectory = "index";
    String taxonomyDirectory = "taxonomy";
    String jsonFileName = "/home/qiuqiang/workspace/facet-lucene-example/books.json";

    IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION,
            new WhitespaceAnalyzer(LUCENE_VERSION));
    writerConfig.setOpenMode(OpenMode.APPEND);
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexDirectory)), writerConfig);
    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(
            MMapDirectory.open(new File(taxonomyDirectory)), OpenMode.APPEND);
    TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(
            FSDirectory.open(new File(taxonomyDirectory)));

    String content = IOUtils.toString(new FileInputStream(jsonFileName));
    JSONArray bookArray = new JSONArray(content);

    // Field instances are created once and reused for every document
    // (setIntValue/setStringValue below) rather than re-allocated per book.
    Field idField = new IntField("id", 0, Store.YES);
    Field titleField = new TextField("title", "", Store.YES);
    Field authorsField = new TextField("authors", "", Store.YES);
    Field bookCategoryField = new TextField("book_category", "", Store.YES);

    // Drop any previously indexed documents before re-adding everything.
    indexWriter.deleteAll();

    FacetFields facetFields = new FacetFields(taxonomyWriter);
    for (int i = 0; i < bookArray.length(); i++) {
        Document document = new Document();
        JSONObject book = bookArray.getJSONObject(i);
        int id = book.getInt("id");
        String title = book.getString("title");
        String bookCategory = book.getString("book_category");

        List<CategoryPath> categoryPaths = new ArrayList<CategoryPath>();
        String authorsString = "";
        JSONArray authors = book.getJSONArray("authors");
        for (int j = 0; j < authors.length(); j++) {
            String author = authors.getString(j);
            if (j > 0) {
                authorsString += ", ";
            }
            // One facet category per author.
            categoryPaths.add(new CategoryPath("author", author));
            authorsString += author;
        }
        // NOTE(review): "book_category" + bookCategory relies on bookCategory
        // starting with '/' to form a path like "book_category/..." — confirm
        // against the JSON data.
        categoryPaths.add(new CategoryPath("book_category" + bookCategory, '/'));

        idField.setIntValue(id);
        titleField.setStringValue(title);
        authorsField.setStringValue(authorsString);
        bookCategoryField.setStringValue(bookCategory);
        facetFields.addFields(document, categoryPaths);
        document.add(idField);
        document.add(titleField);
        document.add(authorsField);
        document.add(bookCategoryField);
        indexWriter.addDocument(document);

        System.out.printf("Book: id=%d, title=%s, book_category=%s, authors=%s\n", id, title,
                bookCategory, authors);
    }

    // Two-phase commit of the taxonomy; roll back on failure.
    // NOTE(review): execution continues after rollback, so the search below
    // may run against an inconsistent taxonomy — TODO confirm intended.
    taxonomyWriter.prepareCommit();
    try {
        taxonomyWriter.commit();
    } catch (Exception e) {
        taxonomyWriter.rollback();
    }

    // taxonomyWriter.close();
    //
    // indexWriter.commit();
    // indexWriter.close();

    String query = "story";

    // Near-real-time readers opened directly on the writer (uncommitted docs visible).
    IndexReader indexReader = DirectoryReader.open(indexWriter, false);
    IndexReader indexReader2 = DirectoryReader.open(indexWriter, false);
    // Demonstrates whether the two NRT readers are the same instance.
    System.out.println(indexReader == indexReader2);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);

    // Refresh the taxonomy reader if the taxonomy changed since it was opened.
    TaxonomyReader newTaxonomyReader = DirectoryTaxonomyReader.openIfChanged(taxonomyReader);
    if (newTaxonomyReader != null) {
        TaxonomyReader tmp = taxonomyReader;
        taxonomyReader = newTaxonomyReader;
        tmp.close();
    } else {
        System.out.println("null");
    }

    // Request counts for the top 100 authors and book categories.
    ArrayList<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
    facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100));
    facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100));
    FacetSearchParams searchParams = new FacetSearchParams(facetRequests);

    ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title",
            new StandardAnalyzer(LUCENE_VERSION));
    Query luceneQuery = queryParser.parse(query);

    // Collectors to get top results and facets in a single search pass.
    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true);
    FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader);
    indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector));

    System.out.println("Found:");
    for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) {
        Document document = indexReader.document(scoreDoc.doc);
        System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n",
                document.get("id"), document.get("title"), document.get("book_category"),
                document.get("authors"), scoreDoc.score);
    }

    System.out.println("Facets:");
    for (FacetResult facetResult : facetsCollector.getFacetResults()) {
        System.out.println("- " + facetResult.getFacetResultNode().label);
        for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) {
            System.out.printf(" - %s (%f)\n", facetResultNode.label.toString(), facetResultNode.value);
            for (FacetResultNode subFacetResultNode : facetResultNode.subResults) {
                System.out.printf(" - %s (%f)\n", subFacetResultNode.label.toString(),
                        subFacetResultNode.value);
            }
        }
    }

    // Close readers before the final writer commits/closes.
    taxonomyReader.close();
    indexReader.close();
    taxonomyWriter.commit();
    taxonomyWriter.close();
    indexWriter.commit();
    indexWriter.close();
}
From source file:de.tudarmstadt.lt.lm.app.GenerateNgramIndex.java
License:Apache License
public void create_ngram_index(File ngram_joined_counts_file) throws IOException { File index_dir = new File(_index_dir, "ngram"); if (index_dir.exists()) { LOG.info("Ngram index already exists in directory '{}'.", index_dir.getAbsolutePath()); if (_overwrite) { LOG.info("Overwriting index '{}',", index_dir); index_dir.delete();/*from w w w .j a va 2 s .c o m*/ } else return; } index_dir.mkdirs(); Analyzer analyzer = new KeywordAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); iwc.setOpenMode(OpenMode.CREATE); // use 80 percent of the available total memory double total_mem_mb = (double) Runtime.getRuntime().maxMemory() / 1e6; double percentage_ram_buffer = Properties.ramBufferPercentage(); if (percentage_ram_buffer > 0) { double percentage_ram_buffer_mb = total_mem_mb * percentage_ram_buffer; LOG.info(String.format("Setting ram buffer size to %.2f MB (%.2f%% from %.2f MB)", percentage_ram_buffer_mb, percentage_ram_buffer * 100, total_mem_mb)); iwc.setRAMBufferSizeMB(percentage_ram_buffer_mb); } Directory directory = new MMapDirectory(index_dir); IndexWriter writer_ngram = new IndexWriter(directory, iwc); InputStream in = new FileInputStream(ngram_joined_counts_file); if (ngram_joined_counts_file.getName().endsWith(".gz")) in = new GZIPInputStream(in); LineIterator iter = new LineIterator(new BufferedReader(new InputStreamReader(in, "UTF-8"))); Document doc = new Document(); Field f_ngram = new StringField("ngram", "", Store.YES); doc.add(f_ngram); Field f_n = new IntField("cardinality", 0, Store.YES); doc.add(f_n); Field f_word = new StringField("word", "", Store.YES); doc.add(f_word); Field f_hist = new StringField("history", "", Store.YES); doc.add(f_hist); Field f_lower = new StringField("lower", "", Store.YES); doc.add(f_lower); Field f_count = new StoredField("num", 0L); doc.add(f_count); Field[] f_follow = new Field[4]; f_follow[0] = new StoredField("nf_s", 0L); doc.add(f_follow[0]); f_follow[1] = new 
StoredField("nf_N1", 0L); doc.add(f_follow[1]); f_follow[2] = new StoredField("nf_N2", 0L); doc.add(f_follow[2]); f_follow[3] = new StoredField("nf_N3", 0L); doc.add(f_follow[3]); Field[] f_precede = new Field[4]; f_precede[0] = new StoredField("np_s", 0L); doc.add(f_precede[0]); f_precede[1] = new StoredField("np_N1", 0L); doc.add(f_precede[1]); f_precede[2] = new StoredField("np_N2", 0L); doc.add(f_precede[2]); f_precede[3] = new StoredField("np_N3", 0L); doc.add(f_precede[3]); Field[] f_followerprecede = new Field[4]; f_followerprecede[0] = new StoredField("nfp_s", 0L); doc.add(f_followerprecede[0]); f_followerprecede[1] = new StoredField("nfp_N1", 0L); doc.add(f_followerprecede[1]); f_followerprecede[2] = new StoredField("nfp_N2", 0L); doc.add(f_followerprecede[2]); f_followerprecede[3] = new StoredField("nfp_N3", 0L); doc.add(f_followerprecede[3]); Long[][] N = new Long[][] { { 0L, 0L, 0L, 0L, 0L, 0L } }; Long[] S = new Long[] { 0L }; long c = 0; while (iter.hasNext()) { if (++c % 100000 == 0) LOG.info("Adding {}'th ngram.", c); String line = iter.next(); try { String[] splits = de.tudarmstadt.lt.utilities.StringUtils.rtrim(line).split("\t"); String ngram_str = splits[0]; if (de.tudarmstadt.lt.utilities.StringUtils.trim(ngram_str).isEmpty()) { LOG.warn("Ngram is empty, skipping line {}: '{}' (file '{}').", c, line, ngram_joined_counts_file); continue; } List<String> ngram = Arrays.asList(ngram_str.split(" ")); long num = Long.parseLong(splits[1]); int n = ngram.size(); f_ngram.setStringValue(ngram_str); f_n.setIntValue(n); f_word.setStringValue(ngram.get(ngram.size() - 1)); f_hist.setStringValue(StringUtils.join(ngram.subList(0, ngram.size() - 1), " ")); f_lower.setStringValue(StringUtils.join(ngram.subList(1, ngram.size()), " ")); f_count.setLongValue(num); for (int j = 0; j < f_follow.length; j++) { f_follow[j].setLongValue(0L); f_precede[j].setLongValue(0L); f_followerprecede[j].setLongValue(0L); } if (splits.length > 2 && !splits[2].isEmpty()) { // precede 
or follow or followerprecede String[] splits_ = splits[2].split(":"); String type = splits_[0]; String[] count_values = splits_[1].split(","); if (count_values.length > 0) { if ("n_f".equals(type)) f_follow[0].setLongValue(Long.parseLong(count_values[0])); else if ("n_p".equals(type)) f_precede[0].setLongValue(Long.parseLong(count_values[0])); else if ("n_fp".equals(type)) f_followerprecede[0].setLongValue(Long.parseLong(count_values[0])); } for (int i = 1; i < count_values.length; i++) { if ("n_f".equals(type)) f_follow[i].setLongValue(Long.parseLong(count_values[i])); else if ("n_p".equals(type)) f_precede[i].setLongValue(Long.parseLong(count_values[i])); else if ("n_fp".equals(type)) f_followerprecede[i].setLongValue(Long.parseLong(count_values[i])); } } if (splits.length > 3 && !splits[3].isEmpty()) { // should be follow or followerprecede String[] splits_ = splits[3].split(":"); String type = splits_[0]; String[] count_values = splits_[1].split(","); if (count_values.length > 0) { if ("n_f".equals(type)) f_follow[0].setLongValue(Long.parseLong(count_values[0])); else if ("n_p".equals(type)) f_precede[0].setLongValue(Long.parseLong(count_values[0])); else if ("n_fp".equals(type)) f_followerprecede[0].setLongValue(Long.parseLong(count_values[0])); } for (int i = 1; i < count_values.length; i++) { if ("n_f".equals(type)) f_follow[i].setLongValue(Long.parseLong(count_values[i])); else if ("n_p".equals(type)) f_precede[i].setLongValue(Long.parseLong(count_values[i])); else if ("n_fp".equals(type)) f_followerprecede[i].setLongValue(Long.parseLong(count_values[i])); } } if (splits.length > 4 && !splits[4].isEmpty()) { // should be followerprecede String[] splits_ = splits[4].split(":"); String type = splits_[0]; String[] count_values = splits_[1].split(","); if (count_values.length > 0) { if ("n_f".equals(type)) f_follow[0].setLongValue(Long.parseLong(count_values[0])); else if ("n_p".equals(type)) f_precede[0].setLongValue(Long.parseLong(count_values[0])); else if 
("n_fp".equals(type)) f_followerprecede[0].setLongValue(Long.parseLong(count_values[0])); } for (int i = 1; i < count_values.length; i++) { if ("n_f".equals(type)) f_follow[i].setLongValue(Long.parseLong(count_values[i])); else if ("n_p".equals(type)) f_precede[i].setLongValue(Long.parseLong(count_values[i])); else if ("n_fp".equals(type)) f_followerprecede[i].setLongValue(Long.parseLong(count_values[i])); } } writer_ngram.addDocument(doc); while (N.length <= n) { N = ArrayUtils.getConcatinatedArray(N, new Long[][] { { 0L, 0L, 0L, 0L, 0L, 0L } }); S = ArrayUtils.getConcatinatedArray(S, new Long[] { 0L }); } if (num == 1L) N[n][1]++; else if (num == 2L) N[n][2]++; else if (num == 3L) N[n][3]++; else if (num == 4L) N[n][4]++; else N[n][5]++; N[n][0]++; S[n] += num; } catch (Exception e) { LOG.error("Could not process line '{}' in file '{}:{}', malformed line.", line, ngram_joined_counts_file, c, e); } } writer_ngram.forceMergeDeletes(); writer_ngram.commit(); writer_ngram.close(); StringBuilder b = new StringBuilder(String.format( "#%n# Number of times where an ngram occurred: %n# at_least_once, exactly_once, exactly_twice, exactly_three_times, exactly_four_times, five_times_or_more.%n#%nmax_n=%d%nmax_c=6%n", N.length - 1)); for (int n = 1; n < N.length; n++) b.append(String.format("n%d=%s%n", n, StringUtils.join(N[n], ','))); for (int n = 1; n < S.length; n++) b.append(String.format("s%d=%d%n", n, S[n])); FileUtils.writeStringToFile(new File(_index_dir, "__sum_ngrams__"), b.toString()); }
From source file:org.apache.solr.legacy.TestLegacyField.java
License:Apache License
public void testLegacyIntField() throws Exception { Field fields[] = new Field[] { new LegacyIntField("foo", 5, Field.Store.NO), new LegacyIntField("foo", 5, Field.Store.YES) }; for (Field field : fields) { trySetByteValue(field);// ww w .j a v a 2 s. c o m trySetBytesValue(field); trySetBytesRefValue(field); trySetDoubleValue(field); field.setIntValue(6); // ok trySetFloatValue(field); trySetLongValue(field); trySetReaderValue(field); trySetShortValue(field); trySetStringValue(field); trySetTokenStreamValue(field); assertEquals(6, field.numericValue().intValue()); } }
From source file:org.apache.solr.legacy.TestLegacyField.java
License:Apache License
private void trySetIntValue(Field f) { expectThrows(IllegalArgumentException.class, () -> { f.setIntValue(Integer.MAX_VALUE); });//from w w w . j ava 2s . co m }
From source file:org.lukhnos.lucenestudy.Indexer.java
License:MIT License
public void addDocuments(List<Document> docs) throws IOException { // Reuse doc and field instances. See http://wiki.apache.org/lucene-java/ImproveIndexingSpeed Field titleField = new TextField(TITLE_FIELD_NAME, "", Field.Store.YES); Field titleDocsValueField = new SortedDocValuesField(TITLE_FIELD_NAME, new BytesRef(0)); Field yearField = new IntField(YEAR_FIELD_NAME, 0, Field.Store.YES); Field yearDocsValueField = new NumericDocValuesField(YEAR_FIELD_NAME, 0L); Field ratingField = new IntField(RATING_FIELD_NAME, 0, Field.Store.YES); Field ratingDocsValueField = new NumericDocValuesField(RATING_FIELD_NAME, 0L); Field positiveField = new IntField(POSITIVE_FIELD_NAME, 0, Field.Store.YES); Field reviewField = new TextField(REVIEW_FIELD_NAME, "", Field.Store.YES); Field sourceField = new StringField(SOURCE_FIELD_NAME, "", Field.Store.YES); for (Document doc : docs) { org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document(); if (doc.title != null && !doc.title.isEmpty()) { titleField.setStringValue(doc.title); luceneDoc.add(titleField);//from w ww. j ava2s. com titleDocsValueField.setBytesValue(new BytesRef(doc.title)); luceneDoc.add(titleDocsValueField); } yearField.setIntValue(doc.year); luceneDoc.add(yearField); yearDocsValueField.setLongValue((long) doc.year); luceneDoc.add(yearDocsValueField); ratingField.setIntValue(doc.rating); luceneDoc.add(ratingField); ratingDocsValueField.setLongValue((long) doc.rating); luceneDoc.add(ratingDocsValueField); positiveField.setIntValue(doc.positive ? 1 : 0); luceneDoc.add(positiveField); if (doc.review != null && !doc.review.isEmpty()) { reviewField.setStringValue(doc.review); luceneDoc.add(reviewField); } if (doc.source != null && !doc.source.isEmpty()) { sourceField.setStringValue(doc.source); luceneDoc.add(sourceField); } indexWriter.addDocument(luceneDoc); } indexWriter.commit(); }
From source file:suonos.lucene.fields.IndexedField.java
License:Apache License
/**
 * Writes {@code value} into the Lucene {@code field} using the setter that
 * matches this indexed field's declared {@code javaType}.
 *
 * FIX: the original threw {@code Validate.notAllowed("")} unconditionally at
 * the end of the method, so every call failed even after the value had been
 * set successfully. The throw now fires only when no branch matched.
 *
 * @param field the reusable Lucene field to populate
 * @param value the value, expected to be an instance of {@code javaType}
 */
public void setValue(Field field, Object value) {
    if (javaType == String.class) {
        field.setStringValue((String) value);
    } else if (javaType == Double.class) {
        field.setDoubleValue((Double) value);
    } else if (javaType == Float.class) {
        field.setFloatValue((Float) value);
    } else if (javaType == Integer.class) {
        field.setIntValue((Integer) value);
    } else if (javaType == Long.class) {
        field.setLongValue((Long) value);
    } else if (javaType == Boolean.class) {
        // Booleans are stored as their string representation.
        field.setStringValue(getBoolValue(value));
    } else if (javaType == Date.class) {
        // Dates are stored as epoch millis.
        field.setLongValue(((Date) value).getTime());
    } else if (javaType == Byte[].class) {
        // NOTE(review): the key is Byte[].class but the cast is byte[] —
        // confirm callers register the primitive array type consistently.
        field.setBytesValue((byte[]) value);
    } else {
        // Only reached for an unsupported javaType.
        throw Validate.notAllowed("Unsupported java type: " + javaType);
    }
}
From source file:suonos.lucene.Statement.java
License:Apache License
/**
 * Persists a {@link StoreObject} into the Lucene index as an upsert keyed by
 * its id: assigns a fresh UID when missing, fills the bookkeeping fields
 * (update time, type, compressed JSON payload), lets the model type add its
 * own fields, then replaces any existing document with the same id.
 *
 * @param object the object to save; its id may be assigned here
 * @throws IOException on index failure
 */
public void saveObject(StoreObject object) throws IOException {
    openSharedWriter();
    Document doc = new Document();
    Field fld;
    // Assign a new UID to first-time objects.
    if (object.getId() == null) {
        object.setId(Uids.newUID());
        log.debug("New UID {}", object.getId());
    }
    // Update date (seconds-resolution timestamp stored as an int).
    //
    fld = context.update$_field();
    fld.setIntValue(MTime.fromCurrentTime());
    doc.add(fld);
    // Document type: the simple class name, used to discriminate on load.
    //
    fld = context.type$_field();
    fld.setStringValue(object.getClass().getSimpleName());
    doc.add(fld);
    // Document object serialized in json format and compressed using
    // snappy.
    // NOTE(review): getBytes() uses the platform default charset — consider
    // an explicit UTF-8 to make the payload portable; verify against readers.
    //
    fld = context.obj$_field();
    fld.setBytesValue(Snappy.compress(serializeToJson(object).getBytes()));
    doc.add(fld);
    // Model-specific fields are appended by the registered model type.
    ModelType modelType = context.getModelType(object.getClass());
    modelType.saveToLuceneDoc(context, object, doc);
    // Upsert: replaces any existing document with the same id.
    Term term = new Term("id", object.getId());
    indexWriter().updateDocument(term, doc);
    // commit();
    // object = queryObject(object.getClass(), object.getUid()).getObject();
}