List of usage examples for `org.apache.lucene.index.IndexWriter#addDocument`
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException
From source file:com.mss.mirage.recruitment.ConsultantAction.java
License:Open Source License
private static void indexFile(IndexWriter writer, File f) throws IOException { if (f.isHidden() || !f.exists() || !f.canRead()) { return;// w w w . ja v a2s.c om } //System.out.println("Indexing " + f.getCanonicalPath()); Document doc = new Document(); doc.add(Field.Text("contents", new FileReader(f))); //doc.add(Field.Keyword("filename", f..getCanonicalPath())); doc.add(Field.Keyword("filename", f.getAbsolutePath())); writer.addDocument(doc); }
From source file:com.mycompany.lucenedemo.IndexFiles.java
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);/* w w w. j av a2 s . co m*/ // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:com.mycompany.mavenproject1.Main.java
/**
 * Adds one Item to the index.
 *
 * <p>Exact-match (untokenized) fields: "id", "price". Full-text fields:
 * "name", "category", "description". All fields are stored.
 *
 * @param w open writer the document is appended to
 * @param i item to index
 * @throws IOException if the writer fails to add the document
 */
private static void addItem(IndexWriter w, Item i) throws IOException {
    Document doc = new Document();
    doc.add(new StringField("id", String.valueOf(i.getId()), Field.Store.YES));
    doc.add(new StringField("price", String.valueOf(i.getPrice()), Field.Store.YES));
    // TextField rejects null values, so guard every getter that may return
    // null — the original only guarded category and would throw on a null
    // name or description.
    doc.add(new TextField("name", i.getName() != null ? i.getName() : "", Field.Store.YES));
    // NOTE(review): "ni ma" looks like a leftover placeholder default for a
    // missing category — confirm intent before changing the indexed value.
    doc.add(new TextField("category", i.getCategory() != null ? i.getCategory() : "ni ma", Field.Store.YES));
    doc.add(new TextField("description", i.getDescription() != null ? i.getDescription() : "",
            Field.Store.YES));
    w.addDocument(doc);
}
From source file:com.mycompany.restlet.search.sample.indexer.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { InputStream stream = Files.newInputStream(file); // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);/*ww w . j a v a 2 s.c o m*/ // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", lastModified, Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } }
From source file:com.nearinfinity.blur.search.TestingPagingCollector.java
License:Apache License
private static IndexReader getReaderFlatScore(int length) throws Exception { RAMDirectory directory = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(LUCENE_VERSION, new KeywordAnalyzer())); for (int i = 0; i < length; i++) { Document document = new Document(); document.add(new Field("f1", "value", Store.NO, Index.ANALYZED_NO_NORMS)); indexWriter.addDocument(document); }//from www . ja v a 2s.c om indexWriter.close(); return IndexReader.open(directory); }
From source file:com.nearinfinity.blur.utils.TermDocIterableTest.java
License:Apache License
private void addDocumentBlock(int id, int count, IndexWriter writer) throws IOException { for (int i = 0; i < count; i++) { Document document = new Document(); document.add(new Field("id", Integer.toString(id), Store.YES, Index.NOT_ANALYZED_NO_NORMS)); document.add(new Field("field", Integer.toString(i), Store.YES, Index.NOT_ANALYZED_NO_NORMS)); for (int j = 0; j < 100; j++) { document.add(new Field("field" + j, "testing here testing here testing here testing here testing here testing here testing here", Store.YES, Index.NO)); }// w w w .j ava 2 s . c o m writer.addDocument(document); } }
From source file:com.nero.model.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, * recurses over files and directories found under the given directory. * //from w w w . j a v a2 s . c o m * NOTE: This method indexes one document per input file. This is slow. For good * throughput, put multiple documents into your input file(s). An example of this is * in the benchmark module, which can create "line doc" files, one document per line, * using the * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); pathField.setIndexOptions(IndexOptions.DOCS_ONLY); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a NumericField that is indexed (i.e. 
efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. NumericField modifiedField = new NumericField("modified"); modifiedField.setLongValue(file.lastModified()); doc.add(modifiedField); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:com.netcrest.pado.index.gemfire.lucene.TemporalLuceneDynamicIndexing.java
License:Open Source License
/**
 * Builds and appends a single Lucene document for the specified temporal
 * event. The document layout is chosen from the event value's runtime type:
 * a KeyMap with a declared KeyType, a plain Map, or a POJO indexed via its
 * reflected getters.
 *
 * @param parser Lucene parser handed through to the document builders
 * @param event  temporal entry event whose key/value pair is indexed
 * @param writer index writer the new document is appended to
 */
private void buildTemporalEntry(StandardQueryParser parser, EntryEvent<ITemporalKey, ITemporalData> event,
        IndexWriter writer) {
    LuceneBuilder luceneBuilder = LuceneBuilder.getLuceneBuilder();
    boolean isKeyMap = false;
    KeyType keyType = null;
    Set<Object> keySet = null;
    Object firstDataObject = null;
    Method[] attributeGetters = null;
    boolean isIdentityKeyPrimitive = false;

    // First, unwrap the event value and work out how it should be indexed.
    ITemporalKey tk = event.getKey();
    ITemporalData data = event.getNewValue();
    if (data instanceof GemfireTemporalData) {
        firstDataObject = ((GemfireTemporalData) data).getValue();
    } else {
        firstDataObject = data;
    }
    isKeyMap = firstDataObject instanceof KeyMap;
    if (isKeyMap == false) {
        if (firstDataObject instanceof Map) {
            // Plain map: indexed by its key set.
            keySet = ((Map) firstDataObject).keySet();
        } else {
            // POJO: indexed via its reflected attribute getters.
            attributeGetters = ReflectionHelper.getAttributeGetters(data.getClass());
        }
    } else {
        keyType = ((KeyMap) firstDataObject).getKeyType();
        if (keyType == null) {
            // KeyMap without a declared KeyType falls back to map-style indexing.
            keySet = ((Map) firstDataObject).keySet();
        }
    }
    Object identityKey = tk.getIdentityKey();
    isIdentityKeyPrimitive = ReflectionHelper.isPrimitiveWrapper(identityKey.getClass());

    // Next, create the Lucene doc for the event.
    LuceneField luceneField = new LuceneField();
    // Clone the shared formatter — SimpleDateFormat is not thread-safe.
    SimpleDateFormat format = (SimpleDateFormat) DateTool.Resolution.DAY.format.clone();
    Document doc = null;
    try {
        if (keyType != null) {
            KeyMap keyMap;
            if (data instanceof GemfireTemporalData) {
                keyMap = (KeyMap) ((GemfireTemporalData) data).getValue();
            } else {
                keyMap = (KeyMap) data;
            }
            keyType = keyMap.getKeyType();
            Set<String> nameSet = keyType.getNameSet();
            // TODO: See if we can support binary types
            // createDoc();
            doc = luceneBuilder.createKeyMapDocument(parser, writer, tk, data, -1, luceneField, keyType,
                    keyMap, nameSet, isIdentityKeyPrimitive, true, format);
        } else if (keySet != null) {
            Map dataMap;
            if (data instanceof GemfireTemporalData) {
                dataMap = (Map) ((GemfireTemporalData) data).getValue();
            } else {
                dataMap = (Map) data;
            }
            // TODO: See if we can support binary types
            // createDoc();
            doc = luceneBuilder.createMapDocument(parser, writer, tk, data, luceneField, dataMap, keySet,
                    isIdentityKeyPrimitive, format);
        } else {
            if (attributeGetters != null && attributeGetters.length > 0) {
                doc = luceneBuilder.createPojoDocument(parser, writer, tk, data, -1l, luceneField,
                        attributeGetters, isIdentityKeyPrimitive, true/* isNew */, format);
            }
        }
    } catch (Exception ex) {
        // NOTE(review): failures are logged and this entry is silently not
        // indexed — presumably deliberate best-effort behavior; confirm.
        Logger.error(ex);
    }
    // Append the newly created doc to Lucene.
    if (doc != null) {
        try {
            writer.addDocument(doc);
        } catch (Exception ex) {
            Logger.error(ex);
        }
    }
}
From source file:com.netcrest.pado.index.provider.lucene.LuceneBuilder.java
License:Open Source License
private void updateKeyMapDocument(StandardQueryParser parser, IndexWriter writer, ITemporalList tl, ITemporalKey tk, ITemporalData data, long endWrittenTime, LuceneField luceneField, KeyType keyType, KeyMap keyMap, Set<String> keyTypeNameSet, boolean isIdentityKeyPrimitive, SimpleDateFormat format) throws IOException { Query query = null;//from w w w.ja va 2s. c om try { String queryString = String.format(TEMPORAL_KEY_QUERY_PREDICATE, tk.getIdentityKey(), tk.getStartValidTime(), tk.getEndValidTime(), tk.getWrittenTime()); query = parser.parse(queryString, "__doc"); } catch (Exception ex) { // Lucene 4.7 bug, internal message not serializable // Send message instead of nesting the cause. throw new RuntimeException(ex.getMessage()); } writer.deleteDocuments(query); Document doc = createKeyMapDocument(parser, writer, tk, data, endWrittenTime, luceneField, keyType, keyMap, keyTypeNameSet, isIdentityKeyPrimitive, false, format); writer.addDocument(doc); }
From source file:com.netcrest.pado.index.provider.lucene.LuceneBuilder.java
License:Open Source License
private void updatePojoDocument(StandardQueryParser parser, IndexSearcher searcher, IndexWriter writer, ITemporalKey tk, ITemporalData data, long endWrittenTime, LuceneField luceneField, Method[] attributeGetters, boolean isIdentityKeyPrimitive, SimpleDateFormat format) throws IOException, IllegalArgumentException, IllegalAccessException, InvocationTargetException { Query query = null;//w ww. j a v a2s .c o m try { String queryString = String.format(TEMPORAL_KEY_QUERY_PREDICATE, tk.getIdentityKey(), tk.getStartValidTime(), tk.getEndValidTime(), tk.getWrittenTime()); query = parser.parse(queryString, "__doc"); } catch (Exception ex) { // Lucene 4.7 bug, internal message not serializable // Send message instead of nesting the cause. throw new RuntimeException(ex.getMessage()); } writer.deleteDocuments(query); Document doc = createPojoDocument(parser, writer, tk, data, endWrittenTime, luceneField, attributeGetters, isIdentityKeyPrimitive, false, format); writer.addDocument(doc); }