List of usage examples for org.apache.lucene.index IndexWriter addDocument
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException
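addDocument adds a document to the index, returning a sequence number for the operation, and throws IOException on a low-level IO error. Before the project-specific examples below, here is a minimal, self-contained sketch of the call, assuming a recent Lucene release (5.x or later); the class name, index path, and field names are illustrative, not taken from any of the projects listed here:

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class AddDocumentSketch {
    public static void main(String[] args) throws IOException {
        // Open (or create) an index directory and a writer with a standard analyzer.
        try (Directory dir = FSDirectory.open(Paths.get("index"));
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            // StringField: indexed as a single token; TextField: tokenized full text.
            doc.add(new StringField("id", "doc-1", Field.Store.YES));
            doc.add(new TextField("contents", "hello lucene", Field.Store.NO));
            writer.addDocument(doc);
            writer.commit();
        }
    }
}

Several of the examples below check IndexWriterConfig.OpenMode before choosing between addDocument (fresh index) and updateDocument (replace an existing entry by key).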
From source file:com.browseengine.bobo.test.TestPathMultiVal.java
License:Apache License
@Override
protected void setUp() throws Exception {
    directory = new RAMDirectory();
    analyzer = new WhitespaceAnalyzer();
    IndexWriter writer = new IndexWriter(directory, analyzer, true, MaxFieldLength.UNLIMITED);

    Document doc = new Document();
    addMetaDataField(doc, PathHandlerName, new String[] { "/a/b/c", "/a/b/d" });
    writer.addDocument(doc);
    writer.commit();

    PathFacetHandler pathHandler = new PathFacetHandler("path", true);
    facetHandlers.add(pathHandler);
}
From source file:com.bsiag.smartfield.server.services.custom.lucene.indexwriter.IndexWriterService.java
License:Open Source License
public void createZipIndex(RemoteFile file) {
    try {
        Set<String> zipCache = new HashSet<String>();
        String path = BundleContextUtility
                .resolve("${workspace_loc}/com.bsiag.smartfield.server/resources/index");
        IOUtility.deleteDirectory(path);
        Directory index = new SimpleFSDirectory(new File(path));
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_34,
                new StandardAnalyzer(Version.LUCENE_34));
        IndexWriter w = new IndexWriter(index, config);
        file.setCharsetName("UTF-8");
        BufferedReader reader = new BufferedReader(file.getDecompressedReader());
        String line;
        while ((line = reader.readLine()) != null) {
            String[] tokens = line.split("\t");
            if (tokens.length > 1) {
                String zip = tokens[0].trim();
                String city = tokens[1].trim();
                if (StringUtility.hasText(city) && StringUtility.hasText(zip)) {
                    w.addDocument(createDocoment(zip, city));
                    zipCache.add(zip + city);
                }
            }
        }
        w.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:com.bugull.mongo.lucene.backend.IndexInsertJob.java
License:Apache License
@Override
public void doJob() {
    Class<?> clazz = obj.getClass();
    String name = MapperUtil.getEntityName(clazz);
    IndexWriterHolder holder = IndexWriterHolder.getInstance();
    IndexWriter writer = holder.get(name);
    Document doc = new Document();
    IndexCreator creator = new IndexCreator(obj, "");
    creator.create(doc);
    try {
        writer.addDocument(doc);
    } catch (CorruptIndexException ex) {
        logger.error("IndexWriter can not add a document to the lucene index", ex);
    } catch (IOException ex) {
        logger.error("IndexWriter can not add a document to the lucene index", ex);
    }
}
From source file:com.bugull.mongo.lucene.backend.IndexInsertTask.java
License:Apache License
@Override
public void run() {
    Class<?> clazz = obj.getClass();
    String name = MapperUtil.getEntityName(clazz);
    IndexWriterCache cache = IndexWriterCache.getInstance();
    IndexWriter writer = cache.get(name);
    Document doc = new Document();
    IndexCreator creator = new IndexCreator(obj, "");
    creator.create(doc);
    try {
        writer.addDocument(doc);
    } catch (CorruptIndexException ex) {
        logger.error("IndexWriter can not add a document to the lucene index", ex);
    } catch (IOException ex) {
        logger.error("IndexWriter can not add a document to the lucene index", ex);
    }
}
From source file:com.burkeware.search.api.internal.lucene.DefaultIndexer.java
License:Open Source License
/**
 * Writes the json representation of a single object as a single document entry
 * inside the Lucene index.
 *
 * @param jsonObject the json object to be written to the index
 * @param resource   the configuration to transform json to a lucene document
 * @param writer     the lucene index writer
 * @throws java.io.IOException when writing the document fails
 */
private void writeObject(final Object jsonObject, final Resource resource, final IndexWriter writer)
        throws IOException {
    Document document = new Document();
    document.add(new Field(DEFAULT_FIELD_JSON, jsonObject.toString(), Field.Store.YES, Field.Index.NO));
    document.add(new Field(DEFAULT_FIELD_UUID, UUID.randomUUID().toString(), Field.Store.YES,
            Field.Index.ANALYZED_NO_NORMS));
    document.add(new Field(DEFAULT_FIELD_CLASS, resource.getResourceObject().getName(), Field.Store.YES,
            Field.Index.ANALYZED_NO_NORMS));
    document.add(new Field(DEFAULT_FIELD_RESOURCE, resource.getName(), Field.Store.YES,
            Field.Index.ANALYZED_NO_NORMS));
    for (SearchableField searchableField : resource.getSearchableFields()) {
        Object value = JsonPath.read(jsonObject, searchableField.getExpression());
        document.add(new Field(searchableField.getName(), String.valueOf(value), Field.Store.YES,
                Field.Index.ANALYZED_NO_NORMS));
    }
    if (getLogger().isDebugEnabled()) {
        getLogger().debug(this.getClass().getSimpleName(), "Writing document: " + document);
    }
    writer.addDocument(document);
}
From source file:com.cep.darkstar.onramp.djnews.IndexFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over the files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good
 * throughput, put multiple documents into your input file(s). An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file   The file to index, or the directory to recurse into to find files to index
 * @throws IOException
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an
                // "access denied" message; checking if the file can be read doesn't help
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a field that is
                // indexed (i.e. searchable), but don't tokenize the field into separate
                // words and don't index term frequency or positional information:
                Field pathField = new Field("path", file.getPath(), Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS);
                pathField.setOmitTermFreqAndPositions(true);
                doc.add(pathField);

                // Add the last modified date of the file as a field named "modified". Use a
                // NumericField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter). This indexes to millisecond resolution, which is
                // often too fine. You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down to the resolution you require.
                // For example the long value 2011021714 would mean February 17, 2011, 2-3 PM.
                NumericField modifiedField = new NumericField("modified");
                modifiedField.setLongValue(file.lastModified());
                doc.add(modifiedField);

                // Add the contents of the file to a field named "contents". Specify a
                // Reader, so that the text of the file is tokenized and indexed, but not
                // stored. Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case, searching for special characters will fail.
                doc.add(new Field("contents",
                        new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed),
                    // so we use updateDocument instead to replace the old one matching the
                    // exact path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file:com.chenyi.langeasy.lucene.IndexFiles.java
License:Apache License
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        count++;
        if (count % 500 == 499) {
            System.out.println(count + "/" + new Date());
        }

        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a field that is
        // indexed (i.e. searchable), but don't tokenize the field into separate
        // words and don't index term frequency or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified". Use a
        // LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter). This indexes to millisecond resolution, which is
        // often too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", lastModified, Field.Store.NO));

        // Add the contents of the file to a field named "contents". Specify a
        // Reader, so that the text of the file is tokenized and indexed, but not
        // stored. Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case, searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed),
            // so we use updateDocument instead to replace the old one matching the
            // exact path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
From source file:com.chimpler.example.FacetLuceneIndexer.java
License:Apache License
public static void main(String args[]) throws Exception {
    // if (args.length != 3) {
    //     System.err.println("Parameters: [index directory] [taxonomy directory] [json file]");
    //     System.exit(1);
    // }
    String indexDirectory = "index";
    String taxonomyDirectory = "taxonomy";
    String jsonFileName = "/home/qiuqiang/workspace/facet-lucene-example/books.json";

    IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION,
            new WhitespaceAnalyzer(LUCENE_VERSION));
    writerConfig.setOpenMode(OpenMode.APPEND);
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexDirectory)), writerConfig);

    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(
            MMapDirectory.open(new File(taxonomyDirectory)), OpenMode.APPEND);
    TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(
            FSDirectory.open(new File(taxonomyDirectory)));

    String content = IOUtils.toString(new FileInputStream(jsonFileName));
    JSONArray bookArray = new JSONArray(content);

    Field idField = new IntField("id", 0, Store.YES);
    Field titleField = new TextField("title", "", Store.YES);
    Field authorsField = new TextField("authors", "", Store.YES);
    Field bookCategoryField = new TextField("book_category", "", Store.YES);

    indexWriter.deleteAll();

    FacetFields facetFields = new FacetFields(taxonomyWriter);
    for (int i = 0; i < bookArray.length(); i++) {
        Document document = new Document();

        JSONObject book = bookArray.getJSONObject(i);
        int id = book.getInt("id");
        String title = book.getString("title");
        String bookCategory = book.getString("book_category");

        List<CategoryPath> categoryPaths = new ArrayList<CategoryPath>();

        String authorsString = "";
        JSONArray authors = book.getJSONArray("authors");
        for (int j = 0; j < authors.length(); j++) {
            String author = authors.getString(j);
            if (j > 0) {
                authorsString += ", ";
            }
            categoryPaths.add(new CategoryPath("author", author));
            authorsString += author;
        }
        categoryPaths.add(new CategoryPath("book_category" + bookCategory, '/'));

        idField.setIntValue(id);
        titleField.setStringValue(title);
        authorsField.setStringValue(authorsString);
        bookCategoryField.setStringValue(bookCategory);

        facetFields.addFields(document, categoryPaths);
        document.add(idField);
        document.add(titleField);
        document.add(authorsField);
        document.add(bookCategoryField);

        indexWriter.addDocument(document);

        System.out.printf("Book: id=%d, title=%s, book_category=%s, authors=%s\n",
                id, title, bookCategory, authors);
    }

    taxonomyWriter.prepareCommit();
    try {
        taxonomyWriter.commit();
    } catch (Exception e) {
        taxonomyWriter.rollback();
    }

    // taxonomyWriter.close();
    //
    // indexWriter.commit();
    // indexWriter.close();

    String query = "story";

    IndexReader indexReader = DirectoryReader.open(indexWriter, false);
    IndexReader indexReader2 = DirectoryReader.open(indexWriter, false);
    System.out.println(indexReader == indexReader2);

    IndexSearcher indexSearcher = new IndexSearcher(indexReader);

    TaxonomyReader newTaxonomyReader = DirectoryTaxonomyReader.openIfChanged(taxonomyReader);
    if (newTaxonomyReader != null) {
        TaxonomyReader tmp = taxonomyReader;
        taxonomyReader = newTaxonomyReader;
        tmp.close();
    } else {
        System.out.println("null");
    }

    ArrayList<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
    facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100));
    facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100));
    FacetSearchParams searchParams = new FacetSearchParams(facetRequests);

    ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title",
            new StandardAnalyzer(LUCENE_VERSION));
    Query luceneQuery = queryParser.parse(query);

    // Collectors to get top results and facets
    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true);
    FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader);
    indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector));

    System.out.println("Found:");
    for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) {
        Document document = indexReader.document(scoreDoc.doc);
        System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n",
                document.get("id"), document.get("title"), document.get("book_category"),
                document.get("authors"), scoreDoc.score);
    }

    System.out.println("Facets:");
    for (FacetResult facetResult : facetsCollector.getFacetResults()) {
        System.out.println("- " + facetResult.getFacetResultNode().label);
        for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) {
            System.out.printf("    - %s (%f)\n", facetResultNode.label.toString(),
                    facetResultNode.value);
            for (FacetResultNode subFacetResultNode : facetResultNode.subResults) {
                System.out.printf("        - %s (%f)\n", subFacetResultNode.label.toString(),
                        subFacetResultNode.value);
            }
        }
    }

    taxonomyReader.close();
    indexReader.close();

    taxonomyWriter.commit();
    taxonomyWriter.close();

    indexWriter.commit();
    indexWriter.close();
}
From source file:com.codecrate.shard.search.ObjectIndexer.java
License:Apache License
public void save(Serializable id, Object entity) {
    removeDocuments(id);

    IndexWriter writer = null;
    try {
        writer = new IndexWriter(directory, analyzer, DO_NOT_CREATE_INDEX);
        Document document = new Document();
        document.add(Field.Keyword(HibernateObjectSearcher.FIELD_CLASS, entity.getClass().getName()));
        document.add(Field.Keyword(HibernateObjectSearcher.FIELD_ID, id.toString()));
        document.add(Field.Text(HibernateObjectSearcher.FIELD_TEXT, entity.toString()));
        LOG.debug("saving " + document);
        writer.addDocument(document);
    } catch (IOException e) {
        LOG.error("Error updating index for object " + entity, e);
    } finally {
        closeWriter(writer);
    }
}
From source file:com.codenvy.test.lucene.DeleteFilesWithSameName.java
License:Open Source License
private static void indexDocs(IndexWriter writer, Path file) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        Document doc = new Document();

        System.out.println("file path " + file.toAbsolutePath().toString());
        Field pathField = new StringField(PATH, file.toAbsolutePath().toString(), Field.Store.YES);
        doc.add(pathField);

        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            System.out.println("updating " + file);
            writer.updateDocument(new Term(PATH, file.toString()), doc);
        }
    }
}