List of usage examples for org.apache.lucene.index.IndexWriter#getConfig()
public LiveIndexWriterConfig getConfig()
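For orientation before the project examples: getConfig() returns the writer's LiveIndexWriterConfig, a live view of the configuration the writer was created with (the original IndexWriterConfig must not be reused after construction). The sketch below shows the OpenMode check that most of the examples on this page build on; it is a minimal illustration against the Lucene 5+ API, and the directory path and the "path" field name are made up for the example, not taken from any of the sources.

  import java.nio.file.Paths;

  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.document.StringField;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.IndexWriterConfig;
  import org.apache.lucene.index.IndexWriterConfig.OpenMode;
  import org.apache.lucene.index.Term;
  import org.apache.lucene.store.FSDirectory;

  public class GetConfigOpenModeSketch {
    public static void main(String[] args) throws Exception {
      IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
      iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
      try (IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get("example-index")), iwc)) {
        Document doc = new Document();
        doc.add(new StringField("path", "docs/example.txt", Field.Store.YES));
        // getConfig() exposes the LiveIndexWriterConfig actually in effect;
        // the OpenMode decides between a plain add and an update-by-term:
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
          writer.addDocument(doc); // fresh index: no older copy of this doc can exist
        } else {
          writer.updateDocument(new Term("path", "docs/example.txt"), doc); // replace if present
        }
      }
    }
  }

The same add-vs-update branch appears, with project-specific field names, in most of the indexers below.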
From source file:net.riezebos.thoth.content.search.Indexer.java
License:Apache License
protected void addToIndex(IndexWriter writer, String resourcePath, String resourceType, String title,
    String contents, Map<String, String> metaTags) throws IOException {
  String extension = ThothUtil.getExtension(resourcePath);
  if (extension == null)
    extension = "";
  extension = extension.toLowerCase();

  Document document = new Document();
  document.add(new StringField(INDEX_PATH, resourcePath, Field.Store.YES));
  document.add(new TextField(INDEX_TYPE, resourceType, Store.YES));
  document.add(new TextField(INDEX_TITLE, title, Store.YES));
  document.add(new TextField(INDEX_CONTENTS, contents, Store.NO));
  document.add(new TextField(INDEX_USED, "true", Store.NO));
  document.add(new TextField(INDEX_EXTENSION, extension, Store.NO));
  metaTags.entrySet().stream().forEach(entry -> document
      .add(new TextField(entry.getKey().toLowerCase(), String.valueOf(entry.getValue()), Store.NO)));

  if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
    // New index, so we just add the document (no old document can be there):
    LOG.debug("Indexer for context " + contentManager.getContextName() + " added " + resourcePath);
    writer.addDocument(document);
  } else {
    // Existing index (an old copy of this document may have been indexed) so
    // we use updateDocument instead to replace the old one matching the exact
    // path, if present:
    LOG.debug("Indexer for context " + contentManager.getContextName() + " updated " + resourcePath);
    writer.updateDocument(new Term(INDEX_PATH, resourcePath), document);
  }
}
From source file:net.riezebos.thoth.content.search.util.TestIndexer.java
License:Apache License
@Override
protected IndexWriter getWriter(boolean wipeIndex) throws IOException {
  LiveIndexWriterConfig config = mock(LiveIndexWriterConfig.class);
  when(config.getOpenMode()).thenReturn(wipeIndex ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
  IndexWriter indexWriter = mock(IndexWriter.class);
  when(indexWriter.getConfig()).thenReturn(config);
  recordAddDocument(indexWriter);
  recordUpdateDocument(indexWriter);
  return indexWriter;
}
From source file:nl.knaw.huygens.timbuctoo.lucene.demoTwo.IndexFiles.java
License:Apache License
private static void indexFields(IndexWriter writer, Path file, long lastModified) throws IOException {
  try (InputStream stream = Files.newInputStream(file)) {
    // make a new, empty document
    Document doc = new Document();

    Field pathField = new StringField("path", file.toString(), Field.Store.YES);
    doc.add(pathField);
    doc.add(new LongPoint("modified", lastModified));

    // Add the contents of the file to a field named "contents". Specify a Reader,
    // so that the text of the file is tokenized and indexed, but not stored.
    // Note that FileReader expects the file to be in UTF-8 encoding.
    // If that's not the case searching for special characters will fail.
    BufferedReader br = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
    String line = null;
    String[] splitLine;
    String content = "";
    boolean inContent = false;
    while ((line = br.readLine()) != null) {
      if (inContent && !line.contains(":")) {
        content += " " + line;
      }
      FieldType ft = new FieldType();
      ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
      ft.setStored(true);
      if (!line.isEmpty() && line.contains(":")) {
        splitLine = line.split(":");
        if (splitLine[0].equals("content")) {
          inContent = true;
          if (splitLine.length > 1) {
            content += splitLine[1];
          }
        } else if (splitLine[0].equals("end_content")) {
          inContent = false;
          doc.add(new Field("content", content.trim(), ft));
        } else if (splitLine.length > 1) {
          // length guard: a line like "key:" with nothing after the colon
          // would otherwise throw ArrayIndexOutOfBoundsException here
          doc.add(new Field(splitLine[0].trim(), splitLine[1].trim(), ft));
          System.out.println(splitLine[0].trim() + " - " + splitLine[1].trim());
        }
      }
    }

    if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
      // New index, so we just add the document (no old document can be there):
      System.out.println("adding " + file);
      writer.addDocument(doc);
    } else {
      // Existing index (an old copy of this document may have been indexed) so
      // we use updateDocument instead to replace the old one matching the exact
      // path, if present:
      System.out.println("updating " + file);
      writer.updateDocument(new Term("path", file.toString()), doc);
    }
  }
}
From source file:oldClasses.IndexFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is
 * given, recurses over files and directories found under the given
 * directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For
 * good throughput, put multiple documents into your input file(s). An
 * example of this is in the benchmark module, which can create "line doc"
 * files, one document per line, using the <a href=
 * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
  // do not try to index files that cannot be read
  if (file.canRead()) {
    if (file.isDirectory()) {
      String[] files = file.list();
      // an IO error could occur
      if (files != null) {
        for (int i = 0; i < files.length; i++) {
          indexDocs(writer, new File(file, files[i]));
        }
      }
    } else {
      FileInputStream fis;
      try {
        fis = new FileInputStream(file);
      } catch (FileNotFoundException fnfe) {
        // at least on windows, some temporary files raise this
        // exception with an "access denied" message
        // checking if the file can be read doesn't help
        return;
      }
      try {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter). This indexes to milli-second resolution, which
        // is often too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

        // Add the contents of the file to a field named "contents". Specify
        // a Reader, so that the text of the file is tokenized and indexed,
        // but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
          // New index, so we just add the document (no old document can be there):
          System.out.println("adding " + file);
          writer.addDocument(doc);
        } else {
          // Existing index (an old copy of this document may have been indexed) so
          // we use updateDocument instead to replace the old one matching the exact
          // path, if present:
          System.out.println("updating " + file);
          writer.updateDocument(new Term("path", file.getPath()), doc);
        }
      } finally {
        fis.close();
      }
    }
  }
}
From source file:org.abondar.experimental.eventsearch.SearchData.java
public void indexDoc(IndexWriter iw, Path file, long lastModified) throws IOException {
  try (InputStream stream = Files.newInputStream(file)) {
    Document doc = new Document();
    Field pathField = new StringField("path", file.toString(), Field.Store.YES);
    doc.add(pathField);

    ObjectMapper mapper = new ObjectMapper();
    Event eb = mapper.readValue(new File(file.toString()), Event.class);
    doc.add(new TextField("category", eb.getCategory(), Field.Store.YES));

    if (iw.getConfig().getOpenMode() == OpenMode.CREATE) {
      iw.addDocument(doc);
      for (IndexableField ifd : doc.getFields()) {
        System.out.println(ifd.stringValue() + " " + ifd.name());
      }
      System.out.println("adding " + file);
    } else {
      iw.updateDocument(new Term("path", file.toString()), doc);
      System.out.println("updating " + file);
    }
  }
}
From source file:org.apache.pdfbox.examples.lucene.IndexPDFFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given, recurses over files and directories
 * found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good throughput, put multiple documents
 * into your input file(s). An example of this is in the benchmark module, which can create "line doc" files, one
 * document per line, using the <a
 * href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
  // do not try to index files that cannot be read
  if (file.canRead()) {
    if (file.isDirectory()) {
      String[] files = file.list();
      // an IO error could occur
      if (files != null) {
        for (String fileName : files) {
          indexDocs(writer, new File(file, fileName));
        }
      }
    } else {
      FileInputStream fis;
      try {
        fis = new FileInputStream(file);
      } catch (FileNotFoundException fnfe) {
        // at least on windows, some temporary files raise this exception with an "access denied" message
        // checking if the file can be read doesn't help
        return;
      }
      try {
        String path = file.getName().toUpperCase();
        Document doc = null;
        if (path.toLowerCase().endsWith(".pdf")) {
          System.out.println("Indexing PDF document: " + file);
          doc = LucenePDFDocument.getDocument(file);
        } else {
          System.out.println("Skipping " + file);
          return;
        }
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
          // New index, so we just add the document (no old document can be there):
          System.out.println("adding " + file);
          writer.addDocument(doc);
        } else {
          // Existing index (an old copy of this document may have been indexed) so
          // we use updateDocument instead to replace the old one matching the exact
          // path, if present:
          System.out.println("updating " + file);
          writer.updateDocument(new Term("uid", LucenePDFDocument.createUID(file)), doc);
        }
      } finally {
        fis.close();
      }
    }
  }
}
From source file:org.apache.solr.core.TestPropInjectDefaults.java
License:Apache License
@Test
public void testMergePolicyDefaults() throws Exception {
  ExposeWriterHandler uh = new ExposeWriterHandler();
  IndexWriter writer = uh.getWriter();
  LogByteSizeMergePolicy mp = (LogByteSizeMergePolicy) writer.getConfig().getMergePolicy();
  assertEquals(32.0, mp.getMaxMergeMB(), 0);
  uh.close();
}
From source file:org.apache.solr.core.TestPropInjectDefaults.java
License:Apache License
@Test
public void testPropsDefaults() throws Exception {
  ExposeWriterHandler uh = new ExposeWriterHandler();
  IndexWriter writer = uh.getWriter();
  ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler();
  assertEquals(4, cms.getMaxThreadCount());
  uh.close();
}
From source file:org.apache.solr.core.TestSimpleTextCodec.java
License:Apache License
public void test() throws Exception {
  SolrConfig config = h.getCore().getSolrConfig();
  String codecFactory = config.get("codecFactory/@class");
  assertEquals("Unexpected solrconfig codec factory", "solr.SimpleTextCodecFactory", codecFactory);

  assertEquals("Unexpected core codec", "SimpleText", h.getCore().getCodec().getName());

  RefCounted<IndexWriter> writerRef = h.getCore().getSolrCoreState().getIndexWriter(h.getCore());
  try {
    IndexWriter writer = writerRef.get();
    assertEquals("Unexpected codec in IndexWriter config", "SimpleText",
        writer.getConfig().getCodec().getName());
  } finally {
    writerRef.decref();
  }

  assertU(add(doc("id", "1", "text", "textual content goes here")));
  assertU(commit());

  RefCounted<SolrIndexSearcher> searcherRef = h.getCore().getSearcher();
  try {
    SolrIndexSearcher searcher = searcherRef.get();
    SegmentInfos infos = SegmentInfos.readLatestCommit(searcher.getIndexReader().directory());
    SegmentInfo info = infos.info(infos.size() - 1).info;
    assertEquals("Unexpected segment codec", "SimpleText", info.getCodec().getName());
  } finally {
    searcherRef.decref();
  }

  assertQ(req("q", "id:1"), "*[count(//doc)=1]");
}
From source file:org.apache.solr.handler.admin.SegmentsInfoRequestHandler.java
License:Apache License
private List<String> getMergeCandidatesNames(SolrQueryRequest req, SegmentInfos infos) throws IOException {
  List<String> result = new ArrayList<String>();
  IndexWriter indexWriter = getIndexWriter(req);
  // get the chosen merge policy
  MergePolicy mp = indexWriter.getConfig().getMergePolicy();
  // find merges
  MergeSpecification findMerges = mp.findMerges(MergeTrigger.EXPLICIT, infos, indexWriter);
  if (findMerges != null && findMerges.merges != null && findMerges.merges.size() > 0) {
    for (OneMerge merge : findMerges.merges) {
      // TODO: add merge grouping
      for (SegmentCommitInfo mergeSegmentInfo : merge.segments) {
        result.add(mergeSegmentInfo.info.name);
      }
    }
  }
  return result;
}