List of usage examples for org.apache.lucene.index.IndexWriterConfig.setCodec
public IndexWriterConfig setCodec(Codec codec)
From source file:gov.nist.basekb.FreebaseIndexer.java
License:LGPL
public void initializeIndexBuilder() throws Exception { // Create a new index directory and writer to index a triples file. // Raise an error if an index already exists, so we don't accidentally overwrite it. String indexDir = getIndexDirectoryName(); if ((new File(indexDir)).isDirectory()) throw new IOException("Index directory already exists, remove it before indexing"); indexDirectory = FSDirectory.open(Paths.get(indexDir)); IndexWriterConfig iwc = new IndexWriterConfig(getIndexAnalyzer()); // we always create a new index from scratch: iwc.setOpenMode(OpenMode.CREATE);//from w ww . j av a2s. c om iwc.setCodec(new Lucene54Codec(Mode.BEST_SPEED)); // the default //iwc.setCodec(new Lucene54Codec(Mode.BEST_COMPRESSION)); // slower, but better compression indexWriter = new IndexWriter(indexDirectory, iwc); indexAnalyzer = getIndexAnalyzer(); if (INDEX_PREDICATES) printlnProg("Indexing individual predicates"); if (INDEX_TEXT) printlnProg("Indexing combined predicate text values"); if (INDEX_LANGUAGE) printlnProg("Indexing predicates for language(s): " + supportedLanguages); }
From source file:gov.nist.basekb.FreebaseTools.java
License:LGPL
public void initializeIndexBuilder() throws Exception { // Create a new index directory and writer to index a triples file. // Raise an error if an index already exists, so we don't accidentally overwrite it. String indexDir = getIndexDirectoryName(); if ((new java.io.File(indexDir)).isDirectory()) throw new IOException("Index directory already exists, remove it before indexing"); indexDirectory = FSDirectory.open(Paths.get(indexDir)); IndexWriterConfig iwc = new IndexWriterConfig(getIndexAnalyzer()); // we always create a new index from scratch: iwc.setOpenMode(OpenMode.CREATE);/*from w w w . j a va 2 s. c o m*/ iwc.setCodec(new Lucene54Codec(Mode.BEST_SPEED)); // the default //iwc.setCodec(new Lucene54Codec(Mode.BEST_COMPRESSION)); // slower, but better compression indexWriter = new IndexWriter(indexDirectory, iwc); indexAnalyzer = getIndexAnalyzer(); if (INDEX_PREDICATES) printlnProg("Indexing individual predicates"); if (INDEX_TEXT) printlnProg("Indexing combined predicate text values"); if (INDEX_LANGUAGE) printlnProg("Indexing predicates for language(s): " + supportedLanguages); }
From source file:lsre.utils.LuceneUtils.java
License:Open Source License
/** * Creates an IndexWriter for given index path, with given analyzer. * * @param directory the path to the index directory * @param create set to true if you want to create a new index * @param analyzer gives the analyzer used for the Indexwriter. * @return an IndexWriter/*w ww . j a v a 2 s .co m*/ * @throws IOException */ public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer) throws IOException { // set the analyzer according to the method params Analyzer tmpAnalyzer = null; if (analyzer == AnalyzerType.SimpleAnalyzer) tmpAnalyzer = new SimpleAnalyzer(); // LetterTokenizer with LowerCaseFilter else if (analyzer == AnalyzerType.WhitespaceAnalyzer) tmpAnalyzer = new WhitespaceAnalyzer(); // WhitespaceTokenizer else if (analyzer == AnalyzerType.KeywordAnalyzer) tmpAnalyzer = new KeywordAnalyzer(); // entire string as one token. else if (analyzer == AnalyzerType.StandardAnalyzer) tmpAnalyzer = new StandardAnalyzer(); // The config IndexWriterConfig config = new IndexWriterConfig(tmpAnalyzer); config.setRAMBufferSizeMB(512); config.setCommitOnClose(true); if (create) config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists. else config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise. config.setCodec(new LsreCustomCodec()); return new IndexWriter(directory, config); }
From source file:lsre.utils.LuceneUtils.java
License:Open Source License
public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer, double RAMBufferSize) throws IOException { // set the analyzer according to the method params Analyzer tmpAnalyzer = null;/* w ww . ja v a 2 s . co m*/ if (analyzer == AnalyzerType.SimpleAnalyzer) tmpAnalyzer = new SimpleAnalyzer(); else if (analyzer == AnalyzerType.WhitespaceAnalyzer) tmpAnalyzer = new WhitespaceAnalyzer(); // The config IndexWriterConfig config = new IndexWriterConfig(tmpAnalyzer); if (create) config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists. else config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise. config.setRAMBufferSizeMB(RAMBufferSize); config.setCodec(new LsreCustomCodec()); return new IndexWriter(directory, config); }
From source file:net.semanticmetadata.lire.indexing.parallel.ParallelIndexer.java
License:Open Source License
public void run() { IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new StandardAnalyzer()); config.setOpenMode(openMode);/*from w w w . jav a2 s.c o m*/ config.setCodec(new LireCustomCodec()); try { if (imageDirectory != null) System.out.println("Getting all images in " + imageDirectory + "."); writer = new IndexWriter(FSDirectory.open(new File(indexPath)), config); if (imageList == null) { files = FileUtils.getAllImages(new File(imageDirectory), true); } else { files = new LinkedList<String>(); BufferedReader br = new BufferedReader(new FileReader(imageList)); String line = null; while ((line = br.readLine()) != null) { if (line.trim().length() > 3) files.add(line.trim()); } } numImages = files.size(); System.out.printf("Indexing %d images\n", files.size()); Thread p = new Thread(new Producer()); p.start(); LinkedList<Thread> threads = new LinkedList<Thread>(); long l = System.currentTimeMillis(); for (int i = 0; i < numberOfThreads; i++) { Thread c = new Thread(new Consumer()); c.start(); threads.add(c); } Thread m = new Thread(new Monitoring()); m.start(); for (Iterator<Thread> iterator = threads.iterator(); iterator.hasNext();) { iterator.next().join(); } long l1 = System.currentTimeMillis() - l; int seconds = (int) (l1 / 1000); int minutes = seconds / 60; seconds = seconds % 60; // System.out.println("Analyzed " + overallCount + " images in " + seconds + " seconds, ~" + ((overallCount>0)?(l1 / overallCount):"n.a.") + " ms each."); System.out.printf("Analyzed %d images in %03d:%02d ~ %3.2f ms each.\n", overallCount, minutes, seconds, ((overallCount > 0) ? ((float) l1 / (float) overallCount) : -1f)); writer.commit(); writer.forceMerge(1); writer.close(); threadFinished = true; // add local feature hist here } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } }
From source file:net.semanticmetadata.lire.indexing.tools.Indexor.java
License:Open Source License
public void run() { // do it ...// w ww. j av a 2 s . co m try { // IndexWriter indexWriter = LuceneUtils.createIndexWriter(indexPath, overwriteIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer); IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION)); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); config.setCodec(new LireCustomCodec()); IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexPath)), config); for (Iterator<File> iterator = inputFiles.iterator(); iterator.hasNext();) { File inputFile = iterator.next(); if (verbose) System.out.println("Processing " + inputFile.getPath() + "."); readFile(indexWriter, inputFile); if (verbose) System.out.println("Indexing finished."); } indexWriter.commit(); indexWriter.close(); } catch (Exception e) { e.printStackTrace(); } }
From source file:net.semanticmetadata.lire.utils.LuceneUtils.java
License:Open Source License
/** * Creates an IndexWriter for given index path, with given analyzer. * * @param directory the path to the index directory * @param create set to true if you want to create a new index * @param analyzer gives the analyzer used for the Indexwriter. * @return an IndexWriter/*from w w w .j a va 2 s. c om*/ * @throws IOException */ public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer) throws IOException { // set the analyzer according to the method params Analyzer tmpAnalyzer = null; if (analyzer == AnalyzerType.SimpleAnalyzer) tmpAnalyzer = new SimpleAnalyzer(); // LetterTokenizer with LowerCaseFilter else if (analyzer == AnalyzerType.WhitespaceAnalyzer) tmpAnalyzer = new WhitespaceAnalyzer(); // WhitespaceTokenizer else if (analyzer == AnalyzerType.KeywordAnalyzer) tmpAnalyzer = new KeywordAnalyzer(); // entire string as one token. else if (analyzer == AnalyzerType.StandardAnalyzer) tmpAnalyzer = new StandardAnalyzer(); // The config IndexWriterConfig config = new IndexWriterConfig(tmpAnalyzer); config.setRAMBufferSizeMB(512); config.setCommitOnClose(true); if (create) config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists. else config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise. config.setCodec(new LireCustomCodec()); return new IndexWriter(directory, config); }
From source file:net.semanticmetadata.lire.utils.LuceneUtils.java
License:Open Source License
public static IndexWriter createIndexWriter(Directory directory, boolean create, AnalyzerType analyzer, double RAMBufferSize) throws IOException { // set the analyzer according to the method params Analyzer tmpAnalyzer = null;/*from ww w . java 2 s . c o m*/ if (analyzer == AnalyzerType.SimpleAnalyzer) tmpAnalyzer = new SimpleAnalyzer(); else if (analyzer == AnalyzerType.WhitespaceAnalyzer) tmpAnalyzer = new WhitespaceAnalyzer(); // The config IndexWriterConfig config = new IndexWriterConfig(tmpAnalyzer); if (create) config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // overwrite if it exists. else config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // create new if none is there, append otherwise. config.setRAMBufferSizeMB(RAMBufferSize); config.setCodec(new LireCustomCodec()); return new IndexWriter(directory, config); }
From source file:nicta.com.au.patent.pac.index.PACIndexer.java
/**
 * Creates an indexer that writes to the index directory at {@code indexDir}.
 *
 * <p>The title, abstract, description and claims fields each get an {@code EnglishAnalyzer}
 * with their own stop-word set; every other field falls back to a {@code StandardAnalyzer}.
 * The writer is configured without compound files and with the {@code SimpleTextCodec}.
 *
 * @param indexDir path of the directory that holds the index
 * @throws IOException if the directory cannot be opened or the writer cannot be created
 */
public PACIndexer(String indexDir) throws IOException {
    File indexLocation = new File(indexDir);

    // One analyzer per patent section, each with a section-specific stop-word set.
    Map<String, Analyzer> fieldAnalyzers = new HashMap<>();
    fieldAnalyzers.put(PatentDocument.Title,
            new EnglishAnalyzer(Version.LUCENE_48, PatentsStopWords.TITLE_ENGLISH_STOP_WORDS_SET));
    fieldAnalyzers.put(PatentDocument.Abstract,
            new EnglishAnalyzer(Version.LUCENE_48, PatentsStopWords.ABSTRACT_ENGLISH_STOP_WORDS_SET));
    fieldAnalyzers.put(PatentDocument.Description,
            new EnglishAnalyzer(Version.LUCENE_48, PatentsStopWords.DESCRIPTION_ENGLISH_STOP_WORDS_SET));
    fieldAnalyzers.put(PatentDocument.Claims,
            new EnglishAnalyzer(Version.LUCENE_48, PatentsStopWords.CLAIMS_ENGLISH_STOP_WORDS_SET));

    aWrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_48), fieldAnalyzers);
    analyzer = new EnglishAnalyzer(Version.LUCENE_48, PatentsStopWords.ENGLISH_STOP_WORDS_SET);

    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_48, aWrapper);
    writerConfig.setUseCompoundFile(false);
    writerConfig.setCodec(new SimpleTextCodec());
    writer = new IndexWriter(FSDirectory.open(indexLocation), writerConfig);
}
From source file:org.apache.blur.filter.FilterCacheTest.java
License:Apache License
private void writeDocs(FilterCache filterCache, RAMDirectory directory) throws IOException { IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer()); conf.setCodec(new Blur024Codec()); IndexWriter indexWriter = new IndexWriter(directory, conf); int count = 10000; addDocs(indexWriter, count);//from w w w .j a v a 2 s.c o m indexWriter.close(); }