List of usage examples for org.apache.lucene.index IndexWriter IndexWriter
public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException
Parameter: conf — the IndexWriterConfig holding the configuration settings for the writer.
. From source file:at.ac.univie.mminf.luceneSKOS.test.termexpansion.URIbasedTermExpansionTest.java
License:Apache License
/** * This test indexes a sample metadata record (=lucene document) having a * "title", "description", and "subject" field, which is semantically * enriched by a URI pointing to a SKOS concept "weapons". * <p/>/* ww w . ja v a2 s.c o m*/ * A search for "arms" returns that record as a result because "arms" is * defined as an alternative label (altLabel) for the concept "weapons". * * @throws IOException */ @Test public void uriBasedTermExpansion() throws IOException { /* defining the document to be indexed */ Document doc = new Document(); doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED)); doc.add(new Field("description", "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..." + "The spear was mainly a thrusting weapon, but could also be thrown. " + "It was the principal weapon of the auxiliary soldier... " + "(second - fourth century, Arbeia Roman Fort).", TextField.TYPE_NOT_STORED)); doc.add(new Field("subject", "http://www.ukat.org.uk/thesaurus/concept/859", TextField.TYPE_NOT_STORED)); /* setting up the SKOS analyzer */ String skosFile = "src/test/resources/skos_samples/ukat_examples.n3"; String indexPath = "build/"; /* ExpansionType.URI->the field to be analyzed (expanded) contains URIs */ Analyzer skosAnalyzer = new SKOSAnalyzer(indexPath, skosFile, ExpansionType.URI); /* Define different analyzers for different fields */ Map<String, Analyzer> analyzerPerField = new HashMap<>(); analyzerPerField.put("subject", skosAnalyzer); PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(), analyzerPerField); /* setting up a writer with a default (simple) analyzer */ writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(indexAnalyzer)); /* adding the document to the index */ writer.addDocument(doc); /* defining a query that searches over all fields */ BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new TermQuery(new Term("title", "arms")), 
BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD); /* creating a new searcher */ searcher = new IndexSearcher(DirectoryReader.open(writer, false)); TopDocs results = searcher.search(builder.build(), 10); /* the document matches because "arms" is among the expanded terms */ assertEquals(1, results.totalHits); /* defining a query that searches for a broader concept */ Query query = new TermQuery(new Term("subject", "military equipment")); results = searcher.search(query, 10); /* ... also returns the document as result */ assertEquals(1, results.totalHits); }
From source file:at.ac.univie.mminf.luceneSKOS.URIbasedTermExpansionTest.java
License:Apache License
/**
 * Indexes a sample metadata record (a Lucene document with "title",
 * "description", and "subject" fields) whose subject holds a URI pointing to
 * the SKOS concept "weapons", then verifies that a search for the alternative
 * label "arms" — and for the broader concept "military equipment" — returns
 * the record. (Variant for the older Lucene Field.Store/Field.Index API.)
 *
 * @throws IOException if indexing or searching fails
 */
@Test
public void uriBasedTermExpansion() throws IOException {
    /* The document to be indexed. */
    Document doc = new Document();
    doc.add(new Field("title", "Spearhead", Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("description",
            "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
                    + "The spear was mainly a thrusting weapon, but could also be thrown. "
                    + "It was the principal weapon of the auxiliary soldier... "
                    + "(second - fourth century, Arbeia Roman Fort).",
            Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field("subject", "http://www.ukat.org.uk/thesaurus/concept/859",
            Field.Store.NO, Field.Index.ANALYZED));

    /* SKOS analyzer: ExpansionType.URI means the analyzed field contains URIs. */
    String skosFile = "src/test/resources/skos_samples/ukat_examples.n3";
    Analyzer skosAnalyzer = new SKOSAnalyzer(matchVersion, skosFile, ExpansionType.URI);

    /* Only the "subject" field receives SKOS expansion; other fields use SimpleAnalyzer. */
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("subject", skosAnalyzer);
    PerFieldAnalyzerWrapper indexAnalyzer =
            new PerFieldAnalyzerWrapper(new SimpleAnalyzer(matchVersion), analyzerPerField);

    /* Writer over an in-memory directory with the per-field analyzer. */
    writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(matchVersion, indexAnalyzer));
    writer.addDocument(doc);

    /* Search all three fields for the altLabel "arms". */
    BooleanQuery labelQuery = new BooleanQuery();
    labelQuery.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD);
    labelQuery.add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD);
    labelQuery.add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD);

    searcher = new IndexSearcher(IndexReader.open(writer, false));
    TopDocs results = searcher.search(labelQuery, 10);
    /* The document matches because "arms" is among the expanded terms. */
    Assert.assertEquals(1, results.totalHits);

    /* A query for a broader concept also returns the document. */
    Query broaderQuery = new TermQuery(new Term("subject", "military equipment"));
    results = searcher.search(broaderQuery, 10);
    Assert.assertEquals(1, results.totalHits);
}
From source file:au.org.ala.names.search.ALANameIndexer.java
License:Open Source License
/**
 * Creates an index writer over the given directory, optionally wiping and
 * recreating the target directory first.
 *
 * @param directory the target index directory (created if missing)
 * @param analyzer  the analyzer the writer will use
 * @param replace   when true, delete any existing directory and open in CREATE mode;
 *                  otherwise open in CREATE_OR_APPEND mode
 * @return a writer opened on the directory
 * @throws Exception if the directory cannot be prepared or the writer created
 */
protected IndexWriter createIndexWriter(File directory, Analyzer analyzer, boolean replace) throws Exception {
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_34, analyzer);
    conf.setOpenMode(replace ? IndexWriterConfig.OpenMode.CREATE
                             : IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    /* Recreate the directory from scratch when replacing. */
    if (replace && directory.exists()) {
        FileUtils.forceDelete(directory);
    }
    FileUtils.forceMkdir(directory);
    return new IndexWriter(FSDirectory.open(directory), conf);
}
From source file:au.org.ala.names.search.ALANameSearcher.java
License:Open Source License
/**
 * Ensures an index directory exists at the given path, bootstrapping an empty
 * Lucene index (via an initial commit) if the directory is missing.
 *
 * @param indexDirectory filesystem path of the index directory
 * @return the directory as a File, existing on return
 * @throws IOException if the directory or index cannot be created
 */
private File createIfNotExist(String indexDirectory) throws IOException {
    File idxFile = new File(indexDirectory);
    if (!idxFile.exists()) {
        FileUtils.forceMkdir(idxFile);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_34, analyzer);
        // try-with-resources: previously the writer leaked if commit() threw
        // before close() was reached.
        try (IndexWriter iw = new IndexWriter(FSDirectory.open(idxFile), conf)) {
            iw.commit();
        }
    }
    return idxFile;
}
From source file:back.Indexer.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = ".\\indexed"; String docsPath = ".//artigos"; boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1];//w ww. j a v a2 s. co m i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT, new CharArraySet(Version.LUCENE_CURRENT, 0, false)); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:bajavista.IndiceInvertido.java
public void crearIndiceInvertido() throws IOException { // 0. Specify the analyzer for tokenizing text. // The same analyzer should be used for indexing and searching StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); // 1. Create the index File indexDirES = new File(dirIndexES); Directory indexES = FSDirectory.open(indexDirES); //File indexDirNONES = new File(dirIndexNONES); //Directory indexNONES = FSDirectory.open(indexDirNONES); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer); IndexWriter wES = new IndexWriter(indexES, config); //IndexWriter wNONES = new IndexWriter(indexNONES, config); ConexionBD db = new ConexionBD(); try {/*www .j a va 2s .c om*/ try (PreparedStatement consulta = db.getConnection().prepareStatement("SELECT * FROM Tweet"); ResultSet res = consulta.executeQuery()) { while (res.next()) { //System.out.println(res.getString("idUser") +" "+ res.getString("timestamp") +" "+ res.getString("text") +" "+ res.getString("objective") +" "+ res.getString("subjective") +" "+ res.getString("positive") +" "+ res.getString("negative") +" "+ res.getString("need")); agregarDoc(wES, res.getString("idUser"), res.getString("timestamp"), res.getString("text"), res.getString("objective"), res.getString("subjective"), res.getString("positive"), res.getString("negative"), res.getString("need")); } } } catch (Exception e) { System.out.print("No se pudo consultar a la base de datos\n" + e); } // try { // File f = new File(baseDatosNONES); // FileReader fr = new FileReader(f); // BufferedReader br = new BufferedReader(fr); // String linea = br.readLine(); // // while ((linea = br.readLine()) != null) { // StringTokenizer separarLinea = new StringTokenizer(linea, "|"); // String next = separarLinea.nextToken();; // String next1 = separarLinea.nextToken();; // String next2 = separarLinea.nextToken();; // // addDoc(wNONES, next, next1,next2); // } // // fr.close(); // } catch (Exception e) { // System.out.println("Error en la 
lectura del archivo..."); // } db.desconectar(); wES.close(); //wNONES.close(); }
From source file:bbejeck.nosql.lucene.LuceneSqlFileSystemSearchBase.java
License:Apache License
/**
 * Opens the on-disk index at {@code path} and creates the shared writer using
 * the preconfigured {@code config}.
 *
 * @param path filesystem location of the Lucene index
 * @throws Exception if the directory or writer cannot be opened
 */
public void init(String path) throws Exception {
    fsDirectory = FSDirectory.open(Paths.get(path));
    iwriter = new IndexWriter(fsDirectory, config);
}
From source file:bbejeck.nosql.lucene.LuceneSqlSearchBase.java
License:Apache License
/**
 * Creates the shared writer over the in-memory {@code ramDirectory} using the
 * preconfigured {@code config}.
 *
 * @throws Exception if the writer cannot be created
 */
public void init() throws Exception { iwriter = new IndexWriter(ramDirectory, config); }
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.newsFetch.storm.bolts.LuceneIndexBolt.java
License:Apache License
@Override public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) { this.collector = collector; try {//from w w w . j av a 2s. c o m logger.info("Opening index"); Directory dir = FSDirectory.open(new File(indexLocation)); NewsRecLuceneAnalyzer analyzer = LanguageAnalyzerHelper.getInstance().getAnalyzer(Locale.ENGLISH); this.termExtract = new LuceneTopTermExtract(analyzer); IndexWriterConfig config = new IndexWriterConfig(Config.LUCENE_VERSION, analyzer); writer = new IndexWriter(dir, config); } catch (IOException ex) { logger.error(ex); } }
From source file:be.ugent.tiwi.sleroux.newsrec.stormNewsFetch.storm.bolts.LuceneIndexBolt.java
License:Apache License
@Override public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) { this.collector = collector; try {// w w w. ja v a2 s . c o m logger.info("Opening index"); Directory dir = FSDirectory.open(new File(indexLocation)); EnAnalyzer analyzer = new EnAnalyzer(); analyzer.setStopwords(getStopwords(stopwordsLocation)); IndexWriterConfig config = new IndexWriterConfig(Config.LUCENE_VERSION, analyzer); writer = new IndexWriter(dir, config); } catch (IOException ex) { logger.error(ex); } }