List of usage examples for org.apache.lucene.document TextField TYPE_NOT_STORED
FieldType TYPE_NOT_STORED
To view the source code for org.apache.lucene.document TextField TYPE_NOT_STORED, click Source Link.
From source file:alix.lucene.Alix.java
License:Open Source License
/** * Parse field type String/* w w w .ja v a 2 s . c om*/ * * @param name Name of the field * @param value Value of the field * @param options a string composed of letters in any order following Luke convention to describe fields * IdfpoPSV * I: Indexed * d: docs * f: freqs * p: pos * o: offset * P: payloads * S: Stored * V: TermVector */ public static FieldType fieldType(String options) { FieldType type; if (options == null) return new FieldType(); if ("S".equals(options)) { type = new FieldType(); type.setStored(true); return type; } if (options.contains("S")) { type = new FieldType(TextField.TYPE_STORED); } else { type = new FieldType(TextField.TYPE_NOT_STORED); } // optimize ? type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); if (options.contains("p")) { type.setStoreTermVectorPositions(true); } if (options.contains("o")) { type.setTokenized(true); type.setStoreTermVectors(true); type.setStoreTermVectorOffsets(true); } if (options.contains("P")) { type.setTokenized(true); type.setStoreTermVectors(true); type.setStoreTermVectorPositions(true); type.setStoreTermVectorPayloads(true); } if (options.contains("V")) { type.setTokenized(true); type.setStoreTermVectors(true); } return type; }
From source file:at.ac.univie.mminf.luceneSKOS.test.SKOSURIFilterTest.java
License:Apache License
/**
 * Indexes a single document whose "subject" field holds a concept URI in a non-stored
 * text field, then checks that a term query for "jumps" finds the document while no
 * "subject" value can be read back from the index.
 *
 * @throws IOException if indexing or searching fails
 */
@Test
public void singleUriExpansionWithUnstoredField() throws IOException {
    Field subjectField = new Field("subject", "http://example.com/concept/1", TextField.TYPE_NOT_STORED);
    Document input = new Document();
    input.add(subjectField);
    writer.addDocument(input);

    searcher = new IndexSearcher(DirectoryReader.open(writer, false));

    /* exactly one hit is expected for the term "jumps" */
    TopDocs hits = searcher.search(new TermQuery(new Term("subject", "jumps")), 10);
    assertEquals(1, hits.totalHits);

    /* the field was not stored, so the retrieved document exposes no values for it */
    int hitId = hits.scoreDocs[0].doc;
    Document retrieved = searcher.doc(hitId);
    assertEquals(0, retrieved.getValues("subject").length);
}
From source file:at.ac.univie.mminf.luceneSKOS.test.termexpansion.AbstractTermExpansionTest.java
License:Apache License
/** * This test indexes a sample metadata record (=lucene document) having a * "title", "description", and "subject" field, which contains plain subject * terms./*from w w w . java 2 s . c o m*/ * <p/> * A search for "arms" doesn't return that record because the term "arms" is * not explicitly contained in the record (document). * * @throws IOException * @throws LockObtainFailedException * @throws CorruptIndexException */ @Test public void noExpansion() throws IOException { /* defining the document to be indexed */ Document doc = new Document(); doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED)); doc.add(new Field("description", "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..." + "The spear was mainly a thrusting weapon, but could also be thrown. " + "It was the principal weapon of the auxiliary soldier... " + "(second - fourth century, Arbeia Roman Fort).", TextField.TYPE_NOT_STORED)); doc.add(new Field("subject", "weapons", TextField.TYPE_NOT_STORED)); /* setting up a writer with a default (simple) analyzer */ writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new SimpleAnalyzer())); /* adding the document to the index */ writer.addDocument(doc); /* defining a query that searches over all fields */ BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD); /* creating a new searcher */ searcher = new IndexSearcher(DirectoryReader.open(writer, false)); TopDocs results = searcher.search(builder.build(), 10); /* no results are returned since there is no term match */ assertEquals(0, results.totalHits); }
From source file:at.ac.univie.mminf.luceneSKOS.test.termexpansion.LabelbasedTermExpansionTest.java
License:Apache License
/** * This test indexes a sample metadata record (=lucene document) having a * "title", "description", and "subject" field. * <p/>/*from www .j a va2 s. c o m*/ * A search for "arms" returns that record as a result because "arms" is * defined as an alternative label for "weapons", the term which is * contained in the subject field. * * @throws IOException */ @Test public void labelBasedTermExpansion() throws IOException { /* defining the document to be indexed */ Document doc = new Document(); doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED)); doc.add(new Field("description", "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..." + "The spear was mainly a thrusting weapon, but could also be thrown. " + "It was the principal weapon of the auxiliary soldier... " + "(second - fourth century, Arbeia Roman Fort).", TextField.TYPE_NOT_STORED)); doc.add(new Field("subject", "weapons", TextField.TYPE_NOT_STORED)); /* setting up the SKOS analyzer */ String skosFile = "src/test/resources/skos_samples/ukat_examples.n3"; String indexPath = "build/"; /* ExpansionType.URI->the field to be analyzed (expanded) contains URIs */ Analyzer skosAnalyzer = new SKOSAnalyzer(indexPath, skosFile, ExpansionType.LABEL); /* Define different analyzers for different fields */ Map<String, Analyzer> analyzerPerField = new HashMap<>(); analyzerPerField.put("subject", skosAnalyzer); PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(), analyzerPerField); /* setting up a writer with a default (simple) analyzer */ writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(indexAnalyzer)); /* adding the document to the index */ writer.addDocument(doc); /* defining a query that searches over all fields */ BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("description", "arms")), 
BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD); /* creating a new searcher */ searcher = new IndexSearcher(DirectoryReader.open(writer, false)); TopDocs results = searcher.search(builder.build(), 10); /* the document matches because "arms" is among the expanded terms */ assertEquals(1, results.totalHits); /* defining a query that searches for a broader concept */ Query query = new TermQuery(new Term("subject", "military equipment")); results = searcher.search(query, 10); /* ... also returns the document as result */ assertEquals(1, results.totalHits); }
From source file:at.ac.univie.mminf.luceneSKOS.test.termexpansion.URIbasedTermExpansionTest.java
License:Apache License
/** * This test indexes a sample metadata record (=lucene document) having a * "title", "description", and "subject" field, which is semantically * enriched by a URI pointing to a SKOS concept "weapons". * <p/>/*from w w w. j a va 2 s . c om*/ * A search for "arms" returns that record as a result because "arms" is * defined as an alternative label (altLabel) for the concept "weapons". * * @throws IOException */ @Test public void uriBasedTermExpansion() throws IOException { /* defining the document to be indexed */ Document doc = new Document(); doc.add(new Field("title", "Spearhead", TextField.TYPE_STORED)); doc.add(new Field("description", "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..." + "The spear was mainly a thrusting weapon, but could also be thrown. " + "It was the principal weapon of the auxiliary soldier... " + "(second - fourth century, Arbeia Roman Fort).", TextField.TYPE_NOT_STORED)); doc.add(new Field("subject", "http://www.ukat.org.uk/thesaurus/concept/859", TextField.TYPE_NOT_STORED)); /* setting up the SKOS analyzer */ String skosFile = "src/test/resources/skos_samples/ukat_examples.n3"; String indexPath = "build/"; /* ExpansionType.URI->the field to be analyzed (expanded) contains URIs */ Analyzer skosAnalyzer = new SKOSAnalyzer(indexPath, skosFile, ExpansionType.URI); /* Define different analyzers for different fields */ Map<String, Analyzer> analyzerPerField = new HashMap<>(); analyzerPerField.put("subject", skosAnalyzer); PerFieldAnalyzerWrapper indexAnalyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(), analyzerPerField); /* setting up a writer with a default (simple) analyzer */ writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(indexAnalyzer)); /* adding the document to the index */ writer.addDocument(doc); /* defining a query that searches over all fields */ BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new TermQuery(new Term("title", "arms")), 
BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD); /* creating a new searcher */ searcher = new IndexSearcher(DirectoryReader.open(writer, false)); TopDocs results = searcher.search(builder.build(), 10); /* the document matches because "arms" is among the expanded terms */ assertEquals(1, results.totalHits); /* defining a query that searches for a broader concept */ Query query = new TermQuery(new Term("subject", "military equipment")); results = searcher.search(query, 10); /* ... also returns the document as result */ assertEquals(1, results.totalHits); }
From source file:collene.Freedb.java
License:Apache License
public static void BuildIndex(Directory directory) throws Exception { String freedbPath = "/Users/gdusbabek/Downloads/freedb-complete-20140701.tar.bz2"; if (directory == null) { System.out.println("Need to specify: { memory | file | cassandra }. Did you misspell something?"); System.exit(-1);/*from www .ja va2s . c o m*/ } FreeDbReader reader = new FreeDbReader(new File(freedbPath), 50000); reader.start(); long indexStart = System.currentTimeMillis(); Collection<Document> documents = new ArrayList<Document>(BATCH_SIZE); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); IndexWriter writer = new IndexWriter(directory, config); // stop after this many documents. final int maxDocuments = 400000; //Integer.MAX_VALUE; FreeDbEntry entry = reader.next(); int count = 0; while (entry != null && count < maxDocuments) { Document doc = new Document(); String any = entry.toString(); doc.add(new Field("any", any, TextField.TYPE_STORED)); doc.add(new Field("artist", entry.getArtist(), TextField.TYPE_NOT_STORED)); doc.add(new Field("album", entry.getAlbum(), TextField.TYPE_NOT_STORED)); doc.add(new Field("title", entry.getTitle(), TextField.TYPE_NOT_STORED)); doc.add(new Field("genre", entry.getGenre(), TextField.TYPE_NOT_STORED)); doc.add(new Field("year", entry.getYear(), TextField.TYPE_NOT_STORED)); for (int i = 0; i < entry.getTrackCount(); i++) { doc.add(new Field("track", entry.getTrack(i), TextField.TYPE_STORED)); } documents.add(doc); if (VERBOSE) { out.println(any); } if (documents.size() == BATCH_SIZE) { //out.println(String.format("Adding batch at count %d", count)); writer.addDocuments(documents); //out.println("done"); documents.clear(); } count += 1; if (count >= MAX_ENTRIES) { // done indexing. 
break; } entry = reader.next(); if (count % 100000 == 0) { out.println(String.format("Indexed %d documents", count)); // do a quick morrissey search for fun. // IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(ColDirectory.open( // new CassandraIO(8192, "collene", "cindex").start("127.0.0.1:9042"), // new CassandraIO(8192, "collene", "cmeta").start("127.0.0.1:9042"), // new CassandraIO(8192, "collene", "clock").start("127.0.0.1:9042") // ))); IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, false)); QueryParser parser = new QueryParser(Version.LUCENE_4_9, "any", analyzer); long searchStart = System.currentTimeMillis(); Query query = parser.parse("morrissey"); TopDocs docs = searcher.search(query, 10); long searchEnd = System.currentTimeMillis(); out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(), docs.totalHits, searchEnd - searchStart)); for (ScoreDoc d : docs.scoreDocs) { out.println(String.format("%d %.2f %d", d.doc, d.score, d.shardIndex)); } } } if (documents.size() > 0) { out.println(String.format("Adding batch at count %d", count)); writer.addDocuments(documents); out.println("done"); documents.clear(); // do a quick morrissey search for fun. 
IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, false)); QueryParser parser = new QueryParser(Version.LUCENE_4_9, "any", analyzer); long searchStart = System.currentTimeMillis(); Query query = parser.parse("morrissey"); TopDocs docs = searcher.search(query, 10); long searchEnd = System.currentTimeMillis(); out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(), docs.totalHits, searchEnd - searchStart)); for (ScoreDoc d : docs.scoreDocs) { out.println(String.format("%d %.2f %d", d.doc, d.score, d.shardIndex)); } } long indexTime = System.currentTimeMillis() - indexStart; out.println(String.format("Indexed %d things in %d ms (%s)", count, indexTime, directory.toString())); // long startMerge = System.currentTimeMillis(); // writer.forceMerge(1, true); // long endMerge = System.currentTimeMillis(); // out.println(String.format("merge took %d ms", endMerge-startMerge)); out.println("I think these are the files:"); for (String s : directory.listAll()) { out.println(s); } writer.close(true); directory.close(); }
From source file:collene.TestIndexing.java
License:Apache License
@Test public void test() throws IOException, ParseException { Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); // write it out. IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(directory, config); for (int i = 0; i < 100; i++) { Collection<Document> documents = new ArrayList<Document>(); Document doc = new Document(); doc.add(new Field("key", "aaa_" + i, TextField.TYPE_STORED)); doc.add(new Field("not", "notaaa", TextField.TYPE_NOT_STORED)); doc.add(new Field("meta", "aaa_meta_aaa_" + i, TextField.TYPE_STORED)); documents.add(doc);/*from w w w. ja v a2s.c o m*/ writer.addDocuments(documents); writer.commit(); writer.forceMerge(1); writer.forceMergeDeletes(true); } // now read it back. IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, false)); QueryParser parser = new QueryParser(Version.LUCENE_4_9, "key", analyzer); Query query = parser.parse("aaa_4"); TopDocs docs = searcher.search(query, 1); int idToDelete = docs.scoreDocs[0].doc; Assert.assertTrue(docs.totalHits > 0); query = parser.parse("fersoius"); docs = searcher.search(query, 1); Assert.assertFalse(docs.totalHits > 0); // delete that document. 
DirectoryReader reader = DirectoryReader.open(writer, true); writer.tryDeleteDocument(reader, idToDelete); reader.close(); writer.close(); // list files Set<String> files = new HashSet<String>(); System.out.println("Listing files for " + directory.toString()); for (String file : directory.listAll()) { files.add(file); System.out.println(" " + file); } if (strictFileChecking) { System.out.println("String file checking..."); Sets.SetView<String> difference = Sets.difference(expectedFiles, files); Assert.assertEquals(Joiner.on(",").join(difference), 0, difference.size()); } reader = DirectoryReader.open(directory); searcher = new IndexSearcher(reader); query = parser.parse("aaa_4"); docs = searcher.search(query, 1); reader.close(); Assert.assertFalse(docs.totalHits > 0); directory.close(); }
From source file:com.basistech.lucene.tools.LuceneQueryToolTest.java
License:Apache License
@BeforeClass public static void oneTimeSetup() throws IOException, ParseException { LuceneQueryToolTest.showOutput = false; // for debugging tests Directory dir = new RAMDirectory(); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(dir, config); Document doc = new Document(); doc.add(new Field("longest-mention", "Bill Clinton", StringField.TYPE_STORED)); doc.add(new Field("context", "Hillary Clinton Arkansas", TextField.TYPE_NOT_STORED)); writer.addDocument(doc);// ww w . ja v a2 s .c om doc = new Document(); doc.add(new Field("longest-mention", "George W. Bush", StringField.TYPE_STORED)); doc.add(new Field("context", "Texas Laura Bush", TextField.TYPE_NOT_STORED)); writer.addDocument(doc); doc = new Document(); doc.add(new Field("longest-mention", "George H. W. Bush", StringField.TYPE_STORED)); doc.add(new Field("context", "Barbara Bush Texas", TextField.TYPE_NOT_STORED)); writer.addDocument(doc); doc = new Document(); doc.add(new Field("bbb", "foo", StringField.TYPE_STORED)); doc.add(new Field("bbb", "bar", StringField.TYPE_STORED)); doc.add(new Field("aaa", "foo", StringField.TYPE_STORED)); FieldType typeUnindexed = new FieldType(StringField.TYPE_STORED); typeUnindexed.setIndexOptions(IndexOptions.NONE); doc.add(new Field("zzz", "foo", typeUnindexed)); writer.addDocument(doc); writer.close(); reader = DirectoryReader.open(dir); }
From source file:com.epam.wilma.message.search.lucene.index.FileIndexer.java
License:Open Source License
/** * Adds a file to index with {@link IndexWriter}. * @param file will be indexed by the function *///from ww w .ja va2 s .co m public void indexFile(final File file) { FileInputStream fis = getInputStream(file); Document doc = documentFactory.createDocument(); // Add the path of the file as a field named "path". Use a field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency or positional information: Field pathField = new StringField(fieldName, file.getAbsolutePath(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with NumericRangeFilter). doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". // If that's not the case searching for special characters will fail. BufferedReader bufferedReader; try { bufferedReader = bufferedReaderFactory.createReader(fis); doc.add(new Field("contents", bufferedReader, TextField.TYPE_NOT_STORED)); addDocument(file, doc); fis.close(); } catch (IOException e) { logger.error(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:com.ibm.watson.developer_cloud.professor_languo.ingestion.indexing.LuceneDocumentMapper.java
License:Open Source License
/**
 * Returns the Lucene field types in a fixed order: one stored int type, five stored
 * text types, one non-stored text type and the binary doc-values type.
 *
 * @return a fixed-size list of the eight field types
 */
@Override
public List<FieldType> getFieldTypes() {
    FieldType[] typesInOrder = {
        IntField.TYPE_STORED,
        TextField.TYPE_STORED,
        TextField.TYPE_STORED,
        TextField.TYPE_STORED,
        TextField.TYPE_STORED,
        TextField.TYPE_STORED,
        TextField.TYPE_NOT_STORED,
        BinaryDocValuesField.TYPE,
    };
    return Arrays.asList(typesInOrder);
}