List of usage examples for the org.apache.lucene.index.IndexWriter constructor:

public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException

conf - the configuration settings according to which the IndexWriter should be initialized.
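Before the collected examples, here is a minimal sketch of the open / add / close pattern they all share. The index path, analyzer choice, and open mode below are illustrative assumptions, not something this page prescribes; the calls follow the path-based Lucene 5+/6+ style used by most of the examples that follow.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class MinimalIndexWriterExample {
  public static void main(String[] args) throws Exception {
    // hypothetical index location; any writable directory works
    Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
    // the analyzer is only an assumption here; pick one that fits your fields
    IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    // IndexWriter is Closeable, so try-with-resources closes it for us
    try (IndexWriter writer = new IndexWriter(dir, conf)) {
      Document doc = new Document();
      doc.add(new TextField("body", "hello index writer", Field.Store.YES));
      writer.addDocument(doc);
      writer.commit();
    }
    dir.close();
  }
}

The examples below vary this pattern mostly through the IndexWriterConfig: open mode, analyzer, RAM buffer size, merge policy and scheduler, similarity, and codec.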
From source file:DVBench.java
License:Apache License
static void doBench(int bpv) throws Exception {
  File file = new File("/data/indices/dvbench");
  file.mkdirs();
  Directory dir = FSDirectory.open(file);
  IndexWriterConfig config = new IndexWriterConfig(null);
  config.setOpenMode(OpenMode.CREATE);
  config.setMergeScheduler(new SerialMergeScheduler());
  config.setMergePolicy(new LogDocMergePolicy());
  config.setMaxBufferedDocs(25000);
  IndexWriter writer = new IndexWriter(dir, config);
  MyRandom r = new MyRandom();
  int numdocs = 400000;
  Document doc = new Document();
  Field dv = new NumericDocValuesField("dv", 0);
  Field inv = new LongField("inv", 0, Field.Store.NO);
  Field boxed = new BinaryDocValuesField("boxed", new BytesRef(8));
  Field boxed2 = new BinaryDocValuesField("boxed2", new BytesRef(8));
  doc.add(dv);
  doc.add(inv);
  doc.add(boxed);
  doc.add(boxed2);
  for (int i = 0; i < numdocs; i++) {
    // defeat blockpackedwriter
    final long value;
    if (i % 8192 == 0) {
      value = bpv == 64 ? Long.MIN_VALUE : 0;
    } else if (i % 8192 == 1) {
      value = bpv == 64 ? Long.MAX_VALUE : (1L << bpv) - 1;
    } else {
      value = r.nextLong(bpv);
    }
    dv.setLongValue(value);
    inv.setLongValue(value);
    box(value, boxed.binaryValue());
    box(value, boxed2.binaryValue());
    boxed2.binaryValue().length = (bpv + 7) / 8; // fixed length
    writer.addDocument(doc);
  }
  writer.close();

  // run dv search tests
  String description = "dv (bpv=" + bpv + ")";
  DirectoryReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = new IndexSearcher(reader);
  searcher.setQueryCache(null); // don't bench the cache
  int hash = 0;
  // warmup
  hash += search(description, searcher, "dv", 300, true);
  hash += search(description, searcher, "dv", 300, false);

  // Uninverting
  Map<String, UninvertingReader.Type> mapping = Collections.singletonMap("inv", UninvertingReader.Type.LONG);
  DirectoryReader uninv = UninvertingReader.wrap(reader, mapping);
  IndexSearcher searcher2 = new IndexSearcher(uninv);
  searcher2.setQueryCache(null); // don't bench the cache
  description = "fc (bpv=" + bpv + ")";
  // warmup
  hash += search(description, searcher2, "inv", 300, true);
  hash += search(description, searcher2, "inv", 300, false);

  // Boxed inside binary
  DirectoryReader boxedReader = new BinaryAsVLongReader(reader);
  IndexSearcher searcher3 = new IndexSearcher(boxedReader);
  searcher3.setQueryCache(null); // don't bench the cache
  description = "boxed (bpv=" + bpv + ")";
  // warmup
  hash += search(description, searcher3, "boxed", 300, true);
  hash += search(description, searcher3, "boxed", 300, false);

  description = "boxed fixed-length (bpv=" + bpv + ")";
  // warmup
  hash += search(description, searcher3, "boxed2", 300, true);
  hash += search(description, searcher3, "boxed2", 300, false);

  if (hash == 3) {
    // won't happen
    System.out.println("hash=" + hash);
  }
  reader.close();
  dir.close();
}
From source file:IndexAndSearchOpenStreetMaps1D.java
License:Apache License
private static void createIndex() throws IOException {
  long t0 = System.nanoTime();

  CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  int BUFFER_SIZE = 1 << 16; // 64K
  InputStream is = Files
      .newInputStream(Paths.get("/lucenedata/open-street-maps/latlon.subsetPlusAllLondon.txt"));
  BufferedReader reader = new BufferedReader(new InputStreamReader(is, decoder), BUFFER_SIZE);

  Directory dir = FSDirectory.open(Paths.get("/c/tmp/bkdtest1d" + (USE_NF ? "_nf" : "")));
  IndexWriterConfig iwc = new IndexWriterConfig(null);
  iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  //iwc.setMaxBufferedDocs(109630);
  //iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
  iwc.setRAMBufferSizeMB(256.0);
  iwc.setMergePolicy(new LogDocMergePolicy());
  iwc.setMergeScheduler(new SerialMergeScheduler());
  iwc.setInfoStream(new PrintStreamInfoStream(System.out));
  IndexWriter w = new IndexWriter(dir, iwc);

  int count = 0;
  byte[] scratch = new byte[4];
  while (true) {
    String line = reader.readLine();
    if (line == null) {
      break;
    }
    String[] parts = line.split(",");
    //long id = Long.parseLong(parts[0]);
    int lat = (int) (1000000. * Double.parseDouble(parts[1]));
    //int lon = (int) (1000000. * Double.parseDouble(parts[2]));
    Document doc = new Document();
    if (USE_NF) {
      doc.add(new LegacyIntField("latnum", lat, Field.Store.NO));
      //doc.add(new LongField("lonnum", lon, Field.Store.NO));
    } else {
      doc.add(new IntPoint("lat", lat));
      //doc.add(new SortedNumericDocValuesField("lon", lon));
    }
    w.addDocument(doc);
    count++;
    if (count % 1000000 == 0) {
      System.out.println(count + "...");
    }
  }
  //w.forceMerge(1);
  w.commit();
  System.out.println(w.maxDoc() + " total docs");
  w.close();
  long t1 = System.nanoTime();
  System.out.println(((t1 - t0) / 1000000000.0) + " sec to build index");
}
From source file:IndexTaxis.java
License:Apache License
public static void main(String[] args) throws Exception {
  Path indexPath = Paths.get(args[0]);
  Directory dir = FSDirectory.open(indexPath);
  int threadCount = Integer.parseInt(args[1]);
  Path docsPath = Paths.get(args[2]);

  IndexWriterConfig iwc = new IndexWriterConfig();
  //System.out.println("NOW SET INFO STREAM");
  iwc.setRAMBufferSizeMB(1024.);
  iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  iwc.setInfoStream(new PrintStreamInfoStream(System.out));
  //((ConcurrentMergeScheduler) iwc.getMergeScheduler()).disableAutoIOThrottle();

  final IndexWriter w = new IndexWriter(dir, iwc);

  BufferedInputStream docs = new BufferedInputStream(Files.newInputStream(docsPath, StandardOpenOption.READ));

  // parse the header fields
  List<String> fieldsList = new ArrayList<>();
  StringBuilder builder = new StringBuilder();
  while (true) {
    int x = docs.read();
    if (x == -1) {
      throw new IllegalArgumentException(
          "hit EOF while trying to read CSV header; are you sure you have the right CSV file!");
    }
    byte b = (byte) x;
    if (b == NEWLINE) {
      fieldsList.add(builder.toString());
      break;
    } else if (b == COMMA) {
      fieldsList.add(builder.toString());
      builder.setLength(0);
    } else {
      // this is OK because headers are all ascii:
      builder.append((char) b);
    }
  }

  final String[] fields = fieldsList.toArray(new String[fieldsList.size()]);

  Thread[] threads = new Thread[threadCount];
  final AtomicInteger docCounter = new AtomicInteger();
  final AtomicLong bytesCounter = new AtomicLong();

  startNS = System.nanoTime();

  for (int i = 0; i < threadCount; i++) {
    final int threadID = i;
    threads[i] = new Thread() {
      @Override
      public void run() {
        try {
          _run();
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      }

      private void _run() throws IOException {
        while (true) {
          Chunk chunk = readChunk(docs);
          if (chunk == null) {
            break;
          }
          indexOneChunk(fields, chunk, w, docCounter, bytesCounter);
        }
      }
    };
    threads[i].start();
  }

  for (int i = 0; i < threadCount; i++) {
    threads[i].join();
  }

  System.out.println("Indexing done; now close");
  w.close();
  docs.close();
}
From source file:alix.lucene.Alix.java
License:Open Source License
/**
 * Start to scan the glob of xml files
 *
 * @param indexDir where the lucene indexes are generated
 * @param anAnalyzer Analyzer to use for analyzed fields
 * @param similarity instance of Similarity to work with the writer
 * @throws TransformerConfigurationException
 */
static public void walk(String xmlGlob, String xslFile, String indexDir)
    throws IOException, TransformerConfigurationException {
  info("Lucene, src:" + xmlGlob + " parser:" + xslFile + " index:" + indexDir);

  Path srcDir = Paths.get(xmlGlob);
  PathMatcher glob = FileSystems.getDefault().getPathMatcher("glob:*.xml");
  if (!Files.isDirectory(srcDir)) {
    String pattern = srcDir.getFileName().toString();
    glob = FileSystems.getDefault().getPathMatcher("glob:" + pattern);
    srcDir = srcDir.getParent();
  }
  if (!Files.isDirectory(srcDir)) {
    fatal("FATAL " + srcDir + " NOT FOUND");
  }

  Path indexPath = Paths.get(indexDir);
  Files.createDirectories(indexPath);
  Directory dir = FSDirectory.open(indexPath);

  // TODO configure analyzers
  Analyzer analyzer = new XmlAnalyzer();
  IndexWriterConfig conf = new IndexWriterConfig(analyzer);
  conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
  conf.setSimilarity(new BM25Similarity());
  conf.setCodec(new ChapitreCodec());
  // Optional: for better indexing performance, if you
  // are indexing many documents, increase the RAM
  // buffer.  But if you do this, increase the max heap
  // size to the JVM (eg add -Xmx512m or -Xmx1g):
  //
  // conf.setRAMBufferSizeMB(256.0);
  lucwriter = new IndexWriter(dir, conf);

  System.setProperty("javax.xml.transform.TransformerFactory", "net.sf.saxon.TransformerFactoryImpl");
  TransformerFactory tf = TransformerFactory.newInstance();
  tf.setAttribute("http://saxon.sf.net/feature/version-warning", Boolean.FALSE);
  tf.setAttribute("http://saxon.sf.net/feature/recoveryPolicy", new Integer(0));
  parser = tf.newTransformer(new StreamSource(xslFile));

  final PathMatcher matcher = glob; // transmit the matcher by a final variable to the anonymous class
  Files.walkFileTree(srcDir, new SimpleFileVisitor<Path>() {
    @Override
    public FileVisitResult visitFile(Path path, BasicFileAttributes attrs) {
      if (path.getFileName().toString().startsWith("."))
        return FileVisitResult.CONTINUE;
      if (!matcher.matches(path.getFileName()))
        return FileVisitResult.CONTINUE;
      parse(path);
      return FileVisitResult.CONTINUE;
    }

    public FileVisitResult preVisitDirectory(Path path, BasicFileAttributes attrs) {
      // .git, .svn
      if (path.getFileName().toString().startsWith("."))
        return FileVisitResult.SKIP_SUBTREE;
      return FileVisitResult.CONTINUE;
    }
  });

  lucwriter.commit();
  // NOTE: if you want to maximize search performance,
  // you can optionally call forceMerge here.  This can be
  // a terribly costly operation, so generally it's only
  // worth it when your index is relatively static (ie
  // you're done adding documents to it):
  // lucwriter.forceMerge(1);
  lucwriter.close();
}
From source file:antnlp.opie.indexsearch.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */
public static void main(String[] args) {
  String usage = "java org.apache.lucene.demo.IndexFiles"
      + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
      + "This indexes the documents in DOCS_PATH, creating a Lucene index"
      + "in INDEX_PATH that can be searched with SearchFiles";
  String indexPath = "index";
  String docsPath = null;
  boolean create = true;
  for (int i = 0; i < args.length; i++) {
    if ("-index".equals(args[i])) {
      indexPath = args[i + 1];
      i++;
    } else if ("-docs".equals(args[i])) {
      docsPath = args[i + 1];
      i++;
    } else if ("-update".equals(args[i])) {
      create = false;
    }
  }

  if (docsPath == null) {
    System.err.println("Usage: " + usage);
    System.exit(1);
  }

  final Path docDir = Paths.get(docsPath);
  if (!Files.isReadable(docDir)) {
    System.out.println("Document directory '" + docDir.toAbsolutePath()
        + "' does not exist or is not readable, please check the path");
    System.exit(1);
  }

  Date start = new Date();
  try {
    System.out.println("Indexing to directory '" + indexPath + "'...");

    Directory dir = FSDirectory.open(Paths.get(indexPath));
    //Analyzer analyzer = new StandardAnalyzer();
    //Analyzer analyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET);
    Analyzer analyzer = new WhitespaceAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

    if (create) {
      // Create a new index in the directory, removing any
      // previously indexed documents:
      iwc.setOpenMode(OpenMode.CREATE);
    } else {
      // Add new documents to an existing index:
      iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    }

    // Optional: for better indexing performance, if you
    // are indexing many documents, increase the RAM
    // buffer.  But if you do this, increase the max heap
    // size to the JVM (eg add -Xmx512m or -Xmx1g):
    //
    // iwc.setRAMBufferSizeMB(256.0);

    IndexWriter writer = new IndexWriter(dir, iwc);
    indexDocs(writer, docDir);

    // NOTE: if you want to maximize search performance,
    // you can optionally call forceMerge here.  This can be
    // a terribly costly operation, so generally it's only
    // worth it when your index is relatively static (ie
    // you're done adding documents to it):
    //
    // writer.forceMerge(1);

    writer.close();

    Date end = new Date();
    System.out.println(end.getTime() - start.getTime() + " total milliseconds");

  } catch (IOException e) {
    System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
  }
}
From source file:api.startup.PDFIndexer.java
License:Open Source License
/**
 * Updates the index
 *
 * @throws IOException
 */
public void updateIndex() throws IOException {
  try {
    long startTime = System.nanoTime();
    // Get the index directory
    Directory dir = FSDirectory.open(Paths.get(indexDirectory));
    // Get the directory for resources
    String resourcesDir = resourceDirectory + "/" + Constants.CSV_LOCATION;
    // Get PDF Analyzer
    Analyzer pdf_analyzer = new PDFAnalyzer(resourceDirectory + "/" + Constants.STOPWORDS_FILE);
    // Create an index writer config with the analyzer
    IndexWriterConfig iwc = new IndexWriterConfig(pdf_analyzer);
    // Set the open mode to create or append
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); // Set index to be created
    // Create an index writer
    IndexWriter writer = new IndexWriter(dir, iwc);
    // Index the documents
    indexDocs(writer, resourcesDir);
    long endTime = System.nanoTime();
    LuceneIndexReader.getInstance().initializeIndexReader(writer);
    writer.close();
    log.info("Took: " + (endTime - startTime) / Math.pow(10, 6) + " milliseconds to generate the index.");
  } catch (IOException e) {
    log.error("IO Exception Thrown while updating index " + e.getMessage() + "\n");
  }
}
From source file:Application.mediaIndexer.java
public static void IndexFiles(String index, String docsPath, TextArea results, boolean CreateOrUpdate,
    boolean removeFiles) throws IOException {
  String indexPath = index;
  boolean create = CreateOrUpdate;
  final Path docDir = Paths.get(docsPath);
  if (!Files.isReadable(docDir))
    results.appendText("Document directory '" + docDir.toAbsolutePath()
        + "' does not exist or is not readable, please check the path" + "\n");
  Date start = new Date();
  try {
    if (removeFiles)
      results.appendText("Deleting '" + docsPath + "' from directory '" + indexPath + "'..." + "\n");
    // else results.appendText("Indexing '" + docsPath + "' to directory '" + indexPath + "'..." + "\n");
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    if (create)
      iwc.setOpenMode(OpenMode.CREATE);
    else
      iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    // Optional: for better indexing performance, if you are indexing many
    // documents, increase the RAM buffer.
    // But if you do this, increase the max heap size to the JVM (eg add
    // -Xmx512m or -Xmx1g):
    // iwc.setRAMBufferSizeMB(2048.0);
    IndexWriter writer = new IndexWriter(dir, iwc);
    indexDocs(writer, docDir, results, removeFiles);
    writer.close();
    Date end = new Date();
    long diffInSeconds = (end.getTime() - start.getTime()) / 1000;
    results.appendText(diffInSeconds + " total seconds" + "\n");
  } catch (SAXException e) {
    e.printStackTrace();
  } catch (TikaException e) {
    e.printStackTrace();
  }
}
From source file:apps.LuceneIndexer.java
License:Apache License
public static void main(String[] args) {
  Options options = new Options();

  options.addOption("i", null, true, "input file");
  options.addOption("o", null, true, "output directory");
  options.addOption("r", null, true, "optional output TREC-format QREL file");

  options.addOption("bm25_b", null, true, "BM25 parameter: b");
  options.addOption("bm25_k1", null, true, "BM25 parameter: k1");
  options.addOption("bm25fixed", null, false, "use the fixed BM25 similarity");

  Joiner commaJoin = Joiner.on(',');
  Joiner spaceJoin = Joiner.on(' ');

  options.addOption("source_type", null, true,
      "document source type: " + commaJoin.join(SourceFactory.getDocSourceList()));

  // If you increase this value, you may need to modify the following line in *.sh file
  // export MAVEN_OPTS="-Xms8192m -server"
  double ramBufferSizeMB = 1024 * 8; // 8 GB

  CommandLineParser parser = new org.apache.commons.cli.GnuParser();

  IndexWriter indexWriter = null;
  BufferedWriter qrelWriter = null;

  int docNum = 0;

  try {
    CommandLine cmd = parser.parse(options, args);

    String inputFileName = null, outputDirName = null, qrelFileName = null;

    if (cmd.hasOption("i")) {
      inputFileName = cmd.getOptionValue("i");
    } else {
      Usage("Specify 'input file'", options);
    }

    if (cmd.hasOption("o")) {
      outputDirName = cmd.getOptionValue("o");
    } else {
      Usage("Specify 'index directory'", options);
    }

    if (cmd.hasOption("r")) {
      qrelFileName = cmd.getOptionValue("r");
    }

    String sourceName = cmd.getOptionValue("source_type");

    if (sourceName == null)
      Usage("Specify document source type", options);

    if (qrelFileName != null)
      qrelWriter = new BufferedWriter(new FileWriter(qrelFileName));

    File outputDir = new File(outputDirName);

    if (!outputDir.exists()) {
      if (!outputDir.mkdirs()) {
        System.out.println("couldn't create " + outputDir.getAbsolutePath());
        System.exit(1);
      }
    }
    if (!outputDir.isDirectory()) {
      System.out.println(outputDir.getAbsolutePath() + " is not a directory!");
      System.exit(1);
    }
    if (!outputDir.canWrite()) {
      System.out.println("Can't write to " + outputDir.getAbsolutePath());
      System.exit(1);
    }

    boolean useFixedBM25 = cmd.hasOption("bm25fixed");

    float bm25_k1 = UtilConst.BM25_K1_DEFAULT, bm25_b = UtilConst.BM25_B_DEFAULT;

    if (cmd.hasOption("bm25_k1")) {
      try {
        bm25_k1 = Float.parseFloat(cmd.getOptionValue("bm25_k1"));
      } catch (NumberFormatException e) {
        Usage("Wrong format for 'bm25_k1'", options);
      }
    }

    if (cmd.hasOption("bm25_b")) {
      try {
        bm25_b = Float.parseFloat(cmd.getOptionValue("bm25_b"));
      } catch (NumberFormatException e) {
        Usage("Wrong format for 'bm25_b'", options);
      }
    }

    EnglishAnalyzer analyzer = new EnglishAnalyzer();
    FSDirectory indexDir = FSDirectory.open(Paths.get(outputDirName));
    IndexWriterConfig indexConf = new IndexWriterConfig(analyzer);

    /* OpenMode.CREATE creates a new index or overwrites an existing one.
       https://lucene.apache.org/core/6_0_0/core/org/apache/lucene/index/IndexWriterConfig.OpenMode.html#CREATE */
    indexConf.setOpenMode(OpenMode.CREATE);
    indexConf.setRAMBufferSizeMB(ramBufferSizeMB);

    System.out.println(String.format("BM25 parameters k1=%f b=%f ", bm25_k1, bm25_b));

    if (useFixedBM25) {
      System.out.println(String.format("Using fixed BM25Simlarity, k1=%f b=%f", bm25_k1, bm25_b));
      indexConf.setSimilarity(new BM25SimilarityFix(bm25_k1, bm25_b));
    } else {
      System.out.println(String.format("Using Lucene BM25Similarity, k1=%f b=%f", bm25_k1, bm25_b));
      indexConf.setSimilarity(new BM25Similarity(bm25_k1, bm25_b));
    }

    indexWriter = new IndexWriter(indexDir, indexConf);

    DocumentSource inpDocSource = SourceFactory.createDocumentSource(sourceName, inputFileName);
    DocumentEntry inpDoc = null;
    TextCleaner textCleaner = new TextCleaner(null);

    while ((inpDoc = inpDocSource.next()) != null) {
      ++docNum;

      Document luceneDoc = new Document();
      ArrayList<String> cleanedToks = textCleaner.cleanUp(inpDoc.mDocText);
      String cleanText = spaceJoin.join(cleanedToks);

      // System.out.println(inpDoc.mDocId);
      // System.out.println(cleanText);
      // System.out.println("==============================");

      luceneDoc.add(new StringField(UtilConst.FIELD_ID, inpDoc.mDocId, Field.Store.YES));
      luceneDoc.add(new TextField(UtilConst.FIELD_TEXT, cleanText, Field.Store.YES));

      indexWriter.addDocument(luceneDoc);

      if (inpDoc.mIsRel != null && qrelWriter != null) {
        saveQrelOneEntry(qrelWriter, inpDoc.mQueryId, inpDoc.mDocId, inpDoc.mIsRel ? MAX_GRADE : 0);
      }
      if (docNum % 1000 == 0)
        System.out.println(String.format("Indexed %d documents", docNum));
    }

  } catch (ParseException e) {
    e.printStackTrace();
    Usage("Cannot parse arguments" + e, options);
  } catch (Exception e) {
    System.err.println("Terminating due to an exception: " + e);
    System.exit(1);
  } finally {
    System.out.println(String.format("Indexed %d documents", docNum));
    try {
      if (null != indexWriter)
        indexWriter.close();
      if (null != qrelWriter)
        qrelWriter.close();
    } catch (IOException e) {
      System.err.println("IO exception: " + e);
      e.printStackTrace();
    }
  }
}
From source file:at.ac.univie.mminf.luceneSKOS.AbstractTermExpansionTest.java
License:Apache License
/**
 * This test indexes a sample metadata record (=lucene document) having a
 * "title", "description", and "subject" field, which contains plain subject
 * terms.
 *
 * A search for "arms" doesn't return that record because the term "arms" is
 * not explicitly contained in the record (document).
 *
 * @throws IOException
 * @throws LockObtainFailedException
 * @throws CorruptIndexException
 */
@Test
public void noExpansion() throws CorruptIndexException, LockObtainFailedException, IOException {
  /* defining the document to be indexed */
  Document doc = new Document();
  doc.add(new Field("title", "Spearhead", Field.Store.YES, Field.Index.ANALYZED));
  doc.add(new Field("description",
      "Roman iron spearhead. The spearhead was attached to one end of a wooden shaft..."
          + "The spear was mainly a thrusting weapon, but could also be thrown. "
          + "It was the principal weapon of the auxiliary soldier... "
          + "(second - fourth century, Arbeia Roman Fort).",
      Field.Store.NO, Field.Index.ANALYZED));
  doc.add(new Field("subject", "weapons", Field.Store.NO, Field.Index.ANALYZED));

  /* setting up a writer with a default (simple) analyzer */
  writer = new IndexWriter(new RAMDirectory(),
      new IndexWriterConfig(Version.LUCENE_36, new SimpleAnalyzer(Version.LUCENE_36)));

  /* adding the document to the index */
  writer.addDocument(doc);

  /* defining a query that searches over all fields */
  BooleanQuery query = new BooleanQuery();
  query.add(new TermQuery(new Term("title", "arms")), BooleanClause.Occur.SHOULD);
  query.add(new TermQuery(new Term("description", "arms")), BooleanClause.Occur.SHOULD);
  query.add(new TermQuery(new Term("subject", "arms")), BooleanClause.Occur.SHOULD);

  /* creating a new searcher */
  searcher = new IndexSearcher(IndexReader.open(writer, false));

  TopDocs results = searcher.search(query, 10);

  /* no results are returned since there is no term match */
  Assert.assertEquals(0, results.totalHits);
}
From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java
License:Apache License
/**
 * Creates the synonym index
 *
 * @throws IOException
 */
private void indexSKOSModel() throws IOException {
  IndexWriterConfig cfg = new IndexWriterConfig(analyzer);
  IndexWriter writer = new IndexWriter(indexDir, cfg);
  writer.getConfig().setRAMBufferSizeMB(48);

  /* iterate SKOS concepts, create Lucene docs and add them to the index */
  ResIterator concept_iter = skosModel.listResourcesWithProperty(RDF.type, SKOS.Concept);
  while (concept_iter.hasNext()) {
    Resource skos_concept = concept_iter.next();
    Document concept_doc = createDocumentsFromConcept(skos_concept);
    writer.addDocument(concept_doc);
  }

  writer.close();
}