Example usage for org.apache.lucene.index IndexWriterConfig IndexWriterConfig

List of usage examples for org.apache.lucene.index IndexWriterConfig IndexWriterConfig

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriterConfig IndexWriterConfig.

Prototype

public IndexWriterConfig(Analyzer analyzer) 

Source Link

Document

Creates a new config with the provided Analyzer.

Usage

From source file:MyServlet.java

public void init() throws ServletException {
    try {
        // Specify the analyzer for tokenizing text.
        // The same analyzer must be used for both indexing and searching.
        analyzer = new StandardAnalyzer();
        // Build the course index entirely in memory.
        index = new RAMDirectory();
        config = new IndexWriterConfig(analyzer);
        w = new IndexWriter(index, config);
        try {
            addDoc(w, " Software Engineering 2", "CMPE 133", "Mon.", "Computer Engineering");
            addDoc(w, " Software Engineering 1", "CMPE 131", "Mon.", "Computer Engineering");
            addDoc(w, " Object Oriented Design", "CS 151", "Mon.", "Computer Science");
            addDoc(w, " Advance Data Structures with Java ", "CS 146:", "Mon.", "Computer Science");
            addDoc(w, " System Security with Java", "CS 166:", "Mon.", "Computer Science");
            addDoc(w, "Liner math", "ME 123", "Mon.", "Math");
        } finally {
            // BUG FIX: release the writer even if one of the addDoc calls throws;
            // the original leaked it on any indexing failure.
            w.close();
        }
        log = new searchHistory();
        // Seed ten demo student and teacher accounts (std1..std10 / tch1..tch10).
        for (int i = 1; i <= 10; i++) {
            Student std = new Student();
            std.setUserName("std" + i);
            std.setPassword("123");
            stds.add(std);
            Teacher tch = new Teacher();
            tch.setUserName("tch" + i);
            tch.setPassword("123");
            tchs.add(tch);
        }
    } catch (Exception e) {
        // Log the full stack trace; the bare message alone loses the failure context.
        e.printStackTrace();
    }
    System.out.println("init");

}

From source file:MyServlet.java

/**
 * Indexes a new course taken from the request parameters and shows a result page.
 * Reads "name", "id", "department" and "time" request parameters.
 */
private void gotoAddCourse(PrintWriter out, HttpServletRequest request, HttpServletResponse response) {
    String name = request.getParameter("name");
    String id = request.getParameter("id");
    String department = request.getParameter("department");
    String time = request.getParameter("time");
    boolean added = false;
    try {
        IndexWriterConfig newConfig = new IndexWriterConfig(analyzer);
        w = new IndexWriter(index, newConfig);
        try {
            addDoc(w, name, id, time, department);
            System.out.println(name);
        } finally {
            // BUG FIX: close the writer even when addDoc fails; the original
            // leaked it (and left the index locked) on any exception.
            w.close();
        }
        added = true;
    } catch (Exception e) {
        // Log the full stack trace instead of only the message.
        e.printStackTrace();
    }
    // BUG FIX: the original reported "Create course success" unconditionally,
    // even when indexing had just failed.
    String msg = added ? "Create course success" : "Create course failed";
    String title = msg;
    gotoMsg(out, request, response, title, msg);
}

From source file:FileIndexer.java

License:Apache License

/** Command-line entry point: indexes DOCS_PATH into INDEX_PATH, optionally skipping excluded files. */
public static void main(String[] args) {
    // BUG FIX: the original concatenation produced "...indexin INDEX_PATH..."
    // because the space between the two literals was missing.
    String usage = "java FileIndexer" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-excludes FILE] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles\n"
            + "excludes is an optional list of files to be excluded, one per line.";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    List<String> excludes = new ArrayList<String>();
    // Simple hand-rolled flag parsing; each value flag consumes the next arg.
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-excludes".equals(args[i])) {
            Scanner sc = null;
            try {
                sc = new Scanner(new File(args[i + 1]));
                i++;
            } catch (FileNotFoundException fnfe) {
                System.err.println(fnfe.getMessage());
                System.exit(1);
            }
            // One excluded file name per whitespace-separated token.
            while (sc.hasNext()) {
                excludes.add(sc.next());
            }
            sc.close();
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        // Cap each document at 1M tokens to bound memory on pathological inputs.
        Analyzer analyzer = new LimitTokenCountAnalyzer(new StandardAnalyzer(), 1000000);
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir, excludes);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }

}

From source file:IrqaQuery.java

License:Apache License

/**
 * Opens the shared IndexWriter over indexPath using an English analyzer with
 * the given stopword file, configured with the requested similarity.
 *
 * @param indexPath directory where the index is stored
 * @param stopPath  path to the stopword list file
 * @param sim       "TFIDF" for the classic vector-space model; anything else
 *                  (including "BM25") selects BM25
 * @throws IOException if the index directory cannot be opened
 */
public static void makeIndexWriter(String indexPath, String stopPath, String sim) throws IOException {
    System.out.println("[makeIndexWriter] started");
    System.out.println("[makeIndexWriter]" + stopPath);
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    Analyzer analyzer = new EnglishAnalyzer(StopFilter.makeStopSet(mygetStopwords(stopPath)));
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

    // The original had two identical branches ("BM25" and the default both set
    // BM25Similarity); they are collapsed here with identical behavior.
    if (sim.equals("TFIDF"))
        iwc.setSimilarity(new ClassicSimilarity());
    else
        iwc.setSimilarity(new BM25Similarity());

    writer = new IndexWriter(dir, iwc);
}

From source file:DocIndexer.java

License:Apache License

/**
 * Builds an in-memory index over {@code inputFiles}: each file's first
 * non-"[[tag]]" line is taken as the title, the full file body as the text.
 *
 * @return the populated RAMDirectory
 * @throws IOException if a file cannot be read or indexed
 */
private RAMDirectory index() throws IOException, UnsupportedEncodingException, FileNotFoundException {
    RAMDirectory directory = new RAMDirectory();
    // Empty stopword set: keep every token searchable.
    IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer(CharArraySet.EMPTY_SET));
    config.setOpenMode(OpenMode.CREATE);
    config.setCommitOnClose(true);
    try (IndexWriter iwriter = new IndexWriter(directory, config)) {
        for (String inputFile : inputFiles) {
            File file = new File(inputFile);
            if (file.length() == 0) {
                continue;
            }

            String title;
            try (BufferedReader titleReader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
                title = titleReader.readLine();
                if (title != null && title.startsWith("[[")) {
                    // Generally the first line of the txt is the title. In a few cases the
                    // first line is a "[[tag]]" and the second line is the title.
                    title = titleReader.readLine();
                }
            }
            if (title == null) {
                // BUG FIX: a file whose only content is a "[[tag]]" line yields a
                // null title; the original passed that null to
                // SECTION_HEADER.matcher() and threw a NullPointerException.
                continue;
            }
            Matcher matcher = SECTION_HEADER.matcher(title);
            if (matcher.matches()) {
                title = matcher.group(1);
            }

            String outputFile = AsciiDoctor.mapInFileToOutFile(inputFile, inExt, outExt);
            try (FileReader reader = new FileReader(file)) {
                Document doc = new Document();
                doc.add(new TextField(Constants.DOC_FIELD, reader));
                doc.add(new StringField(Constants.URL_FIELD, prefix + outputFile, Field.Store.YES));
                doc.add(new TextField(Constants.TITLE_FIELD, title, Field.Store.YES));
                iwriter.addDocument(doc);
            }
        }
    }
    return directory;
}

From source file:DVBench.java

License:Apache License

// Micro-benchmark comparing three storage strategies for a per-document long
// at a given bit width (bpv): native numeric doc values ("dv"), an indexed
// field read through UninvertingReader ("inv"/"fc"), and longs boxed inside
// binary doc values ("boxed" variable-length, "boxed2" fixed-length).
static void doBench(int bpv) throws Exception {
    File file = new File("/data/indices/dvbench");
    file.mkdirs();
    Directory dir = FSDirectory.open(file);
    // Null analyzer: this index has no tokenized text fields.
    IndexWriterConfig config = new IndexWriterConfig(null);
    config.setOpenMode(OpenMode.CREATE);
    // Serial merging + doc-count merge policy keep segment geometry
    // deterministic across runs, so timings are comparable.
    config.setMergeScheduler(new SerialMergeScheduler());
    config.setMergePolicy(new LogDocMergePolicy());
    config.setMaxBufferedDocs(25000);
    IndexWriter writer = new IndexWriter(dir, config);

    MyRandom r = new MyRandom();
    int numdocs = 400000;
    // Reuse one Document and its Field instances; only values change per doc.
    Document doc = new Document();
    Field dv = new NumericDocValuesField("dv", 0);
    Field inv = new LongField("inv", 0, Field.Store.NO);
    Field boxed = new BinaryDocValuesField("boxed", new BytesRef(8));
    Field boxed2 = new BinaryDocValuesField("boxed2", new BytesRef(8));

    doc.add(dv);
    doc.add(inv);
    doc.add(boxed);
    doc.add(boxed2);
    for (int i = 0; i < numdocs; i++) {
        // defeat blockpackedwriter: force extreme values periodically so the
        // codec cannot compress whole blocks away.
        final long value;
        if (i % 8192 == 0) {
            value = bpv == 64 ? Long.MIN_VALUE : 0;
        } else if (i % 8192 == 1) {
            value = bpv == 64 ? Long.MAX_VALUE : (1L << bpv) - 1;
        } else {
            value = r.nextLong(bpv);
        }
        dv.setLongValue(value);
        inv.setLongValue(value);
        box(value, boxed.binaryValue());
        box(value, boxed2.binaryValue());
        boxed2.binaryValue().length = (bpv + 7) / 8; // fixed length
        writer.addDocument(doc);
    }

    writer.close();

    // run dv search tests
    String description = "dv (bpv=" + bpv + ")";
    DirectoryReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null); // don't bench the cache

    int hash = 0;
    // warmup
    hash += search(description, searcher, "dv", 300, true);
    hash += search(description, searcher, "dv", 300, false);

    // Uninverting
    Map<String, UninvertingReader.Type> mapping = Collections.singletonMap("inv", UninvertingReader.Type.LONG);
    DirectoryReader uninv = UninvertingReader.wrap(reader, mapping);
    IndexSearcher searcher2 = new IndexSearcher(uninv);
    searcher2.setQueryCache(null); // don't bench the cache

    description = "fc (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher2, "inv", 300, true);
    hash += search(description, searcher2, "inv", 300, false);

    // Boxed inside binary
    DirectoryReader boxedReader = new BinaryAsVLongReader(reader);
    IndexSearcher searcher3 = new IndexSearcher(boxedReader);
    searcher3.setQueryCache(null); // don't bench the cache
    description = "boxed (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher3, "boxed", 300, true);
    hash += search(description, searcher3, "boxed", 300, false);

    description = "boxed fixed-length (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher3, "boxed2", 300, true);
    hash += search(description, searcher3, "boxed2", 300, false);

    // Consuming "hash" keeps the JIT from dead-code-eliminating the search
    // calls; the value itself is meaningless.
    if (hash == 3) {
        // wont happen
        System.out.println("hash=" + hash);
    }
    reader.close();
    dir.close();
}

From source file:IndexAndSearchOpenStreetMaps1D.java

License:Apache License

/**
 * Streams the OpenStreetMap latitude dump line-by-line and indexes each
 * latitude either as a legacy numeric field (USE_NF) or as an IntPoint.
 *
 * @throws IOException if the input file or index directory cannot be accessed
 */
private static void createIndex() throws IOException {

    long t0 = System.nanoTime();

    // Fail fast on malformed input rather than silently substituting bytes.
    CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);

    int BUFFER_SIZE = 1 << 16; // 64K
    InputStream is = Files
            .newInputStream(Paths.get("/lucenedata/open-street-maps/latlon.subsetPlusAllLondon.txt"));
    BufferedReader reader = new BufferedReader(new InputStreamReader(is, decoder), BUFFER_SIZE);

    Directory dir = FSDirectory.open(Paths.get("/c/tmp/bkdtest1d" + (USE_NF ? "_nf" : "")));

    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    //iwc.setMaxBufferedDocs(109630);
    //iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    iwc.setRAMBufferSizeMB(256.0);
    iwc.setMergePolicy(new LogDocMergePolicy());
    iwc.setMergeScheduler(new SerialMergeScheduler());
    iwc.setInfoStream(new PrintStreamInfoStream(System.out));
    IndexWriter w = new IndexWriter(dir, iwc);

    int count = 0;
    byte[] scratch = new byte[4];
    try {
        while (true) {
            String line = reader.readLine();
            if (line == null) {
                break;
            }

            String[] parts = line.split(",");
            //long id = Long.parseLong(parts[0]);
            // Latitudes are scaled to integer micro-degrees.
            int lat = (int) (1000000. * Double.parseDouble(parts[1]));
            //int lon = (int) (1000000. * Double.parseDouble(parts[2]));
            Document doc = new Document();
            if (USE_NF) {
                doc.add(new LegacyIntField("latnum", lat, Field.Store.NO));
                //doc.add(new LongField("lonnum", lon, Field.Store.NO));
            } else {
                doc.add(new IntPoint("lat", lat));
                //doc.add(new SortedNumericDocValuesField("lon", lon));
            }
            w.addDocument(doc);
            count++;
            if (count % 1000000 == 0) {
                System.out.println(count + "...");
            }
        }
    } finally {
        // BUG FIX: the original never closed the reader, leaking the input
        // file handle for the lifetime of the process.
        reader.close();
    }
    //w.forceMerge(1);
    w.commit();
    System.out.println(w.maxDoc() + " total docs");

    w.close();
    // BUG FIX: the original never closed the Directory.
    dir.close();
    long t1 = System.nanoTime();
    System.out.println(((t1 - t0) / 1000000000.0) + " sec to build index");
}

From source file:alix.lucene.Alix.java

License:Open Source License

/**
 * Scans a glob of XML files, transforms each one with an XSL stylesheet via
 * Saxon, and feeds the results into a Lucene index.
 *
 * @param xmlGlob a directory of XML files, or a glob pattern selecting them
 * @param xslFile XSL stylesheet used to parse each matched XML file
 * @param indexDir where the lucene indexes are generated
 * @throws IOException if the source or index directories cannot be accessed
 * @throws TransformerConfigurationException if the stylesheet cannot be compiled
 */
static public void walk(String xmlGlob, String xslFile, String indexDir)
        throws IOException, TransformerConfigurationException {

    info("Lucene, src:" + xmlGlob + " parser:" + xslFile + " index:" + indexDir);

    Path srcDir = Paths.get(xmlGlob);
    PathMatcher glob = FileSystems.getDefault().getPathMatcher("glob:*.xml");
    // If xmlGlob is not a directory, treat its last segment as the glob
    // pattern and its parent as the directory to walk.
    if (!Files.isDirectory(srcDir)) {
        String pattern = srcDir.getFileName().toString();
        glob = FileSystems.getDefault().getPathMatcher("glob:" + pattern);
        srcDir = srcDir.getParent();
    }
    if (!Files.isDirectory(srcDir)) {
        fatal("FATAL " + srcDir + " NOT FOUND");
    }

    Path indexPath = Paths.get(indexDir);
    Files.createDirectories(indexPath);
    Directory dir = FSDirectory.open(indexPath);

    // TODO configure analyzers
    Analyzer analyzer = new XmlAnalyzer();
    IndexWriterConfig conf = new IndexWriterConfig(analyzer);
    conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
    conf.setSimilarity(new BM25Similarity());
    conf.setCodec(new ChapitreCodec());
    // Optional: for better indexing performance, if you
    // are indexing many documents, increase the RAM
    // buffer.  But if you do this, increase the max heap
    // size to the JVM (eg add -Xmx512m or -Xmx1g):
    //
    // conf.setRAMBufferSizeMB(256.0);
    lucwriter = new IndexWriter(dir, conf);

    System.setProperty("javax.xml.transform.TransformerFactory", "net.sf.saxon.TransformerFactoryImpl");
    TransformerFactory tf = TransformerFactory.newInstance();
    tf.setAttribute("http://saxon.sf.net/feature/version-warning", Boolean.FALSE);
    // Integer.valueOf replaces the deprecated "new Integer(0)" boxing.
    tf.setAttribute("http://saxon.sf.net/feature/recoveryPolicy", Integer.valueOf(0));
    parser = tf.newTransformer(new StreamSource(xslFile));

    final PathMatcher matcher = glob; // transmit the matcher by a final variable to the anonymous class
    Files.walkFileTree(srcDir, new SimpleFileVisitor<Path>() {
        @Override
        public FileVisitResult visitFile(Path path, BasicFileAttributes attrs) {
            // Skip hidden files and anything not matching the glob.
            if (path.getFileName().toString().startsWith("."))
                return FileVisitResult.CONTINUE;
            if (!matcher.matches(path.getFileName()))
                return FileVisitResult.CONTINUE;
            parse(path);
            return FileVisitResult.CONTINUE;
        }

        @Override
        public FileVisitResult preVisitDirectory(Path path, BasicFileAttributes attrs) {
            // .git, .svn
            if (path.getFileName().toString().startsWith("."))
                return FileVisitResult.SKIP_SUBTREE;
            return FileVisitResult.CONTINUE;
        }
    });

    lucwriter.commit();
    // NOTE: if you want to maximize search performance,
    // you can optionally call forceMerge here.  This can be
    // a terribly costly operation, so generally it's only
    // worth it when your index is relatively static (ie
    // you're done adding documents to it):
    //
    lucwriter.forceMerge(1);
    lucwriter.close();
}

From source file:antnlp.opie.indexsearch.IndexFiles.java

License:Apache License

/** Index all text files under a directory. */
/** Index all text files under a directory. */
public static void main(String[] args) {
    // BUG FIX: the original concatenation produced "...indexin INDEX_PATH..."
    // because the space between the two literals was missing.
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    // Simple hand-rolled flag parsing; each value flag consumes the next arg.
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        //Analyzer analyzer = new StandardAnalyzer();
        //Analyzer analyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET);
        // Whitespace-only tokenization: the input is assumed pre-tokenized.
        Analyzer analyzer = new WhitespaceAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:api.startup.PDFIndexer.java

License:Open Source License

/**
 * Rebuilds the PDF index from the resource directory's CSV listing and
 * refreshes the shared index reader.
 *
 * @throws IOException declared for API compatibility; IO failures are
 *         currently caught and logged rather than propagated
 */
public void updateIndex() throws IOException {
    try {
        long startTime = System.nanoTime();
        // Get the index directory
        Directory dir = FSDirectory.open(Paths.get(indexDirectory));
        // Get the directory for resources
        String resourcesDir = resourceDirectory + "/" + Constants.CSV_LOCATION;
        // Get PDF Analyzer
        Analyzer pdf_analyzer = new PDFAnalyzer(resourceDirectory + "/" + Constants.STOPWORDS_FILE);
        // Create an index writer config with the analyzer
        IndexWriterConfig iwc = new IndexWriterConfig(pdf_analyzer);
        // OpenMode.CREATE rebuilds the index from scratch on every run.
        // (The original comment claimed "create or append", which was wrong.)
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        long endTime;
        // BUG FIX: try-with-resources guarantees the writer is closed even if
        // indexDocs or the reader initialization throws; the original leaked
        // the writer (and its index lock) on any failure.
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            // Index the documents
            indexDocs(writer, resourcesDir);
            endTime = System.nanoTime();
            LuceneIndexReader.getInstance().initializeIndexReader(writer);
        }
        log.info("Took: " + (endTime - startTime) / Math.pow(10, 6) + " milliseconds to generate the index.");
    } catch (IOException e) {
        log.error("IO Exception Thrown while updating index " + e.getMessage() + "\n");
    }

}