List of usage examples for the org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException constructor NoMoreDataException()
NoMoreDataException
From source file:info.boytsov.lucene.parsers.ClueWeb09ContentSource.java
License:Open Source License
void openNextFile() throws NoMoreDataException, IOException { close();/* w w w.ja v a2 s . c o m*/ while (true) { if (nextFile >= inputFiles.size()) { // exhausted files, start a new round, unless forever set to false. if (!forever) { throw new NoMoreDataException(); } nextFile = 0; iteration++; } File f = inputFiles.get(nextFile++); if (verbose) { System.out.println("opening: " + f + " length: " + f.length()); } try { // supports gzip, bzip2, or regular text file, extension is used to detect InputStream inputStream = StreamUtils.inputStream(f); reader = new DataInputStream(inputStream); return; } catch (Exception e) { if (verbose) { System.out.println("Skipping 'bad' file " + f.getAbsolutePath() + " due to " + e.getMessage()); continue; } throw new NoMoreDataException(); } } }
From source file:info.boytsov.lucene.parsers.TrecContentSource.java
License:Apache License
void openNextFile() throws NoMoreDataException, IOException { close();/*from w w w . j a v a 2 s.c om*/ currPathType = null; while (true) { if (nextFile >= inputFiles.size()) { // exhausted files, start a new round, unless forever set to false. if (!forever) { throw new NoMoreDataException(); } nextFile = 0; iteration++; } File f = inputFiles.get(nextFile++); if (verbose) { System.out.println("opening: " + f + " length: " + f.length()); } try { InputStream inputStream = StreamUtils.inputStream(f); // support either gzip, bzip2, or regular text file, by extension reader = new BufferedReader(new InputStreamReader(inputStream, encoding), StreamUtils.BUFFER_SIZE); currPathType = TrecDocParser.pathType(f); return; } catch (Exception e) { if (verbose) { System.out.println("Skipping 'bad' file " + f.getAbsolutePath() + " due to " + e.getMessage()); continue; } throw new NoMoreDataException(); } } }
From source file:parsers.ClueWebContentSource.java
License:Open Source License
void openNextFile() throws NoMoreDataException, IOException { close();/* ww w . ja v a 2s. c o m*/ while (true) { if (nextFile >= inputFiles.size()) { // exhausted files, start a new round, unless forever set to false. if (!forever) { throw new NoMoreDataException(); } nextFile = 0; iteration++; } Path f = inputFiles.get(nextFile++); if (verbose) { System.out.println("opening: " + f + " length: " + f.toFile().length()); } try { // supports gzip, bzip2, or regular text file, extension is used to detect InputStream inputStream = StreamUtils.inputStream(f); reader = new DataInputStream(inputStream); return; } catch (Exception e) { if (verbose) { System.out.println( "Skipping 'bad' file " + f.toFile().getAbsolutePath() + " due to " + e.getMessage()); continue; } throw new NoMoreDataException(); } } }
From source file:parsers.TrecContentSource.java
License:Apache License
void openNextFile() throws NoMoreDataException, IOException { close();// www . ja va2s . c om currPathType = null; while (true) { if (nextFile >= inputFiles.size()) { // exhausted files, start a new round, unless forever set to false. if (!forever) { throw new NoMoreDataException(); } nextFile = 0; iteration++; } Path f = inputFiles.get(nextFile++); if (verbose) { System.out.println("opening: " + f + " length: " + f.toFile().length()); } try { InputStream inputStream = StreamUtils.inputStream(f); // support either gzip, bzip2, or regular text file, by extension reader = new BufferedReader(new InputStreamReader(inputStream, encoding), StreamUtils.BUFFER_SIZE); currPathType = TrecDocParser.pathType(f.toFile()); return; } catch (Exception e) { if (verbose) { System.out.println( "Skipping 'bad' file " + f.toFile().getAbsolutePath() + " due to " + e.getMessage()); continue; } throw new NoMoreDataException(); } } }