List of usage examples for the org.apache.commons.io.input.BOMInputStream constructor
public BOMInputStream(InputStream delegate, ByteOrderMark... boms)
From source file:adept.io.Reader.java
/** * Reads specified XML file into a DOM object. * * @param path the path/*from w w w . j a v a 2s .c o m*/ * @return the document */ public org.w3c.dom.Document readXML(String path) { try { InputStream is = findStreamInClasspathOrFileSystem(path); BOMInputStream bis = new BOMInputStream(is, false); DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); org.w3c.dom.Document doc = dBuilder.parse(bis); // TODO - might have backslashes. doc.setDocumentURI(path); return doc; } catch (Exception e) { //e.printStackTrace(); } return null; }
From source file:com.thetdgroup.TextExtractionAdapter.java
private ContentInformation processFile(File fileName) throws IOException { ContentInformation extractedContent = new ContentInformation(); ContentHandler contenthandler = new BodyContentHandler(); Metadata metadata = new Metadata(); ///*from w ww. jav a 2 s . co m*/ InputStream inputStream = null; BOMInputStream bomInputStream = null; try { inputStream = new FileInputStream(fileName); bomInputStream = new BOMInputStream(inputStream, false); contenthandler = new BodyContentHandler(); metadata.set(Metadata.RESOURCE_NAME_KEY, fileName.getName()); Parser parser = new AutoDetectParser(); parser.parse(bomInputStream, contenthandler, metadata); } catch (Exception exception) { extractedContent.hasException(); extractedContent.setException(exception.toString()); } finally { if (bomInputStream != null) { bomInputStream.close(); } if (inputStream != null) { inputStream.close(); } } // // extractedContent.setImportedFileName(fileName.getName()); if (contenthandler != null) { String content = contenthandler.toString().replace("\n", " "); extractedContent.setContentData(content); } if (metadata != null) { // CREATIVE COMMONS extractedContent.setLicenseLocation(metadata.get(Metadata.LICENSE_LOCATION)); extractedContent.setLicenceURL(metadata.get(Metadata.LICENSE_URL)); extractedContent.setWorkType(metadata.get(Metadata.WORK_TYPE)); // DUBLIN CORE extractedContent.setContributor(metadata.get(Metadata.CONTRIBUTOR)); extractedContent.setCoverage(metadata.get(Metadata.COVERAGE)); extractedContent.setCreator(metadata.get(Metadata.CREATOR)); extractedContent.setDate(metadata.get(Metadata.DATE)); extractedContent.setDescription(metadata.get(Metadata.DESCRIPTION)); extractedContent.setFormat(metadata.get(Metadata.FORMAT)); extractedContent.setIdentifier(metadata.get(Metadata.IDENTIFIER)); extractedContent.setLanguage(metadata.get(Metadata.LANGUAGE)); extractedContent.setModified(metadata.get(Metadata.MODIFIED)); extractedContent.setPublisher(metadata.get(Metadata.PUBLISHER)); 
extractedContent.setRelation(metadata.get(Metadata.RELATION)); extractedContent.setRights(metadata.get(Metadata.RIGHTS)); extractedContent.setDublinSource(metadata.get(org.apache.tika.metadata.DublinCore.SOURCE)); extractedContent.setSubject(metadata.get(Metadata.SUBJECT)); extractedContent.setTitle(metadata.get(Metadata.TITLE)); extractedContent.setType(metadata.get(Metadata.TYPE)); // GEOGRAPHIC //extractedContent.setAltitude(metadata.get(Metadata.ALTITUDE)); //extractedContent.setLatitude(metadata.get(Metadata.LATITUDE)); //extractedContent.setLongitude(metadata.get(Metadata.LONGITUDE)); // HTTP HEADERS extractedContent.setContentDisposition(metadata.get(Metadata.CONTENT_DISPOSITION)); extractedContent.setContentEncoding(metadata.get(Metadata.CONTENT_ENCODING)); extractedContent.setContentLanguage(metadata.get(Metadata.CONTENT_LANGUAGE)); extractedContent.setContentLength(metadata.get(Metadata.CONTENT_LENGTH)); extractedContent.setContentLocation(metadata.get(Metadata.CONTENT_LOCATION)); extractedContent.setContentMD5(metadata.get(Metadata.CONTENT_MD5)); extractedContent.setContentType(metadata.get(Metadata.CONTENT_TYPE)); extractedContent.setLastModifier(metadata.get(Metadata.LAST_MODIFIED)); extractedContent.setLocation(metadata.get(Metadata.LOCATION)); // MESSAGE (EMAIL) //extractedContent.setMessageBCC(metadata.get(Metadata.MESSAGE_BCC)); //extractedContent.setMessageCC(metadata.get(Metadata.MESSAGE_CC)); //extractedContent.setMessageFrom(metadata.get(Metadata.MESSAGE_FROM)); //extractedContent.setMessageRecipientAddress(metadata.get(Metadata.MESSAGE_RECIPIENT_ADDRESS)); //extractedContent.setMessageTo(metadata.get(Metadata.MESSAGE_TO)); // MS OFFICE extractedContent.setApplicationName(metadata.get(Metadata.APPLICATION_NAME)); extractedContent.setApplicationVersion(metadata.get(Metadata.APPLICATION_VERSION)); extractedContent.setAuthor(metadata.get(Metadata.AUTHOR)); extractedContent.setCategory(metadata.get(Metadata.CATEGORY)); 
extractedContent.setCharacterCount(metadata.get(Metadata.CHARACTER_COUNT)); extractedContent.setCharacterCountWithSpace(metadata.get(Metadata.CHARACTER_COUNT_WITH_SPACES)); extractedContent.setComments(metadata.get(Metadata.COMMENTS)); extractedContent.setCompany(metadata.get(Metadata.COMPANY)); extractedContent.setContentStatus(metadata.get(Metadata.CONTENT_STATUS)); extractedContent.setCreationDate(metadata.get(Metadata.CREATION_DATE)); extractedContent.setEditTime(metadata.get(Metadata.EDIT_TIME)); extractedContent.setKeywords(metadata.get(Metadata.KEYWORDS)); extractedContent.setLastAuthor(metadata.get(Metadata.LAST_AUTHOR)); extractedContent.setLastPrinted(metadata.get(Metadata.LAST_PRINTED)); extractedContent.setLastSaved(metadata.get(Metadata.LAST_SAVED)); extractedContent.setLineCount(metadata.get(Metadata.LINE_COUNT)); extractedContent.setManager(metadata.get(Metadata.MANAGER)); extractedContent.setNotes(metadata.get(Metadata.NOTES)); extractedContent.setPageCount(metadata.get(Metadata.PAGE_COUNT)); extractedContent.setParagraphCount(metadata.get(Metadata.PARAGRAPH_COUNT)); extractedContent.setPresentationFormat(metadata.get(Metadata.PRESENTATION_FORMAT)); extractedContent.setRevisionNumber(metadata.get(Metadata.REVISION_NUMBER)); extractedContent.setSecurity(metadata.get(Metadata.SECURITY)); extractedContent.setSlideCount(metadata.get(Metadata.SLIDE_COUNT)); extractedContent.setTemplate(metadata.get(Metadata.TEMPLATE)); extractedContent.setTotalTime(metadata.get(Metadata.TOTAL_TIME)); extractedContent.setVersion(metadata.get(Metadata.VERSION)); extractedContent.setWordCount(metadata.get(Metadata.WORD_COUNT)); // CLIMATEFORCAST //extractedContent.setClimateForcastAcknowledgement(metadata.get(org.apache.tika.metadata.ClimateForcast.ACKNOWLEDGEMENT)); //extractedContent.setClimateForcastCommandLine(metadata.get(org.apache.tika.metadata.ClimateForcast.COMMAND_LINE)); 
//extractedContent.setClimateForcastComment(metadata.get(org.apache.tika.metadata.ClimateForcast.COMMENT)); //extractedContent.setClimateForcastContact(metadata.get(org.apache.tika.metadata.ClimateForcast.CONTACT)); //extractedContent.setClimateForcastConvention(metadata.get(org.apache.tika.metadata.ClimateForcast.CONVENTIONS)); //extractedContent.setClimateForcastExperimentID(metadata.get(org.apache.tika.metadata.ClimateForcast.EXPERIMENT_ID)); //extractedContent.setClimateForcastHistory(metadata.get(org.apache.tika.metadata.ClimateForcast.HISTORY)); //extractedContent.setClimateForcastInstitution(metadata.get(org.apache.tika.metadata.ClimateForcast.INSTITUTION)); //extractedContent.setClimateForcastModelName(metadata.get(org.apache.tika.metadata.ClimateForcast.MODEL_NAME_ENGLISH)); //extractedContent.setClimateForcastProgramID(metadata.get(org.apache.tika.metadata.ClimateForcast.PROGRAM_ID)); //extractedContent.setClimateForcastProjectID(metadata.get(org.apache.tika.metadata.ClimateForcast.PROJECT_ID)); //extractedContent.setClimateForcastRealization(metadata.get(org.apache.tika.metadata.ClimateForcast.REALIZATION)); //extractedContent.setClimateForcastReferences(metadata.get(org.apache.tika.metadata.ClimateForcast.REFERENCES)); //extractedContent.setClimateForcastSource(metadata.get(org.apache.tika.metadata.ClimateForcast.SOURCE)); //extractedContent.setClimateForcastTableID(metadata.get(org.apache.tika.metadata.ClimateForcast.TABLE_ID)); // TIFF //extractedContent.setTIFFBitsPerSample(metadata.get(Metadata.BITS_PER_SAMPLE)); //extractedContent.setTIFFEquipmentMake(metadata.get(Metadata.EQUIPMENT_MAKE)); //extractedContent.setTIFFEquipmentModel(metadata.get(Metadata.EQUIPMENT_MODEL)); //extractedContent.setTIFFExposureLimit(metadata.get(Metadata.EXPOSURE_TIME)); //extractedContent.setTIFFFNumber(metadata.get(Metadata.F_NUMBER)); //extractedContent.setTIFFFlashFired(metadata.get(Metadata.FLASH_FIRED)); 
//extractedContent.setTIFFFocalLength(metadata.get(Metadata.FOCAL_LENGTH)); //extractedContent.setTIFFImageLength(metadata.get(Metadata.IMAGE_LENGTH)); //extractedContent.setTIFFImageWidth(metadata.get(Metadata.IMAGE_WIDTH)); //extractedContent.setTIFFISOSpeedRating(metadata.get(Metadata.ISO_SPEED_RATINGS)); //extractedContent.setTIFFOrientation(metadata.get(Metadata.ORIENTATION)); //extractedContent.setTIFFOriginalDate(metadata.get(Metadata.ORIGINAL_DATE)); //extractedContent.setTIFFResolutionHorizontal(metadata.get(Metadata.RESOLUTION_HORIZONTAL)); //extractedContent.setTIFFResolutionUnit(metadata.get(Metadata.RESOLUTION_UNIT)); //extractedContent.setTIFFResolutionVertical(metadata.get(Metadata.RESOLUTION_VERTICAL)); //extractedContent.setTIFFSamplePerPixel(metadata.get(Metadata.SAMPLES_PER_PIXEL)); //extractedContent.setTIFFSoftware(metadata.get(Metadata.SOFTWARE)); // TIKA METADATA KEYS extractedContent.setResourceNameKey(metadata.get(Metadata.RESOURCE_NAME_KEY)); // TIKA MIME KEYS extractedContent.setMimeTypeMagic(metadata.get(Metadata.MIME_TYPE_MAGIC)); extractedContent.setTikaMimeType(metadata.get(Metadata.TIKA_MIME_FILE)); } // return extractedContent; }
From source file:ca.uhn.fhir.validation.SchemaBaseValidator.java
private Source loadXml(String theSystemId, String theSchemaName) { String pathToBase = myCtx.getVersion().getPathToSchemaDefinitions() + '/' + theSchemaName; ourLog.debug("Going to load resource: {}", pathToBase); InputStream baseIs = FhirValidator.class.getResourceAsStream(pathToBase); if (baseIs == null) { throw new InternalErrorException("Schema not found. " + RESOURCES_JAR_NOTE); }// w w w.j a v a 2s .c o m baseIs = new BOMInputStream(baseIs, false); InputStreamReader baseReader = new InputStreamReader(baseIs, Charset.forName("UTF-8")); Source baseSource = new StreamSource(baseReader, theSystemId); return baseSource; }
From source file:com.bigdata.rdf.rio.ntriples.BigdataNTriplesParser.java
/** * Implementation of the <tt>parse(InputStream, String)</tt> method defined * in the RDFParser interface.//from w ww .jav a 2 s.c o m * * @param in * The InputStream from which to read the data, must not be * <tt>null</tt>. The InputStream is supposed to contain 7-bit * US-ASCII characters, as per the N-Triples specification. * @param baseURI * The URI associated with the data in the InputStream, must not be * <tt>null</tt>. * @throws IOException * If an I/O error occurred while data was read from the InputStream. * @throws RDFParseException * If the parser has found an unrecoverable parse error. * @throws RDFHandlerException * If the configured statement handler encountered an unrecoverable * error. * @throws IllegalArgumentException * If the supplied input stream or base URI is <tt>null</tt>. */ @Override public synchronized void parse(InputStream in, String baseURI) throws IOException, RDFParseException, RDFHandlerException { if (in == null) { throw new IllegalArgumentException("Input stream can not be 'null'"); } // Note: baseURI will be checked in parse(Reader, String) try { parse(new InputStreamReader(new BOMInputStream(in, false), "US-ASCII"), baseURI); } catch (UnsupportedEncodingException e) { // Every platform should support the US-ASCII encoding... throw new RuntimeException(e); } }
From source file:com.enonic.cms.core.resource.FileResourceServiceImpl.java
/**
 * Opens a stream on the file behind the given resource name.
 *
 * @param name the resource to open
 * @param ignoreBom when {@code true}, the returned stream skips a leading byte order mark
 * @return the opened stream, or {@code null} if the resource does not resolve to a regular file
 * @throws RuntimeException if the file cannot be opened; the underlying {@link IOException}
 *         is attached as the cause
 */
@Override
public InputStream getResourceStream(final FileResourceName name, final boolean ignoreBom) {
    final File file = getFile(name);
    if (!file.isFile()) {
        return null;
    }

    try {
        final FileInputStream in = new FileInputStream(file);
        if (ignoreBom) {
            return new BOMInputStream(in, false);
        } else {
            return in;
        }
    } catch (final IOException e) {
        // Keep the cause so callers can see why the open failed.
        throw new RuntimeException("Failed to open " + name.getPath(), e);
    }
}
From source file:at.jku.ce.adaptivetesting.vaadin.ui.topic.accounting.AccountingQuestionManager.java
public int loadQuestions(File containingFolder) throws JAXBException, IOException { assert containingFolder.exists() && containingFolder.isDirectory(); JAXBContext accountingJAXB = JAXBContext.newInstance(XmlAccountingQuestion.class, AccountingDataStorage.class); JAXBContext profitJAXB = JAXBContext.newInstance(XmlProfitQuestion.class, ProfitDataStorage.class); Unmarshaller accountingUnmarshaller = accountingJAXB.createUnmarshaller(); Unmarshaller profitUnmarshaller = profitJAXB.createUnmarshaller(); final List<AccountingQuestion> accountingList = new ArrayList<>(); final List<ProfitQuestion> profitList = new ArrayList<>(); String accountingRootElement = XmlAccountingQuestion.class.getAnnotation(XmlRootElement.class).name(); String profitRootElement = XmlProfitQuestion.class.getAnnotation(XmlRootElement.class).name(); File[] questions = containingFolder .listFiles(f -> f.isFile() && (f.canRead() || f.setReadable(true)) && f.getName().endsWith(".xml")); // read all questions LogHelper.logInfo("Found " + questions.length + " potential questions"); int successfullyLoaded = 0; for (File f : questions) { LogHelper.logInfo("Loading question with filename: " + f.getName()); BufferedReader reader = null; StringBuilder sb = new StringBuilder(); try {// ww w.j a v a 2s .c o m reader = new BufferedReader(new InputStreamReader( new BOMInputStream(new FileInputStream(f), ByteOrderMark.UTF_8), "UTF8")); String line = null; while ((line = reader.readLine()) != null) { sb.append(line); } } finally { if (reader != null) { reader.close(); } } String fileAsString = sb.toString().replaceAll("& ", "& "); if (fileAsString.contains(profitRootElement)) { LogHelper.logInfo("Question detected as " + ProfitQuestion.class.getName()); // Profit Question XmlProfitQuestion question = (XmlProfitQuestion) profitUnmarshaller .unmarshal(new StringReader(fileAsString)); profitList.add(AccountingXmlHelper.fromXml(question, f.getName())); successfullyLoaded++; } else if 
(fileAsString.contains(accountingRootElement)) { LogHelper.logInfo("Question detected as " + AccountingQuestion.class.getName()); // Accounting Question XmlAccountingQuestion question = (XmlAccountingQuestion) accountingUnmarshaller .unmarshal(new StringReader(fileAsString)); accountingList.add(AccountingXmlHelper.fromXml(question, f.getName())); successfullyLoaded++; } else { LogHelper.logInfo( "QuestionManager: item type not supported for " + f.getName() + ", ignoring file."); // throw new IllegalArgumentException( // "Question type not supported. File: " + f); continue; } LogHelper.logInfo("Loaded question with filename:" + f.getName()); } // Add question to the question manager accountingList.forEach(q -> addQuestion(q)); profitList.forEach(q -> addQuestion(q)); LogHelper.logInfo("Successfully loaded " + successfullyLoaded + " questions."); return questions.length; }
From source file:com.ggvaidya.scinames.model.Dataset.java
/** * Load this dataset from a CSV file. We load the entire CSV file, except * for blank cells./* w w w .j a v a2s . com*/ * * @param project The project to which the resulting Dataset should belong * @param csvFormat The CSV format of the input file. * @param csvFile The input file to load. * @param renamedColumns Rename these columns on the fly. * @return * @throws IOException */ public static Dataset fromCSV(CSVFormat csvFormat, File csvFile) throws IOException { Dataset dataset = new Dataset(csvFile.getName(), new SimplifiedDate(), Dataset.TYPE_CHECKLIST); // Get ready to filter input files. InputStream ins = new FileInputStream(csvFile); // Look for BOMs and discard! ins = new BOMInputStream(ins, false); // Convert into a Reader. Reader reader = new BufferedReader(new InputStreamReader(ins)); // Load CSV CSVParser parser = csvFormat.withHeader().parse(reader); Map<String, Integer> headerMap = parser.getHeaderMap(); dataset.setColumns(headerMap.entrySet().stream().sorted((Object o1, Object o2) -> { Map.Entry<String, Integer> e1 = (Map.Entry) o1; Map.Entry<String, Integer> e2 = (Map.Entry) o2; return e1.getValue().compareTo(e2.getValue()); }).map(e -> e.getKey()).map(colName -> DatasetColumn.of(colName)) /* .map(col -> { // Rename any renamedColumns. if(renamedColumns.containsKey(col)) return renamedColumns.get(col); else return col; })*/ .collect(Collectors.toList())); dataset.rows.clear(); dataset.rows.addAll(parser.getRecords().stream().map(record -> { DatasetRow row = new DatasetRow(dataset); row.putAll(record.toMap()); return row; }).collect(Collectors.toList())); return dataset; }
From source file:adept.io.Reader.java
/** * Find stream in classpath or file system. * * @param name the name/*from ww w. j a v a2 s . co m*/ * @return the input stream * @throws FileNotFoundException the file not found exception */ public static BOMInputStream findStreamInClasspathOrFileSystem(String name) throws FileNotFoundException { InputStream is = Reader.class.getClassLoader().getResourceAsStream(name); BOMInputStream bis = new BOMInputStream(is, false); if (is == null) { is = Reader.class.getClassLoader().getResourceAsStream(name.replaceAll("\\\\", "/")); bis = new BOMInputStream(is, false); if (is == null) { File f = new File(name); if (!f.exists()) { System.out.println("Warning - creating FileInputStream: " + f.getAbsolutePath()); } is = new FileInputStream(name); bis = new BOMInputStream(is, false); //System.out.println("Reading InputStream: " + f.getAbsolutePath()); } else { System.out.println("InputStream found in Class Loader after adjusting separators."); } } else { //System.out.println("InputStream found in Class Loader."); } return bis; }
From source file:adept.io.Reader.java
/**
 * Reads a UTF-8 file into lines, skipping a leading byte order mark and empty lines.
 *
 * <p>Each non-empty line is passed through {@code checkSurrogates} and then both appended
 * to {@code lines} and to the returned text, followed by a newline.
 *
 * @param filename the path of the file to read
 * @param lines the list that receives the lines; if {@code null}, a temporary list is used
 *        and the lines are only available through the return value
 * @return the non-empty lines joined together, each terminated by {@code "\n"}
 * @throws RuntimeException if the file cannot be opened or read
 */
public String readFileIntoLines(String filename, List<String> lines) {
    if (lines == null) {
        lines = new ArrayList<String>();
    }
    StringBuilder sb = new StringBuilder();
    // try-with-resources closes the reader even when reading fails part-way.
    try (BufferedReader in = new BufferedReader(new InputStreamReader(
            new BOMInputStream(new FileInputStream(new File(filename)), false), "UTF-8"))) {
        String line;
        while ((line = in.readLine()) != null) {
            if (!line.isEmpty()) {
                String surrogatesRemoved = checkSurrogates(line);
                lines.add(surrogatesRemoved);
                sb.append(surrogatesRemoved);
                sb.append("\n");
            }
        }
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
    return sb.toString();
}
From source file:adept.io.Reader.java
/**
 * Reads a UTF-8 file into a list of lines, skipping a leading byte order mark.
 *
 * <p>Every line, including empty ones, is passed through {@code checkSurrogates} before
 * being added to the result.
 *
 * @param filename the path of the file to read
 * @return the lines of the file, in file order
 * @throws RuntimeException if the file cannot be opened or read
 */
public List<String> fileToLines(String filename) {
    List<String> lines = new LinkedList<String>();
    // try-with-resources closes the reader even when reading fails part-way.
    try (BufferedReader in = new BufferedReader(new InputStreamReader(
            new BOMInputStream(new FileInputStream(new File(filename)), false), "UTF-8"))) {
        String line;
        while ((line = in.readLine()) != null) {
            lines.add(checkSurrogates(line));
        }
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
    return lines;
}