1 package org.oxerr.okcoin.rest.dto.valuereader;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.io.InputStreamReader;
6 import java.nio.charset.Charset;
7
8 import javax.annotation.Nonnull;
9 import javax.annotation.Nullable;
10
11 import org.apache.commons.io.IOUtils;
12 import org.cyberneko.html.parsers.DOMParser;
13 import org.oxerr.okcoin.rest.service.web.OKCoinClient;
14 import org.oxerr.okcoin.rest.service.web.OKCoinClientException;
15 import org.slf4j.Logger;
16 import org.slf4j.LoggerFactory;
17 import org.w3c.dom.html.HTMLDocument;
18 import org.xml.sax.InputSource;
19 import org.xml.sax.SAXException;
20
21 public abstract class HtmlPageReader<T> implements ValueReader<T> {
22
23 private static final Charset CHARSET = Charset.forName(OKCoinClient.ENCODING);
24
25 private final Logger log = LoggerFactory.getLogger(IndexHtmlPageReader.class);
26
27
28
29
30 @Override
31 public T read(InputStream inputStream,
32 @Nullable String mimeType, @Nullable Charset charset) throws IOException {
33 HTMLDocument document;
34 try {
35 document = toDocument(inputStream, charset != null ? charset: CHARSET);
36 } catch (SAXException e) {
37 throw new IOException(e);
38 }
39 return read(document);
40 }
41
42 protected abstract T read(HTMLDocument document) throws OKCoinClientException;
43
44 private HTMLDocument toDocument(InputStream inputStream, @Nonnull Charset charset)
45 throws IOException, SAXException {
46 final InputSource inputSource;
47 if (log.isTraceEnabled()) {
48 String html = IOUtils.toString(inputStream, charset);
49 log.trace("Parsing HTML:\n{}", html);
50 inputSource = new InputSource(new InputStreamReader(
51 IOUtils.toInputStream(html, charset), charset));
52 } else {
53 inputSource = new InputSource(new InputStreamReader(inputStream,
54 charset));
55 }
56 DOMParser parser = new DOMParser();
57 parser.parse(inputSource);
58 HTMLDocument document = (HTMLDocument) parser.getDocument();
59 return document;
60 }
61
62 }