View Javadoc
1   package org.oxerr.okcoin.rest.dto.valuereader;
2   
3   import java.io.IOException;
4   import java.io.InputStream;
5   import java.io.InputStreamReader;
6   import java.nio.charset.Charset;
7   
8   import javax.annotation.Nonnull;
9   import javax.annotation.Nullable;
10  
11  import org.apache.commons.io.IOUtils;
12  import org.cyberneko.html.parsers.DOMParser;
13  import org.oxerr.okcoin.rest.service.web.OKCoinClient;
14  import org.oxerr.okcoin.rest.service.web.OKCoinClientException;
15  import org.slf4j.Logger;
16  import org.slf4j.LoggerFactory;
17  import org.w3c.dom.html.HTMLDocument;
18  import org.xml.sax.InputSource;
19  import org.xml.sax.SAXException;
20  
21  public abstract class HtmlPageReader<T> implements ValueReader<T> {
22  
23  	private static final Charset CHARSET = Charset.forName(OKCoinClient.ENCODING);
24  
25  	private final Logger log = LoggerFactory.getLogger(IndexHtmlPageReader.class);
26  
27  	/**
28  	 * {@inheritDoc}
29  	 */
30  	@Override
31  	public T read(InputStream inputStream,
32  		@Nullable String mimeType, @Nullable Charset charset) throws IOException {
33  		HTMLDocument document;
34  		try {
35  			document = toDocument(inputStream, charset != null ? charset: CHARSET);
36  		} catch (SAXException e) {
37  			throw new IOException(e);
38  		}
39  		return read(document);
40  	}
41  
42  	protected abstract T read(HTMLDocument document) throws OKCoinClientException;
43  
44  	private HTMLDocument toDocument(InputStream inputStream, @Nonnull Charset charset)
45  			throws IOException, SAXException {
46  		final InputSource inputSource;
47  		if (log.isTraceEnabled()) {
48  			String html = IOUtils.toString(inputStream, charset);
49  			log.trace("Parsing HTML:\n{}", html);
50  			inputSource = new InputSource(new InputStreamReader(
51  					IOUtils.toInputStream(html, charset), charset));
52  		} else {
53  			inputSource = new InputSource(new InputStreamReader(inputStream,
54  					charset));
55  		}
56  		DOMParser parser = new DOMParser();
57  		parser.parse(inputSource);
58  		HTMLDocument document = (HTMLDocument) parser.getDocument();
59  		return document;
60  	}
61  
62  }