1 package us.codecraft.webmagic.selector;
2
3 import org.jsoup.nodes.Document;
4 import org.jsoup.nodes.Element;
5
6 import java.util.ArrayList;
7 import java.util.List;
8 import java.util.ListIterator;
9
10
11
12
13 public class HtmlNode extends AbstractSelectable {
14
15 private final List<Element> elements;
16
17 public HtmlNode(List<Element> elements) {
18 this.elements = elements;
19 }
20
21 public HtmlNode() {
22 elements = null;
23 }
24
25 protected List<Element> getElements() {
26 return elements;
27 }
28
29 public Selectable smartContent() {
30 SmartContentSelector smartContentSelector = Selectors.smartContent();
31 return select(smartContentSelector, getSourceTexts());
32 }
33
34 @Override
35 public Selectable links() {
36 return selectElements(new LinksSelector());
37 }
38
39 @Override
40 public Selectable xpath(String xpath) {
41 XpathSelector xpathSelector = Selectors.xpath(xpath);
42 return selectElements(xpathSelector);
43 }
44
45 @Override
46 public Selectable selectList(Selector selector) {
47 if (selector instanceof BaseElementSelector) {
48 return selectElements((BaseElementSelector) selector);
49 }
50 return selectList(selector, getSourceTexts());
51 }
52
53 @Override
54 public Selectable select(Selector selector) {
55 return selectList(selector);
56 }
57
58
59
60
61
62
63
64 protected Selectable selectElements(BaseElementSelector elementSelector) {
65 ListIterator<Element> elementIterator = getElements().listIterator();
66 if (!elementSelector.hasAttribute()) {
67 List<Element> resultElements = new ArrayList<Element>();
68 while (elementIterator.hasNext()) {
69 Element element = checkElementAndConvert(elementIterator);
70 List<Element> selectElements = elementSelector.selectElements(element);
71 resultElements.addAll(selectElements);
72 }
73 return new HtmlNode(resultElements);
74 } else {
75
76 List<String> resultStrings = new ArrayList<String>();
77 while (elementIterator.hasNext()) {
78 Element element = checkElementAndConvert(elementIterator);
79 List<String> selectList = elementSelector.selectList(element);
80 resultStrings.addAll(selectList);
81 }
82 return new PlainText(resultStrings);
83
84 }
85 }
86
87
88
89
90
91
92
93
94 private Element checkElementAndConvert(ListIterator<Element> elementIterator) {
95 Element element = elementIterator.next();
96 if (!(element instanceof Document)) {
97 Document root = new Document(element.ownerDocument().baseUri());
98 Element clone = element.clone();
99 root.appendChild(clone);
100 elementIterator.set(root);
101 return root;
102 }
103 return element;
104 }
105
106 @Override
107 public Selectable $(String selector) {
108 CssSelector cssSelector = Selectors.$(selector);
109 return selectElements(cssSelector);
110 }
111
112 @Override
113 public Selectable $(String selector, String attrName) {
114 CssSelector cssSelector = Selectors.$(selector, attrName);
115 return selectElements(cssSelector);
116 }
117
118 @Override
119 public List<Selectable> nodes() {
120 List<Selectable> selectables = new ArrayList<Selectable>();
121 for (Element element : getElements()) {
122 List<Element> childElements = new ArrayList<Element>(1);
123 childElements.add(element);
124 selectables.add(new HtmlNode(childElements));
125 }
126 return selectables;
127 }
128
129 @Override
130 protected List<String> getSourceTexts() {
131 List<String> sourceTexts = new ArrayList<String>(getElements().size());
132 for (Element element : getElements()) {
133 sourceTexts.add(element.toString());
134 }
135 return sourceTexts;
136 }
137 }