View Javadoc
1   package us.codecraft.webmagic.utils;
2   
/**
 * Static helpers that prepare raw HTML text before it is handed to an HTML parser.
 *
 * @author hooy
 */
public class BaseSelectorUtils {

    /** Tag names that are only wrapped when the fragment both opens and closes with the same tag. */
    private static final String[] TABLE_ONLY_TAGS = {"tr", "td"};

    /**
     * Wraps a bare table-row or table-cell fragment in a {@code <table>} element.
     *
     * Jsoup/HtmlCleaner could not parse "tr" or "td" tag directly
     * https://stackoverflow.com/questions/63607740/jsoup-couldnt-parse-tr-tag
     *
     * <p>A fragment is wrapped when, ignoring surrounding whitespace and letter case, it starts
     * with a {@code <tr>} (or {@code <td>}) opening tag, with or without attributes, and ends with
     * the matching {@code </tr>} (or {@code </td>}) closing tag. Any other input is returned
     * unchanged. The wrapped result always contains the original text verbatim, including any
     * surrounding whitespace.
     *
     * @param text - the html string, may be {@code null}
     * @return the text wrapped in {@code <table>...</table>} when it is a bare row/cell fragment,
     *         otherwise the text itself ({@code null} when {@code text} is {@code null})
     */
    public static String preParse(String text) {
        if (text == null) {
            return null;
        }
        // Only the detection ignores surrounding whitespace; the original text is what gets wrapped.
        String trimmed = text.trim();
        for (String tag : TABLE_ONLY_TAGS) {
            if (startsWithOpeningTag(trimmed, tag) && endsWithClosingTag(trimmed, tag)) {
                return "<table>" + text + "</table>";
            }
        }
        return text;
    }

    /**
     * Tells whether {@code html} begins with an opening {@code tag}, compared case-insensitively.
     * The tag name must be followed by {@code '>'} or whitespace so that longer names sharing the
     * prefix (for example {@code <track>}) are not mistaken for {@code <tr>}.
     */
    private static boolean startsWithOpeningTag(String html, String tag) {
        int afterName = tag.length() + 1;
        if (html.length() <= afterName
                || html.charAt(0) != '<'
                || !html.regionMatches(true, 1, tag, 0, tag.length())) {
            return false;
        }
        char next = html.charAt(afterName);
        return next == '>' || Character.isWhitespace(next);
    }

    /** Tells whether {@code html} ends with the closing form of {@code tag}, compared case-insensitively. */
    private static boolean endsWithClosingTag(String html, String tag) {
        String closing = "</" + tag + ">";
        // regionMatches returns false for a negative offset, so short inputs are handled safely.
        return html.regionMatches(true, html.length() - closing.length(), closing, 0, closing.length());
    }

}