1 package us.codecraft.webmagic.utils; 2 3 /** 4 * @author hooy 5 */ 6 public class BaseSelectorUtils { 7 8 /** 9 * Jsoup/HtmlCleaner could not parse "tr" or "td" tag directly 10 * https://stackoverflow.com/questions/63607740/jsoup-couldnt-parse-tr-tag 11 * 12 * @param text - the html string 13 * @return text 14 */ 15 public static String preParse(String text) { 16 if (((text.startsWith("<tr>") || text.startsWith("<tr ")) && text.endsWith("</tr>")) 17 || ((text.startsWith("<td>") || text.startsWith("<td ")) && text.endsWith("</td>"))) { 18 text = "<table>" + text + "</table>"; 19 } 20 return text; 21 } 22 23 }