1 package us.codecraft.webmagic.model.annotation; 2 3 import java.lang.annotation.ElementType; 4 import java.lang.annotation.Retention; 5 import java.lang.annotation.Target; 6 7 /** 8 * Define the extractor for field or class.<br> 9 * 10 * @author code4crafter@gmail.com <br> 11 * @since 0.2.0 12 */ 13 @Retention(java.lang.annotation.RetentionPolicy.RUNTIME) 14 @Target({ElementType.FIELD, ElementType.TYPE}) 15 public @interface ExtractBy { 16 17 /** 18 * Extractor expression, support XPath, CSS Selector and regex. 19 * 20 * @return extractor expression 21 */ 22 String value(); 23 24 /** 25 * types of extractor expressions 26 */ 27 public static enum Type {XPath, Regex, Css, JsonPath} 28 29 /** 30 * Extractor type, support XPath, CSS Selector and regex. 31 * 32 * @return extractor type 33 */ 34 Type type() default Type.XPath; 35 36 /** 37 * Define whether the field can be null.<br> 38 * If set to 'true' and the extractor get no result, the entire class will be discarded. <br> 39 * 40 * @return whether the field can be null 41 */ 42 boolean notNull() default false; 43 44 /** 45 * types of source for extracting. 46 */ 47 public static enum Source { 48 /** 49 * extract from the content extracted by class extractor 50 */ 51 SelectedHtml, 52 /** 53 * extract from the raw html 54 */ 55 RawHtml, 56 RawText 57 } 58 59 /** 60 * The source for extracting. <br> 61 * It works only if you already added 'ExtractBy' to Class. <br> 62 * 63 * @return the source for extracting 64 */ 65 Source source() default Source.SelectedHtml; 66 67 /** 68 * Define whether the extractor return more than one result. 69 * When set to 'true', the extractor return a list of string (so you should define the field as List). <br> 70 * 71 * Deprecated since 0.4.2. This option is determined automatically by the class of field. 72 * @deprecated since 0.4.2 73 * @return whether the extractor return more than one result 74 */ 75 boolean multi() default false; 76 77 }