View Javadoc
1   package us.codecraft.webmagic.model.annotation;
2   
3   import java.lang.annotation.ElementType;
4   import java.lang.annotation.Retention;
5   import java.lang.annotation.Target;
6   
/**
 * Define the extractor for a field or a class.<br>
 * The annotation is retained at runtime and may be placed on fields and on types.
 *
 * @author code4crafter@gmail.com <br>
 * @since 0.2.0
 */
@Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD, ElementType.TYPE})
public @interface ExtractBy {

    /**
     * Extractor expression. Its syntax is selected by {@link #type()}:
     * XPath, regex, CSS selector or JsonPath.
     *
     * @return extractor expression
     */
    String value();

    /**
     * Types of extractor expressions.
     */
    // Member types of an annotation type are implicitly public and static.
    enum Type {XPath, Regex, Css, JsonPath}

    /**
     * Extractor type, one of XPath, regex, CSS selector or JsonPath.
     * Defaults to {@link Type#XPath}.
     *
     * @return extractor type
     */
    Type type() default Type.XPath;

    /**
     * Define whether the field must be non-null.<br>
     * If set to 'true' and the extractor gets no result, the entire class will be discarded. <br>
     *
     * @return whether the field is required to be non-null
     */
    boolean notNull() default false;

    /**
     * Types of source for extracting.
     */
    // Member types of an annotation type are implicitly public and static.
    enum Source {
        /**
         * extract from the content extracted by class extractor
         */
        SelectedHtml,
        /**
         * extract from the raw html
         */
        RawHtml,
        /**
         * extract from the raw text
         */
        RawText
    }

    /**
     * The source for extracting. Defaults to {@link Source#SelectedHtml}.<br>
     * It works only if you already added 'ExtractBy' to Class. <br>
     *
     * @return the source for extracting
     */
    Source source() default Source.SelectedHtml;

    /**
     * Define whether the extractor returns more than one result.
     * When set to 'true', the extractor returns a list of strings (so you should define the field as List). <br>
     *
     * @deprecated since 0.4.2. This option is determined automatically by the class of the field.
     * @return whether the extractor returns more than one result
     */
    // Keep the annotation in sync with the Javadoc tag so that the deprecation
    // is also visible to reflection and to tools that do not parse Javadoc.
    @Deprecated
    boolean multi() default false;

}