View Javadoc
1   package us.codecraft.webmagic.model.annotation;
2   
3   import java.lang.annotation.ElementType;
4   import java.lang.annotation.Retention;
5   import java.lang.annotation.Target;
6   
7   /**
8    * Combo 'ExtractBy' extractor with and/or operator.
9    *
10   * @author code4crafter@gmail.com <br>
11   * @since 0.2.1
12   */
13  @Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
14  @Target({ElementType.FIELD, ElementType.TYPE})
15  public @interface ComboExtract {
16  
17      /**
18       * The extractors to be combined.
19       *
20       * @return the extractors to be combined
21       */
22      ExtractBy[] value();
23  
24      public static enum Op {
25          /**
26           * All extractors will be arranged as a pipeline. <br>
27           * The next extractor uses the result of the previous as source.
28           */
29          And,
30          /**
31           * All extractors will do extracting separately, <br>
32           * and the results of extractors will combined as the final result.
33           */
34          Or;
35      }
36  
37      /**
38       * Combining operation of extractors.<br>
39       *
40       * @return combining operation of extractors
41       */
42      Op op() default Op.And;
43  
44      /**
45       * Define whether the field can be null.<br>
46       * If set to 'true' and the extractor get no result, the entire class will be discarded. <br>
47       *
48       * @return whether the field can be null
49       */
50      boolean notNull() default false;
51  
52      /**
53       * types of source for extracting.
54       */
55      public static enum Source {
56          /**
57           * extract from the content extracted by class extractor
58           */
59          SelectedHtml,
60          /**
61           * extract from the raw html
62           */
63          RawHtml
64      }
65  
66      /**
67       * The source for extracting. <br>
68       * It works only if you already added 'ExtractBy' to Class. <br>
69       *
70       * @return the source for extracting
71       */
72      Source source() default Source.SelectedHtml;
73  
74      /**
75       * Define whether the extractor return more than one result.
76       * When set to 'true', the extractor return a list of string (so you should define the field as List). <br>
77       *
78       * Deprecated since 0.4.2. This option is determined automatically by the class of field.
79       * @deprecated since 0.4.2
80       * @return whether the extractor return more than one result
81       */
82      boolean multi() default false;
83  
84  }