ExtractBy.java

  1. package us.codecraft.webmagic.model.annotation;

  2. import java.lang.annotation.ElementType;
  3. import java.lang.annotation.Retention;
  4. import java.lang.annotation.Target;

  /**
   * Declares the extractor used to populate a field, or to select the region of a
   * page that a class is built from.<br>
   * The annotation is retained at runtime and may be placed on fields and on types.
   *
   * @author code4crafter@gmail.com <br>
   * @since 0.2.0
   */
  @Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
  @Target({ElementType.FIELD, ElementType.TYPE})
  public @interface ExtractBy {

      /**
       * Extractor expression. Its syntax must match the kind selected by
       * {@link #type()}: XPath, regex, CSS selector or JsonPath.
       *
       * @return extractor expression
       */
      String value();

      /**
       * Kinds of extractor expression that {@link #value()} may hold.
       * Member types of an annotation type are implicitly {@code public static},
       * so no modifiers are spelled out.
       */
      enum Type {
          /** {@link #value()} is an XPath expression. */
          XPath,
          /** {@link #value()} is a regular expression. */
          Regex,
          /** {@link #value()} is a CSS selector. */
          Css,
          /** {@link #value()} is a JsonPath expression. */
          JsonPath
      }

      /**
       * Extractor type: XPath, regex, CSS selector or JsonPath.
       * Defaults to {@link Type#XPath}.
       *
       * @return extractor type
       */
      Type type() default Type.XPath;

      /**
       * Define whether the field is required to be non-null.<br>
       * If set to 'true' and the extractor gets no result, the entire class will be discarded. <br>
       * Defaults to {@code false}.
       *
       * @return whether the field must not be null
       */
      boolean notNull() default false;

      /**
       * Kinds of source content an extractor can be applied to.
       */
      enum Source {
          /**
           * extract from the content extracted by class extractor
           */
          SelectedHtml,
          /**
           * extract from the raw html
           */
          RawHtml,
          /**
           * extract from the raw text.
           * NOTE(review): presumably the unparsed page content rather than a parsed
           * HTML document; confirm against the extractor implementation.
           */
          RawText
      }

      /**
       * The source for extracting. <br>
       * It works only if you already added 'ExtractBy' to Class. <br>
       * Defaults to {@link Source#SelectedHtml}.
       *
       * @return the source for extracting
       */
      Source source() default Source.SelectedHtml;

      /**
       * Define whether the extractor returns more than one result.
       * When set to 'true', the extractor returns a list of string (so you should define the field as List). <br>
       *
       * @deprecated since 0.4.2. This option is determined automatically by the class of field.
       * @return whether the extractor returns more than one result
       */
      @Deprecated
      boolean multi() default false;

  }