ExtractBy.java
package us.codecraft.webmagic.model.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.Target;
/**
* Define the extractor for field or class.<br>
*
* @author code4crafter@gmail.com <br>
* @since 0.2.0
*/
@Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD, ElementType.TYPE})
public @interface ExtractBy {
/**
* Extractor expression, support XPath, CSS Selector and regex.
*
* @return extractor expression
*/
String value();
/**
* types of extractor expressions
*/
public static enum Type {XPath, Regex, Css, JsonPath}
/**
* Extractor type, support XPath, CSS Selector and regex.
*
* @return extractor type
*/
Type type() default Type.XPath;
/**
* Define whether the field can be null.<br>
* If set to 'true' and the extractor get no result, the entire class will be discarded. <br>
*
* @return whether the field can be null
*/
boolean notNull() default false;
/**
* types of source for extracting.
*/
public static enum Source {
/**
* extract from the content extracted by class extractor
*/
SelectedHtml,
/**
* extract from the raw html
*/
RawHtml,
RawText
}
/**
* The source for extracting. <br>
* It works only if you already added 'ExtractBy' to Class. <br>
*
* @return the source for extracting
*/
Source source() default Source.SelectedHtml;
/**
* Define whether the extractor return more than one result.
* When set to 'true', the extractor return a list of string (so you should define the field as List). <br>
*
* Deprecated since 0.4.2. This option is determined automatically by the class of field.
* @deprecated since 0.4.2
* @return whether the extractor return more than one result
*/
boolean multi() default false;
}