ComboExtract.java

package us.codecraft.webmagic.model.annotation;

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.Target;

/**
 * Combo 'ExtractBy' extractor with and/or operator.
 *
 * @author code4crafter@gmail.com <br>
 * @since 0.2.1
 */
@Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD, ElementType.TYPE})
public @interface ComboExtract {

    /**
     * The extractors to be combined.
     *
     * @return the extractors to be combined
     */
    ExtractBy[] value();

    public static enum Op {
        /**
         * All extractors will be arranged as a pipeline. <br>
         * The next extractor uses the result of the previous as source.
         */
        And,
        /**
         * All extractors will do extracting separately, <br>
         * and the results of extractors will combined as the final result.
         */
        Or;
    }

    /**
     * Combining operation of extractors.<br>
     *
     * @return combining operation of extractors
     */
    Op op() default Op.And;

    /**
     * Define whether the field can be null.<br>
     * If set to 'true' and the extractor get no result, the entire class will be discarded. <br>
     *
     * @return whether the field can be null
     */
    boolean notNull() default false;

    /**
     * types of source for extracting.
     */
    public static enum Source {
        /**
         * extract from the content extracted by class extractor
         */
        SelectedHtml,
        /**
         * extract from the raw html
         */
        RawHtml
    }

    /**
     * The source for extracting. <br>
     * It works only if you already added 'ExtractBy' to Class. <br>
     *
     * @return the source for extracting
     */
    Source source() default Source.SelectedHtml;

    /**
     * Define whether the extractor return more than one result.
     * When set to 'true', the extractor return a list of string (so you should define the field as List). <br>
     *
     * Deprecated since 0.4.2. This option is determined automatically by the class of field.
     * @deprecated since 0.4.2
     * @return whether the extractor return more than one result
     */
    boolean multi() default false;

}