QueueScheduler.java

package us.codecraft.webmagic.scheduler;

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;

/**
 * Basic Scheduler implementation.<br>
 * Store urls to fetch in LinkedBlockingQueue and remove duplicate urls by HashMap.
 *
 * Note: if you use this {@link QueueScheduler}
 * with {@link Site#getCycleRetryTimes()} enabled, you may encountered dead-lock
 * when the queue is full.
 *
 * @author code4crafter@gmail.com <br>
 * @since 0.1.0
 */
public class QueueScheduler extends DuplicateRemovedScheduler implements MonitorableScheduler {

    private final BlockingQueue<Request> queue;

    public QueueScheduler() {
        this.queue = new LinkedBlockingQueue<>();
    }

    /**
     * Creates a {@code QueueScheduler} with the given (fixed) capacity.
     *
     * @param capacity the capacity of this queue,
     * see {@link LinkedBlockingQueue#LinkedBlockingQueue(int)}
     * @since 0.8.0
     */
    public QueueScheduler(int capacity) {
        this.queue = new LinkedBlockingQueue<>(capacity);
    }

    @Override
    public void pushWhenNoDuplicate(Request request, Task task) {
        logger.trace("Remaining capacity: {}", this.queue.remainingCapacity());

        try {
            queue.put(request);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    @Override
    public Request poll(Task task) {
        return queue.poll();
    }

    @Override
    public int getLeftRequestsCount(Task task) {
        return queue.size();
    }

    @Override
    public int getTotalRequestsCount(Task task) {
        return getDuplicateRemover().getTotalRequestsCount(task);
    }
}