package com.xdja.spider.robot.grab;

import com.xdja.spider.core.bean.Article;
import com.xdja.spider.core.bean.GrabConf;
import com.xdja.spider.core.grab.GrabDetail;
import com.xdja.spider.core.grab.GrabUtil;
import com.xdja.spider.core.util.HtmlGenerator;
import com.xdja.spider.robot.service.ISpiderRobotService;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.CollectionUtils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Grab worker: a {@link Runnable} that processes a single column for a single batch.
 *
 * <p>A run loads the column's {@link GrabConf} and then executes three phases in order:
 * grabbing the article list, grabbing article details, and generating static pages.
 * Each phase pages through its work iteratively (no recursion), so the call stack does
 * not grow with the number of pages.
 *
 * @author hsun
 * @version 1.0
 * @since 2017/7/28 2:00 PM
 */
public class GrabHandle implements Runnable {

    private static final Logger logger = LoggerFactory.getLogger(GrabHandle.class);

    /** Id of the column being grabbed. */
    private final long colId;
    /** Batch number passed through to the list grabber and to the article save call. */
    private final int maxBatch;

    /** Grab configuration for {@link #colId}; loaded at the start of {@link #run()}. */
    private GrabConf grabConf;

    private final ISpiderRobotService spiderRobotService;

    /**
     * Creates a grab task.
     *
     * @param spiderRobotService service used to load the configuration and persist results
     * @param colId              id of the column to grab
     * @param maxBatch           batch number for this grab
     */
    public GrabHandle(ISpiderRobotService spiderRobotService, long colId, int maxBatch) {
        this.spiderRobotService = spiderRobotService;
        this.colId = colId;
        this.maxBatch = maxBatch;
    }

    /**
     * Loads the grab configuration and runs the list, detail and static-page phases.
     * Returns without doing any work when no configuration exists for the column.
     */
    @Override
    public void run() {
        logger.debug("开始抓取:栏目【{}】，批次【{}】", this.colId, this.maxBatch);

        this.grabConf = this.spiderRobotService.getGrabConf(this.colId);
        if (null == this.grabConf) {
            logger.error("抓取配置为空，停止抓取:栏目【{}】，批次【{}】", this.colId, this.maxBatch);
            return;
        }

        logger.debug("抓取列表:栏目【{}】，批次【{}】", this.colId, this.maxBatch);
        grabList();

        logger.debug("抓取详情:栏目【{}】，批次【{}】", this.colId, this.maxBatch);
        grabDetail();

        logger.debug("静态化:栏目【{}】，批次【{}】", this.colId, this.maxBatch);
        staticize();
    }

    /**
     * Generates static pages for the articles returned by
     * {@link ISpiderRobotService#wait4Static(long)} until that call returns nothing.
     *
     * <p>A failure on one article is logged and does not stop the others. If a whole round
     * fails, the loop stops: nothing would be updated, so the next round would presumably
     * receive the same articles again and never terminate.
     */
    private void staticize() {
        while (true) {
            List<Article> pending = this.spiderRobotService.wait4Static(this.colId);
            if (CollectionUtils.isEmpty(pending)) {
                return;
            }

            List<Article> success = new ArrayList<>(pending.size());
            for (Article article : pending) {
                try {
                    HtmlGenerator.generateDefault(article, article.getViewUrl());
                    article.setStaticTime(System.currentTimeMillis());
                    success.add(article);
                } catch (Exception e) {
                    logger.error("静态化失败", e);
                }
            }

            if (success.isEmpty()) {
                // No progress this round; retrying immediately would loop forever.
                logger.warn("静态化本轮全部失败，停止重试:栏目【{}】，批次【{}】", this.colId, this.maxBatch);
                return;
            }

            this.spiderRobotService.updateStaticTime(success);
        }
    }

    /**
     * Grabs details for the articles returned by
     * {@link ISpiderRobotService#wait4GrabDetail(long)} until that call returns nothing.
     *
     * <p>For each article grabbed successfully, the content, description and title are
     * copied onto the article, it is marked as released with the current time, and its
     * image URLs are collected as {@code "<articleId>#<imgUrl>"} strings. An
     * {@link IOException} on one article is logged and does not stop the others. If a whole
     * round fails, the loop stops for the same reason as in {@link #staticize()}.
     */
    private void grabDetail() {
        while (true) {
            List<Article> pending = this.spiderRobotService.wait4GrabDetail(this.colId);
            if (CollectionUtils.isEmpty(pending)) {
                return;
            }

            List<Article> successResults = new ArrayList<>(pending.size());
            List<String> images = new ArrayList<>();
            for (Article article : pending) {
                try {
                    GrabDetail detail = GrabUtil.grabDetail(article.getSourceUrl(), this.grabConf);
                    article.setContent(detail.getContent());
                    article.setDescription(detail.getDescription());
                    article.setTitle(detail.getTitle());
                    article.setReleaseTime(System.currentTimeMillis());
                    article.setReleaseStatus(Article.ReleaseStatus.RELEASE.value);

                    List<String> imgs = detail.getImgs();
                    if (!CollectionUtils.isEmpty(imgs)) {
                        for (String img : imgs) {
                            images.add(String.format("%s#%s", article.getId(), img));
                        }
                    }

                    successResults.add(article);
                } catch (IOException e) {
                    logger.error("详情抓取失败", e);
                }
            }

            if (successResults.isEmpty()) {
                // No progress this round; retrying immediately would loop forever.
                logger.warn("详情抓取本轮全部失败，停止重试:栏目【{}】，批次【{}】", this.colId, this.maxBatch);
                return;
            }

            this.spiderRobotService.updateArticle(successResults);
            this.spiderRobotService.saveArticleImgs(images);
        }
    }

    /**
     * Grabs list pages and saves the resulting articles, repeating while
     * {@link ISpiderRobotService#saveArticle} returns {@code true} and the configuration
     * still has a non-empty source URL.
     *
     * <p>NOTE(review): this presumably relies on {@code GrabUtil.grabList} moving
     * {@code grabConf}'s source URL to the next page - confirm against {@code GrabUtil}.
     * An {@link IOException} is logged and ends the list phase.
     */
    private void grabList() {
        try {
            boolean hasMore;
            do {
                List<Article> grabLists = GrabUtil.grabList(this.grabConf, this.maxBatch);
                boolean isGo = this.spiderRobotService.saveArticle(grabLists, this.maxBatch);
                hasMore = isGo && StringUtils.isNotEmpty(this.grabConf.getSourceUrl());
                if (hasMore) {
                    logger.debug("继续抓取:{}", this.grabConf.getSourceUrl());
                }
            } while (hasMore);
        } catch (IOException e) {
            logger.error("抓取列表失败", e);
        }
    }
}
