package com.dingdianapp.library_web.spider;

import com.dingdianapp.library_web.spider.Spider;
import com.dingdianapp.library_web.xpath.XPathParser;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt___CollectionsKt;
import kotlin.jvm.internal.Intrinsics;
import kotlin.sequences.SequencesKt___SequencesKt;
import kotlin.text.Regex;
import kotlin.text.StringsKt__StringsKt;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.seimicrawler.xpath.JXNode;

@Metadata(bv = {1, 0, 3}, d1 = {"\u0000\u001e\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\u0010\u000e\n\u0000\n\u0002\u0010\u000b\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u000e\bÆ\u0002\u0018\u00002\u00020\u0001B\t\b\u0002¢\u0006\u0004\b\u0013\u0010\u0014J\u0010\u0010\u0005\u001a\u00020\u00042\u0006\u0010\u0003\u001a\u00020\u0002H\u0002J\u0010\u0010\u0006\u001a\u00020\u00042\u0006\u0010\u0003\u001a\u00020\u0002H\u0002J\u0016\u0010\n\u001a\u00020\u00042\u0006\u0010\b\u001a\u00020\u00072\u0006\u0010\t\u001a\u00020\u0002J\u000e\u0010\u000b\u001a\u00020\u00022\u0006\u0010\b\u001a\u00020\u0007J\u0010\u0010\f\u001a\u0004\u0018\u00010\u00022\u0006\u0010\b\u001a\u00020\u0007J\u0010\u0010\r\u001a\u0004\u0018\u00010\u00022\u0006\u0010\b\u001a\u00020\u0007J\u000e\u0010\u000e\u001a\u00020\u00042\u0006\u0010\u0003\u001a\u00020\u0002J\u000e\u0010\u000f\u001a\u00020\u00022\u0006\u0010\u0003\u001a\u00020\u0002J\u000e\u0010\u0010\u001a\u00020\u00022\u0006\u0010\u0003\u001a\u00020\u0002J\u000e\u0010\u0011\u001a\u00020\u00022\u0006\u0010\u0003\u001a\u00020\u0002J\u000e\u0010\u0012\u001a\u00020\u00022\u0006\u0010\u0003\u001a\u00020\u0002¨\u0006\u0015"}, d2 = {"Lcom/dingdianapp/library_web/spider/ChapterContentPage;", "", "", "content", "", "checkContentNewLine", "checkContentWordNum", "Lcom/dingdianapp/library_web/xpath/XPathParser;", "parser", "title", "isChapterDetailPage", "getChapterContent", "matchContentFromSpecialIdTag", "matchContentFromAllTag", "checkContentSpecialString", "beautifyChapterContent", "beautifyNewLine", "beautifyBlankSpace", "beautifyContent", "<init>", "()V", "library-web_dingdianappRelease"}, k = 1, mv = {1, 5, 1})
/* loaded from: classes2.dex */
/**
 * Heuristics for recognising a novel chapter reading page and for extracting and
 * cleaning up the chapter text found on it.
 *
 * <p>Singleton: use {@link #INSTANCE}. All regular expressions are compiled once and
 * shared, since {@link Regex} instances are reused read-only here.
 */
public final class ChapterContentPage {

    /** A candidate must contain more than this many line-break markers to pass {@link #checkContentNewLine}. */
    private static final int MIN_NEW_LINE_COUNT = 25;

    /** A candidate must keep more than this many characters after cleaning to pass {@link #checkContentWordNum}. */
    private static final int MIN_WORD_COUNT = 400;

    /** Element ids probed, in order, by {@link #matchContentFromSpecialIdTag}. */
    private static final String[] CONTENT_IDS = {
        "content", "novelContent", "novel_content", "novelText", "novel_text", "readText", "read_text", "article"
    };

    private static final Regex NEW_LINE_MARKERS = new Regex("br|<p>|\\n");
    private static final Regex WORD_COUNT_NOISE = new Regex("\\u3000| |\\s|<br/?>|<p>|</p>");
    // Plain character class: a '|' inside [...] is a literal pipe, not alternation.
    private static final Regex BLANK_SPACE = new Regex("[ \\u3000\\t]+");
    // (?s) lets '.' cross line breaks; [^>]* accepts attributes on the opening tag.
    private static final Regex SCRIPT_BLOCK = new Regex("(?s)<script\\b[^>]*>.*?</script>");
    private static final Regex ANY_TAG = new Regex("<[^>]+>");
    private static final Regex PHONE_NUMBER = new Regex("1[3589]\\d{9}");
    private static final Regex URL_LIKE = new Regex("(http[s]?://)?([\\w\\d-]+\\.)+[\\w]{2,}");
    // Group alternation so that "br" is matched as a whole tag name.
    private static final Regex LINE_BREAK_TAG = new Regex("</?\\s*(?:br|p)\\s*/?\\s*>");
    private static final Regex NEW_LINE_RUN = new Regex("((\\r)*\\n)+");
    // NOTE(review): these two match a literal "%s" at the start/end of the text. This looks
    // like a typo for \s or an unformatted placeholder - confirm intent before changing.
    private static final Regex LEADING_PLACEHOLDER = new Regex("^%s");
    private static final Regex TRAILING_PLACEHOLDER = new Regex("%s$");
    private static final Regex CHAPTER_NAVIGATION = new Regex("[上下][一1][章节页]");

    @NotNull
    public static final ChapterContentPage INSTANCE = new ChapterContentPage();

    private ChapterContentPage() {
    }

    /**
     * Counts occurrences of "br", "&lt;p&gt;" and newline characters in {@code content}.
     *
     * @return {@code true} (after logging the count) when more than {@value #MIN_NEW_LINE_COUNT} were found
     */
    private boolean checkContentNewLine(String content) {
        int count = SequencesKt___SequencesKt.count(Regex.findAll$default(NEW_LINE_MARKERS, content, 0, 2, null));
        if (count <= MIN_NEW_LINE_COUNT) {
            return false;
        }
        Spider.INSTANCE.log("内容中换行符(" + count + ")个数满足要求");
        return true;
    }

    /**
     * Strips whitespace, ideographic spaces and br/p tags from {@code content}, logs the
     * lengths before and after plus the cleaned text, and checks the remaining length.
     *
     * @return {@code true} when more than {@value #MIN_WORD_COUNT} characters remain
     */
    private boolean checkContentWordNum(String content) {
        String cleaned = WORD_COUNT_NOISE.replace(content, "");
        Spider.INSTANCE.log("清洗前字数：" + content.length());
        Spider.INSTANCE.log("清洗后字数：" + cleaned.length());
        Spider.INSTANCE.log("清洗后内容：" + cleaned);
        if (cleaned.length() <= MIN_WORD_COUNT) {
            return false;
        }
        Spider.INSTANCE.log("内容字数满足要求");
        return true;
    }

    /**
     * Removes every run of spaces, ideographic spaces (U+3000) and tabs from {@code content}.
     *
     * @param content text to clean; must not be null
     * @return the text with those characters removed
     */
    @NotNull
    public final String beautifyBlankSpace(@NotNull String content) {
        Intrinsics.checkNotNullParameter(content, "content");
        return BLANK_SPACE.replace(content, "");
    }

    /**
     * Runs the full clean-up pipeline: normalise line breaks, drop blank space, strip
     * markup and URL lines, then normalise line breaks again.
     *
     * @param content raw chapter text; must not be null
     * @return the cleaned chapter text
     */
    @NotNull
    public final String beautifyChapterContent(@NotNull String content) {
        Intrinsics.checkNotNullParameter(content, "content");
        String normalised = beautifyNewLine(content);
        String withoutBlanks = beautifyBlankSpace(normalised);
        String withoutMarkup = beautifyContent(withoutBlanks);
        return beautifyNewLine(withoutMarkup);
    }

    /**
     * Removes script blocks, any remaining tags and 11-digit sequences matching
     * {@code 1[3589]\d{9}}, then drops every line that contains something URL-like.
     *
     * @param content chapter text; must not be null
     * @return the remaining lines joined with {@code "\n"}
     */
    @NotNull
    public final String beautifyContent(@NotNull String content) {
        Intrinsics.checkNotNullParameter(content, "content");
        String withoutScripts = SCRIPT_BLOCK.replace(content, "");
        String withoutTags = ANY_TAG.replace(withoutScripts, "");
        String withoutPhones = PHONE_NUMBER.replace(withoutTags, "");
        List<String> lines = CollectionsKt___CollectionsKt.toMutableList(
                (Collection<String>) StringsKt__StringsKt.split$default((CharSequence) withoutPhones, new char[]{'\n'}, false, 0, 6, (Object) null));
        // Walk backwards so removing a line does not shift the indices still to be visited.
        for (int index = lines.size() - 1; index >= 0; index--) {
            String line = lines.get(index);
            if (URL_LIKE.containsMatchIn(line)) {
                Spider.INSTANCE.log("发现第" + index + "行包含网址内容。" + line);
                lines.remove(index);
            }
        }
        return CollectionsKt___CollectionsKt.joinToString$default(lines, "\n", null, null, 0, null, null, 62, null);
    }

    /**
     * Turns opening/closing {@code br} and {@code p} tags (without attributes) into
     * newlines and collapses runs of CR/LF line breaks into a single {@code "\n"}.
     *
     * @param content chapter text; must not be null
     * @return the text with normalised line breaks
     */
    @NotNull
    public final String beautifyNewLine(@NotNull String content) {
        Intrinsics.checkNotNullParameter(content, "content");
        String tagsAsNewLines = LINE_BREAK_TAG.replace(content, "\n");
        String collapsed = NEW_LINE_RUN.replace(tagsAsNewLines, "\n");
        String withoutLeading = LEADING_PLACEHOLDER.replace(collapsed, "");
        return TRAILING_PLACEHOLDER.replace(withoutLeading, "");
    }

    /**
     * Checks for typical prose punctuation.
     *
     * @param content candidate text; must not be null
     * @return {@code true} when the text contains both "，" and "。" and at least one
     *         plain space or ideographic space (U+3000)
     */
    public final boolean checkContentSpecialString(@NotNull String content) {
        Intrinsics.checkNotNullParameter(content, "content");
        if (!content.contains("，") || !content.contains("。")) {
            return false;
        }
        return content.contains(StringUtils.SPACE) || content.contains("\u3000");
    }

    /**
     * Extracts the chapter text from the page: the first candidate that passes any of
     * the content checks is cleaned with {@link #beautifyContent} and returned.
     *
     * @param parser parser for the page; must not be null
     * @return the cleaned chapter text, or an empty string when no candidate qualifies
     */
    @NotNull
    public final String getChapterContent(@NotNull XPathParser parser) {
        Intrinsics.checkNotNullParameter(parser, "parser");
        for (String candidate : collectCandidates(parser)) {
            if (looksLikeChapterContent(candidate)) {
                Spider.INSTANCE.log("内容美化前：" + candidate);
                String beautified = beautifyContent(candidate);
                // Log the cleaned text; previously the raw candidate was logged here again.
                Spider.INSTANCE.log("内容美化后：" + beautified);
                return beautified;
            }
        }
        return "";
    }

    /**
     * Decides whether the page is a chapter reading page: either the HTML contains a
     * previous/next chapter navigation keyword, or one of the extracted candidates
     * passes a content check.
     *
     * @param parser parser for the page; must not be null
     * @param title  must not be null; not otherwise used by this method
     * @return {@code true} when the page is judged to be a chapter reading page
     */
    public final boolean isChapterDetailPage(@NotNull XPathParser parser, @NotNull String title) {
        Intrinsics.checkNotNullParameter(parser, "parser");
        Intrinsics.checkNotNullParameter(title, "title");
        if (CHAPTER_NAVIGATION.containsMatchIn(parser.getHtml())) {
            Spider.INSTANCE.log("页面中含有上一章、下一章等关键字，认定是章节对应阅读页");
            return true;
        }
        for (String candidate : collectCandidates(parser)) {
            if (looksLikeChapterContent(candidate)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Collects text from elements whose {@code allText()} is longer than 400 characters
     * and that have more than 10 {@code br} or {@code p} children; the {@code p} query
     * wins when both match.
     *
     * @param parser parser for the page; must not be null
     * @return the matched text nodes joined with {@code "\r\n"}, or {@code null} when nothing matched
     */
    @Nullable
    public final String matchContentFromAllTag(@NotNull XPathParser parser) {
        Intrinsics.checkNotNullParameter(parser, "parser");
        List<JXNode> selected = new ArrayList<>();
        List<JXNode> brBased = parser.getElement("//*[string-length(allText())>400 and count(br)>10]/text()");
        if (brBased != null && !brBased.isEmpty()) {
            selected.addAll(brBased);
        }
        List<JXNode> paragraphBased = parser.getElement("//*[string-length(allText())>400 and count(p)>10]//text()");
        if (paragraphBased != null && !paragraphBased.isEmpty()) {
            // Paragraph-based match replaces the br-based one.
            selected.clear();
            selected.addAll(paragraphBased);
        }
        if (selected.isEmpty()) {
            return null;
        }
        return CollectionsKt___CollectionsKt.joinToString$default(selected, "\r\n", null, null, 0, null, null, 62, null);
    }

    /**
     * Probes a fixed list of well-known element ids and returns the text of the first
     * one that yields any text nodes.
     *
     * @param parser parser for the page; must not be null
     * @return the text nodes joined with {@code "\r\n"}, or {@code null} when no id matched
     */
    @Nullable
    public final String matchContentFromSpecialIdTag(@NotNull XPathParser parser) {
        Intrinsics.checkNotNullParameter(parser, "parser");
        for (String id : CONTENT_IDS) {
            List<JXNode> nodes = parser.getElement("//*[@id='" + id + "']//text()");
            if (nodes != null && !nodes.isEmpty()) {
                return CollectionsKt___CollectionsKt.joinToString$default(nodes, "\r\n", null, null, 0, null, null, 62, null);
            }
        }
        return null;
    }

    /** Gathers the non-null results of the id-based and the generic extraction, in that order. */
    private List<String> collectCandidates(XPathParser parser) {
        List<String> candidates = new ArrayList<>();
        String fromIdTag = matchContentFromSpecialIdTag(parser);
        if (fromIdTag != null) {
            candidates.add(fromIdTag);
        }
        String fromAllTags = matchContentFromAllTag(parser);
        if (fromAllTags != null) {
            candidates.add(fromAllTags);
        }
        return candidates;
    }

    /** Returns whether the candidate passes the line-break, word-count or punctuation check (evaluated in that order). */
    private boolean looksLikeChapterContent(String content) {
        return checkContentNewLine(content) || checkContentWordNum(content) || checkContentSpecialString(content);
    }
}
