package acr.browser.lightning.reading;

import com.applovin.sdk.AppLovinErrorCodes;
import com.google.android.gms.plus.PlusShare;
import com.google.firebase.analytics.FirebaseAnalytics;
import com.silvermob.sdk.ownad.Const;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.jsoup.select.Selector;

/* loaded from: classes.dex */
public class ArticleTextExtractor {
    // Debug switch; not read anywhere in the visible code.
    private static final boolean DEBUG_WEIGHTS = false;
    // Size limits. "LENGHT" is a misspelling of "LENGTH" in the identifiers themselves.
    // NOTE(review): the visible code uses the literal values (1000, 255, 4) rather than these names.
    private static final int MAX_AUTHOR_DESC_LENGHT = 1000;
    private static final int MAX_AUTHOR_NAME_LENGHT = 255;
    private static final int MAX_IMAGE_LENGHT = 255;
    private static final int MAX_LOG_LENGTH = 200;
    private static final int MIN_AUTHOR_NAME_LENGTH = 4;
    // Compiled scoring patterns; assigned by setNegative/setPositive/setUnlikely (called from the constructor).
    private Pattern NEGATIVE;
    private Pattern POSITIVE;
    private Pattern UNLIKELY;
    // Formatter handed to the extraction pipeline by the public extractContent overloads.
    private OutputFormatter formatter = DEFAULT_FORMATTER;
    // Regex sources of the patterns above, kept so addNegative/addPositive/addUnlikely can append alternatives.
    private String negativeStr;
    private String positiveStr;
    private String unlikelyStr;
    // Tag names that getNodes accepts as content candidates (matched against the whole tag name).
    private static final Pattern NODES = Pattern.compile("p|div|td|h1|h2|article|section");
    // Inline-style fragments that calcWeight penalises.
    private static final Pattern NEGATIVE_STYLE = Pattern.compile("hidden|display: ?none|font-size: ?small");
    // Words stripped from a candidate author string in extractAuthorName; flag 2 is Pattern.CASE_INSENSITIVE.
    private static final Pattern IGNORE_AUTHOR_PARTS = Pattern.compile("by|name|author|posted|twitter|handle|news", 2);
    // '|'-separated title segments that cleanTitle drops (compared lower-cased and trimmed).
    private static final Set<String> IGNORED_TITLE_PARTS = new LinkedHashSet<String>() { // from class: acr.browser.lightning.reading.ArticleTextExtractor.1
        {
            add("hacker news");
            add("facebook");
            add("home");
            add("articles");
        }
    };
    // Initial value of the per-instance formatter field.
    private static final OutputFormatter DEFAULT_FORMATTER = new OutputFormatter();
    // Applied last in extractAuthorName: when a pattern matches the whole string, group 1 becomes the author name.
    private static final List<Pattern> CLEAN_AUTHOR_PATTERNS = Collections.singletonList(Pattern.compile("By\\S*(.*)[\\.,].*"));

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: classes.dex */
    public static class ImageComparator implements Comparator<ImageResult> {
        /** Instantiated only from within the enclosing class. */
        private ImageComparator() {
        }

        /**
         * Orders images by descending weight: the comparison is delegated to the
         * second argument's weight so that heavier images sort first.
         */
        @Override // java.util.Comparator
        public int compare(ImageResult first, ImageResult second) {
            final int descendingOrder = second.weight.compareTo(first.weight);
            return descendingOrder;
        }
    }

    public ArticleTextExtractor() {
        setUnlikely("com(bx|ment|munity)|dis(qus|cuss)|e(xtra|[-]?mail)|foot|header|menu|re(mark|ply)|rss|sh(are|outbox)|sponsora(d|ll|gegate|rchive|ttachment)|(pag(er|ination))|popup|print|login|si(debar|gn|ngle)");
        setPositive("(^(body|content|h?entry|main|page|post|text|blog|story|haupt))|arti(cle|kel)|instapaper_body");
        setNegative("nav($|igation)|user|com(ment|bx)|(^com-)|contact|foot|masthead|(me(dia|ta))|outbrain|promo|related|scroll|(sho(utbox|pping))|sidebar|sponsor|tags|tool|widget|player|disclaimer|toc|infobox|vcard");
    }

    /**
     * Adds {@code i} to the score currently stored on {@code element}
     * (read via getScore, written back via setScore).
     */
    private static void addScore(Element element, int i) {
        final int updatedScore = getScore(element) + i;
        setScore(element, updatedScore);
    }

    /**
     * Scores an element from its own attributes only: class name and id are
     * matched against the POSITIVE, UNLIKELY and NEGATIVE patterns, the inline
     * style against NEGATIVE_STYLE, and the itemprop attribute against POSITIVE.
     *
     * @param element element to score
     * @return the summed bonus/penalty points (may be negative)
     */
    private int calcWeight(Element element) {
        final String cssClass = element.className();
        final String elementId = element.id();
        int weight = 0;

        if (this.POSITIVE.matcher(cssClass).find()) {
            weight += 35;
        }
        if (this.POSITIVE.matcher(elementId).find()) {
            weight += 45;
        }
        if (this.UNLIKELY.matcher(cssClass).find()) {
            weight -= 20;
        }
        if (this.UNLIKELY.matcher(elementId).find()) {
            weight -= 20;
        }
        if (this.NEGATIVE.matcher(cssClass).find()) {
            weight -= 50;
        }
        if (this.NEGATIVE.matcher(elementId).find()) {
            weight -= 50;
        }

        final String style = element.attr("style");
        final boolean hasStyle = style != null && !style.isEmpty();
        if (hasStyle && NEGATIVE_STYLE.matcher(style).find()) {
            weight -= 50;
        }

        final String itemProp = element.attr("itemprop");
        final boolean hasItemProp = itemProp != null && !itemProp.isEmpty();
        if (hasItemProp && this.POSITIVE.matcher(itemProp).find()) {
            weight += 100;
        }
        return weight;
    }

    /**
     * Scores a child's own text and adds that score to the child's stored score.
     * Text with more than five occurrences (in total) of {@code &quot;},
     * {@code &lt;}, {@code &gt;} or {@code px} gets -30; otherwise the score is
     * the text length divided by 35, rounded.
     *
     * @param element child element whose stored score is updated
     * @param str     the child's own text
     * @return the score that was added
     */
    private static int calcWeightForChild(Element element, String str) {
        final int markupHits = SHelper.count(str, "&quot;")
                + SHelper.count(str, "&lt;")
                + SHelper.count(str, "&gt;")
                + SHelper.count(str, "px");
        final int weight;
        if (markupHits > 5) {
            weight = -30;
        } else {
            weight = (int) Math.round(str.length() / 35.0d);
        }
        addScore(element, weight);
        return weight;
    }

    /**
     * Removes boilerplate segments from a '|'-separated page title.
     * Segments listed in IGNORED_TITLE_PARTS are dropped. A segment is also
     * dropped when the number of segments kept so far equals the segment count
     * minus one and the text kept so far is longer than that segment.
     *
     * @param str raw title
     * @return the remaining segments re-joined with '|' and passed through SHelper.innerTrim
     */
    private static String cleanTitle(String str) {
        final String[] parts = str.split("\\|");
        final StringBuilder kept = new StringBuilder();
        int keptCount = 0;
        for (String part : parts) {
            if (IGNORED_TITLE_PARTS.contains(part.toLowerCase().trim())) {
                continue;
            }
            // The counter only advances for kept parts, so this compares against kept parts, not the loop position.
            if (keptCount == parts.length - 1 && kept.length() > part.length()) {
                continue;
            }
            if (keptCount > 0) {
                kept.append('|');
            }
            kept.append(part);
            keptCount++;
        }
        return SHelper.innerTrim(kept.toString());
    }

    /**
     * Picks the most promising {@code img} inside {@code element} (or, when it
     * contains none, inside its parent) and records every non-ad candidate.
     *
     * <p>Each candidate earns or loses 20 points for height and for width
     * (threshold 50, only when the attribute parses as an integer), gains 20
     * for an alt text or title longer than 35 characters, and loses 40 when its
     * parent's {@code rel} contains "nofollow". The result is multiplied by a
     * factor that is halved each time a new best image is found.
     *
     * @param element node whose images are inspected
     * @param list    receives one ImageResult per candidate; sorted with ImageComparator before returning
     * @return the best image element, or {@code null} when no candidate has a positive weight
     */
    private static Element determineImageSource(Element element, List<ImageResult> list) {
        Elements images = element.select("img");
        if (images.isEmpty()) {
            // Guard against a detached/root element, which has no parent to fall back to.
            Element container = element.parent();
            if (container != null) {
                images = container.select("img");
            }
        }

        int bestWeight = 0;
        Element bestImage = null;
        double factor = 1.0d;
        for (Element image : images) {
            String source = image.attr("src");
            if (source.isEmpty() || isAdImage(source)) {
                continue;
            }

            int weight = 0;
            int height = 0;
            try {
                height = Integer.parseInt(image.attr("height"));
                weight = height >= 50 ? 20 : -20;
            } catch (NumberFormatException ignored) {
                // Missing or non-numeric height: no bonus, no penalty.
            }
            int width = 0;
            try {
                width = Integer.parseInt(image.attr("width"));
                weight = width >= 50 ? weight + 20 : weight - 20;
            } catch (NumberFormatException ignored) {
                // Missing or non-numeric width: no bonus, no penalty.
            }

            String alt = image.attr("alt");
            if (alt.length() > 35) {
                weight += 20;
            }
            // NOTE(review): this constant is presumably the "title" attribute name substituted by the decompiler — confirm.
            String title = image.attr(PlusShare.KEY_CONTENT_DEEP_LINK_METADATA_TITLE);
            if (title.length() > 35) {
                weight += 20;
            }

            boolean noFollow = false;
            Element imageParent = image.parent();
            if (imageParent != null) {
                String rel = imageParent.attr("rel");
                if (rel != null && rel.contains("nofollow")) {
                    noFollow = true;
                    weight -= 40;
                }
            }

            int scaledWeight = (int) (weight * factor);
            if (scaledWeight > bestWeight) {
                bestWeight = scaledWeight;
                bestImage = image;
                factor /= 2.0d;
            }
            list.add(new ImageResult(source, Integer.valueOf(scaledWeight), title, height, width, alt, noFollow));
        }
        Collections.sort(list, new ImageComparator());
        return bestImage;
    }

    /**
     * Splits {@code str} on the regular expression {@code str2} and returns the
     * longest piece (the first one on ties), with {@code &raquo;} and
     * {@code »} replaced by a space and the result trimmed.
     *
     * @param str  title to split
     * @param str2 regular expression used as the delimiter
     * @return the longest piece after clean-up, or an empty string when every piece is empty
     */
    private static String doTitleSplits(String str, String str2) {
        final String[] pieces = str.split(str2);
        String longest = "";
        int longestLength = 0;
        for (int idx = 0; idx < pieces.length; idx++) {
            final String piece = pieces[idx];
            if (piece.length() > longestLength) {
                longestLength = piece.length();
                longest = piece;
            }
        }
        final String withoutEntity = longest.replace("&raquo;", StringUtils.SPACE);
        final String withoutQuote = withoutEntity.replace("»", StringUtils.SPACE);
        return withoutQuote.trim();
    }

    /**
     * Looks up a short description of the author.
     * Tries {@code .byline > .bio}, then {@code .byline span[class*=teaser]},
     * and finally the best-scoring element whose own text contains the author name.
     *
     * @param document parsed page
     * @param str      author name; an empty name yields an empty description
     * @return the description text, or an empty string when nothing matched
     */
    private String extractAuthorDescription(Document document, String str) {
        if (str.isEmpty()) {
            return "";
        }

        final Elements bio = document.select(".byline > .bio");
        if (bio != null && !bio.isEmpty()) {
            return bio.first().text();
        }

        final Elements teaser = document.select(".byline span[class*=teaser]");
        if (teaser != null && !teaser.isEmpty()) {
            return teaser.first().text();
        }

        String description = "";
        try {
            final Elements mentions = document.select(":containsOwn(" + str + ')');
            final Element best = getBestMatchElement(mentions);
            if (best != null) {
                description = best.text();
            }
        } catch (Selector.SelectorParseException e) {
            // The author name is spliced into the selector; an unparsable selector simply yields no description.
        }
        return description;
    }

    /**
     * Determines the author name by trying, in order: an element in the body
     * whose {@code rel} contains "author", several {@code meta} tags
     * (author, article:author, twitter:creator, itemprop=author), and finally
     * byline-style elements ranked with getBestMatchElement.
     *
     * <p>For the byline fallback the words in IGNORE_AUTHOR_PARTS are removed and
     * only the text before the first comma is kept. Whatever was found is then
     * run through CLEAN_AUTHOR_PATTERNS; the first full match returns its group 1.
     *
     * @param document parsed page
     * @return the author name, or an empty string when none could be found
     */
    private String extractAuthorName(Document document) {
        Element relAuthor = document.select("body [rel*=author]").first();
        String authorName = relAuthor != null ? SHelper.innerTrim(relAuthor.ownText()) : "";

        if (authorName.isEmpty()) {
            Element metaAuthor = document.select("head meta[name=author]").first();
            if (metaAuthor != null) {
                authorName = SHelper.innerTrim(metaAuthor.attr("content"));
            }
        }
        if (authorName.isEmpty()) {
            authorName = SHelper.innerTrim(document.select("head meta[property=article:author]").attr("content"));
        }
        if (authorName.isEmpty()) {
            authorName = SHelper.innerTrim(document.select("head meta[property=twitter:creator]").attr("content"));
        }
        if (authorName.isEmpty()) {
            authorName = SHelper.innerTrim(document.select("meta[itemprop=author], span[itemprop=author]").attr("content"));
        }
        if (authorName.isEmpty()) {
            try {
                Elements candidates = document.select("a[rel=author],.byline-name,.byLineTag,.byline,.author,.by,.writer,.address");
                if (candidates == null || candidates.isEmpty()) {
                    candidates = document.select("body [class*=author]");
                }
                if (candidates == null || candidates.isEmpty()) {
                    candidates = document.select("body [title*=author]");
                }
                if (candidates == null || candidates.isEmpty()) {
                    candidates = document.select(".staff_info dl a[href]");
                }
                if (candidates == null || candidates.isEmpty()) {
                    candidates = document.select("cite[class*=source]");
                }
                if (candidates != null) {
                    Element bestMatchElement = getBestMatchElement(candidates);
                    if (bestMatchElement != null) {
                        String text = bestMatchElement.text();
                        authorName = SHelper.innerTrim(IGNORE_AUTHOR_PARTS.matcher(text).replaceAll(""));
                        // Keep the part before the first comma. substring is used instead of
                        // split(",")[0], which throws when the string consists only of commas.
                        int comma = authorName.indexOf(',');
                        if (comma >= 0) {
                            authorName = authorName.substring(0, comma);
                        }
                    }
                }
            } catch (Exception e) {
                // Best effort: a failure in the byline heuristics must not abort the extraction.
                System.out.println(e.toString());
            }
        }

        for (Pattern cleaner : CLEAN_AUTHOR_PATTERNS) {
            Matcher matcher = cleaner.matcher(authorName);
            if (matcher.matches()) {
                return SHelper.innerTrim(matcher.group(1));
            }
        }
        return authorName;
    }

    /**
     * Returns the page's canonical URL: {@code link[rel=canonical]} first, then
     * {@code og:url}, then {@code twitter:url}. Each value goes through
     * SHelper.replaceSpaces; later sources are only queried when the earlier ones are empty.
     */
    private static String extractCanonicalUrl(Document document) {
        String url = SHelper.replaceSpaces(document.select("head link[rel=canonical]").attr("href"));
        if (url.isEmpty()) {
            url = SHelper.replaceSpaces(document.select("head meta[property=og:url]").attr("content"));
        }
        if (url.isEmpty()) {
            url = SHelper.replaceSpaces(document.select("head meta[name=twitter:url]").attr("content"));
        }
        return url;
    }

    /**
     * Parses {@code str} as HTML with Jsoup and delegates to the Document-based overload.
     *
     * @throws IllegalArgumentException when {@code str} is empty
     */
    private JResult extractContent(JResult jResult, String str, OutputFormatter outputFormatter, Boolean bool, int i) throws Exception {
        final boolean nothingToParse = str.isEmpty();
        if (nothingToParse) {
            throw new IllegalArgumentException("html string is empty!?");
        }
        final Document parsed = Jsoup.parse(str);
        return extractContent(jResult, parsed, outputFormatter, bool, i);
    }

    /**
     * Runs the extraction with document preparation enabled; when that yields
     * no text, runs it again without preparation on a copy of the document that
     * was taken before the first pass.
     */
    private JResult extractContent(JResult jResult, Document document, OutputFormatter outputFormatter, Boolean bool, int i) throws Exception {
        // Copy first: the first pass modifies the document it is given.
        final Document untouched = document.mo67clone();
        final JResult firstPass = extractContent(jResult, document, outputFormatter, bool, i, true);
        if (!firstPass.getText().isEmpty()) {
            return firstPass;
        }
        return extractContent(jResult, untouched, outputFormatter, bool, i, false);
    }

    /**
     * Core extraction pass: fills {@code jResult} with metadata, main text,
     * images and links taken from {@code document}.
     *
     * @param jResult         result object to populate; also returned
     * @param document        parsed page; modified during extraction
     * @param outputFormatter turns the best content element into text
     * @param bool            whether images should be extracted
     * @param i               maximum size of the text passed to utf8truncate; values &lt;= 0 disable truncation
     * @param z               whether to run prepareDocument before scoring
     * @return {@code jResult}
     * @throws NullPointerException when {@code document} is null
     */
    private JResult extractContent(JResult jResult, Document document, OutputFormatter outputFormatter, Boolean bool, int i, boolean z) {
        if (document == null) {
            throw new NullPointerException("missing document");
        }

        // Metadata is read before the document is prepared or scored.
        jResult.setTitle(extractTitle(document));
        jResult.setDescription(extractDescription(document));
        jResult.setCanonicalUrl(extractCanonicalUrl(document));
        jResult.setType(extractType(document));
        jResult.setSitename(extractSitename(document));
        jResult.setLanguage(extractLanguage(document));
        jResult.setAuthorName(extractAuthorName(document));
        jResult.setAuthorDescription(extractAuthorDescription(document, jResult.getAuthorName()));
        Date extractDate = extractDate(document);
        if (extractDate == null) {
            jResult.setDate(parseDate(SHelper.estimateDate(jResult.getUrl())));
        } else {
            jResult.setDate(extractDate);
        }

        if (z) {
            prepareDocument(document);
        }

        Element bestMatchElement = getBestMatchElement(getNodes(document));
        if (bestMatchElement != null) {
            if (bool.booleanValue()) {
                ArrayList<ImageResult> images = new ArrayList<ImageResult>();
                Element bestImage = determineImageSource(bestMatchElement, images);
                if (bestImage != null) {
                    jResult.setImageUrl(SHelper.replaceSpaces(bestImage.attr("src")));
                    jResult.setImages(images);
                }
            }

            String text = removeTitleFromText(outputFormatter.getFormattedText(bestMatchElement), jResult.getTitle());
            // Text no longer than the title is not stored.
            if (text.length() > jResult.getTitle().length()) {
                if (i > 0 && text.length() > i) {
                    text = utf8truncate(text, i);
                }
                jResult.setText(text);
            }

            // Record each link with the offset of its markup inside the best element's HTML,
            // searching forward from the previous link's offset.
            String html = bestMatchElement.toString();
            int searchFrom = 0;
            for (Element link : bestMatchElement.select("a[href]")) {
                int position = html.indexOf(link.toString(), searchFrom);
                jResult.addLink(link.attr("abs:href"), link.text(), Integer.valueOf(position));
                searchFrom = position;
            }
        }

        if (bool.booleanValue() && jResult.getImageUrl().isEmpty()) {
            jResult.setImageUrl(extractImageUrl(document));
        }
        jResult.setRssUrl(extractRssUrl(document));
        jResult.setVideoUrl(extractVideoUrl(document));
        jResult.setFaviconUrl(extractFaviconUrl(document));
        jResult.setKeywords(extractKeywords(document));

        if (jResult.getAuthorName().length() > MAX_AUTHOR_NAME_LENGHT) {
            jResult.setAuthorName(utf8truncate(jResult.getAuthorName(), MAX_AUTHOR_NAME_LENGHT));
        }
        // An author description that starts like the article text or the page description is discarded.
        String snippet = getSnippet(jResult.getAuthorDescription());
        if (getSnippet(jResult.getText()).equals(snippet) || getSnippet(jResult.getDescription()).equals(snippet)) {
            jResult.setAuthorDescription("");
        } else if (jResult.getAuthorDescription().length() > MAX_AUTHOR_DESC_LENGHT) {
            jResult.setAuthorDescription(utf8truncate(jResult.getAuthorDescription(), MAX_AUTHOR_DESC_LENGHT));
        }
        // Over-long image URLs are dropped rather than truncated.
        if (jResult.getImageUrl().length() > MAX_IMAGE_LENGHT) {
            jResult.setImageUrl("");
        }
        return jResult;
    }

    /**
     * Extracts the publication date from the document's metadata.
     *
     * <p>Sources are tried in order: {@code meta} names ptime, utime, pdate and
     * property article:published; then article:published_time, dateCreated,
     * datePublished, OriginalPublicationDate, DisplayDate, any meta whose name
     * contains "date", and finally an element with class {@code date-header}.
     *
     * @param document parsed page
     * @return the result of parseDate for the first source found, or {@code null} when none is present
     */
    private static Date extractDate(Document document) {
        Element ptime = document.select("meta[name=ptime]").first();
        String dateStr = ptime != null ? SHelper.innerTrim(ptime.attr("content")) : "";
        if (dateStr.isEmpty()) {
            dateStr = SHelper.innerTrim(document.select("meta[name=utime]").attr("content"));
        }
        if (dateStr.isEmpty()) {
            dateStr = SHelper.innerTrim(document.select("meta[name=pdate]").attr("content"));
        }
        if (dateStr.isEmpty()) {
            dateStr = SHelper.innerTrim(document.select("meta[property=article:published]").attr("content"));
        }
        // A value was found: use it. (The check used to be inverted, which parsed the empty
        // string and skipped every fallback below exactly when nothing had been found.)
        if (!dateStr.isEmpty()) {
            return parseDate(dateStr);
        }

        Elements publishedTime = document.select("meta[property=article:published_time]");
        if (!publishedTime.isEmpty()) {
            Element element = publishedTime.get(0);
            if (element.hasAttr("content")) {
                String attr = element.attr("content");
                if (attr.endsWith("Z")) {
                    attr = attr.substring(0, attr.length() - 1) + "GMT-00:00";
                } else if (attr.length() >= 6) {
                    // Drop the 6-character zone suffix. The date is no longer passed to
                    // String.format as a format string, which threw on values containing '%'.
                    attr = attr.substring(0, attr.length() - 6);
                }
                return parseDate(attr);
            }
        }

        Elements dateCreated = document.select("meta[property=dateCreated], span[property=dateCreated]");
        if (!dateCreated.isEmpty()) {
            Element element = dateCreated.get(0);
            return element.hasAttr("content") ? parseDate(element.attr("content")) : parseDate(element.text());
        }

        Elements datePublished = document.select("meta[itemprop=datePublished], span[itemprop=datePublished]");
        if (!datePublished.isEmpty()) {
            Element element = datePublished.get(0);
            if (element.hasAttr("content")) {
                return parseDate(element.attr("content"));
            }
            // NOTE(review): this constant is presumably the "value" attribute name substituted by the decompiler — confirm.
            if (element.hasAttr(FirebaseAnalytics.Param.VALUE)) {
                return parseDate(element.attr(FirebaseAnalytics.Param.VALUE));
            }
            return parseDate(element.text());
        }

        Elements originalPublication = document.select("meta[name=OriginalPublicationDate]");
        if (!originalPublication.isEmpty()) {
            Element element = originalPublication.get(0);
            if (element.hasAttr("content")) {
                return parseDate(element.attr("content"));
            }
        }

        Elements displayDate = document.select("meta[name=DisplayDate]");
        if (!displayDate.isEmpty()) {
            Element element = displayDate.get(0);
            if (element.hasAttr("content")) {
                return parseDate(element.attr("content"));
            }
        }

        Elements anyDateMeta = document.select("meta[name*=date]");
        if (!anyDateMeta.isEmpty()) {
            Element element = anyDateMeta.get(0);
            if (element.hasAttr("content")) {
                return parseDate(element.attr("content"));
            }
        }

        Elements dateHeader = document.select(".date-header");
        if (dateHeader.isEmpty()) {
            return null;
        }
        return parseDate(dateHeader.get(0).text());
    }

    /**
     * Returns the page description from {@code meta[name=description]}, falling
     * back to {@code og:description} and then {@code twitter:description}.
     * Each value goes through SHelper.innerTrim.
     */
    private static String extractDescription(Document document) {
        String description = SHelper.innerTrim(document.select("head meta[name=description]").attr("content"));
        if (description.isEmpty()) {
            description = SHelper.innerTrim(document.select("head meta[property=og:description]").attr("content"));
        }
        if (description.isEmpty()) {
            description = SHelper.innerTrim(document.select("head meta[name=twitter:description]").attr("content"));
        }
        return description;
    }

    /**
     * Returns the favicon URL from {@code link[rel=icon]}, falling back to
     * links whose rel starts with "shortcut" or ends with "icon".
     */
    private static String extractFaviconUrl(Document document) {
        final String primary = SHelper.replaceSpaces(document.select("head link[rel=icon]").attr("href"));
        if (!primary.isEmpty()) {
            return primary;
        }
        return SHelper.replaceSpaces(document.select("head link[rel^=shortcut],link[rel$=icon]").attr("href"));
    }

    /**
     * Returns the page-level image URL, trying {@code og:image},
     * {@code twitter:image}, {@code link[rel=image_src]} and
     * {@code meta[name=thumbnail]} in that order.
     */
    private static String extractImageUrl(Document document) {
        String imageUrl = SHelper.replaceSpaces(document.select("head meta[property=og:image]").attr("content"));
        if (imageUrl.isEmpty()) {
            imageUrl = SHelper.replaceSpaces(document.select("head meta[name=twitter:image]").attr("content"));
        }
        if (imageUrl.isEmpty()) {
            imageUrl = SHelper.replaceSpaces(document.select("link[rel=image_src]").attr("href"));
        }
        if (imageUrl.isEmpty()) {
            imageUrl = SHelper.replaceSpaces(document.select("head meta[name=thumbnail]").attr("content"));
        }
        return imageUrl;
    }

    /**
     * Reads {@code meta[name=keywords]}, strips one pair of surrounding square
     * brackets if present, and splits the rest on commas (with optional
     * surrounding whitespace).
     *
     * @return the keywords, or an empty list when the content is empty
     */
    private static Collection<String> extractKeywords(Document document) {
        String content = SHelper.innerTrim(document.select("head meta[name=keywords]").attr("content"));
        final boolean bracketed = content.startsWith("[") && content.endsWith("]");
        if (bracketed) {
            content = content.substring(1, content.length() - 1);
        }

        final String[] keywords = content.split("\\s*,\\s*");
        if (keywords.length > 1) {
            return Arrays.asList(keywords);
        }
        // A single entry only counts when it is a non-empty string.
        final boolean singleKeyword = keywords.length == 1 && keywords[0] != null && !keywords[0].isEmpty();
        if (singleKeyword) {
            return Arrays.asList(keywords);
        }
        return Collections.emptyList();
    }

    /**
     * Returns the page language, cut to its first two characters when longer.
     * Sources, in order: {@code meta[property=language]}, the {@code lang}
     * attribute of the elements selected by Const.BannerType.CODE, and {@code og:locale}.
     */
    private static String extractLanguage(Document document) {
        String language = SHelper.innerTrim(document.select("head meta[property=language]").attr("content"));
        if (language.isEmpty()) {
            // NOTE(review): this constant is presumably a decompiler substitution for a tag-name literal — confirm its value.
            language = SHelper.innerTrim(document.select(Const.BannerType.CODE).attr("lang"));
        }
        if (language.isEmpty()) {
            language = SHelper.innerTrim(document.select("head meta[property=og:locale]").attr("content"));
        }
        if (language.length() > 2) {
            return language.substring(0, 2);
        }
        return language;
    }

    /**
     * Returns the href of the first {@code link[rel=alternate]} that also has
     * type {@code application/rss+xml}, passed through SHelper.replaceSpaces.
     */
    private static String extractRssUrl(Document document) {
        final Elements alternates = document.select("link[rel=alternate]");
        final Elements rssLinks = alternates.select("link[type=application/rss+xml]");
        return SHelper.replaceSpaces(rssLinks.attr("href"));
    }

    /**
     * Returns the site name from {@code og:site_name}, falling back to
     * {@code twitter:site}. Each value goes through SHelper.innerTrim.
     *
     * <p>A third lookup that repeated the {@code og:site_name} selector was
     * removed: it could only return the same empty value as the first one.
     * NOTE(review): a different third source may have been intended.
     */
    private static String extractSitename(Document document) {
        String siteName = SHelper.innerTrim(document.select("head meta[property=og:site_name]").attr("content"));
        if (siteName.isEmpty()) {
            siteName = SHelper.innerTrim(document.select("head meta[name=twitter:site]").attr("content"));
        }
        return siteName;
    }

    /**
     * Returns the page title. The cleaned document title is preferred; the
     * fallbacks are the raw {@code head title} text, {@code meta[name=title]},
     * {@code og:title}, {@code twitter:title} and finally the text of
     * {@code h1:first-of-type}.
     */
    private static String extractTitle(Document document) {
        String title = cleanTitle(document.title());
        if (title.isEmpty()) {
            title = SHelper.innerTrim(document.select("head title").text());
        }
        if (title.isEmpty()) {
            title = SHelper.innerTrim(document.select("head meta[name=title]").attr("content"));
        }
        if (title.isEmpty()) {
            title = SHelper.innerTrim(document.select("head meta[property=og:title]").attr("content"));
        }
        if (title.isEmpty()) {
            title = SHelper.innerTrim(document.select("head meta[name=twitter:title]").attr("content"));
        }
        if (title.isEmpty()) {
            title = SHelper.innerTrim(document.select("h1:first-of-type").text());
        }
        return title;
    }

    /** Returns the content of {@code meta[property=og:type]}, passed through SHelper.innerTrim. */
    private static String extractType(Document document) {
        final String ogType = document.select("head meta[property=og:type]").attr("content");
        return SHelper.innerTrim(ogType);
    }

    /** Returns the content of {@code meta[property=og:video]}, passed through SHelper.replaceSpaces. */
    private static String extractVideoUrl(Document document) {
        final String ogVideo = document.select("head meta[property=og:video]").attr("content");
        return SHelper.replaceSpaces(ogVideo);
    }

    /**
     * Returns the element with the highest weight (see getWeight) among
     * {@code collection}; the first one wins on ties.
     *
     * @return the best element, or {@code null} when no element's weight exceeds the initial threshold
     */
    private Element getBestMatchElement(Collection<Element> collection) {
        // NOTE(review): the threshold constant is presumably a decompiler substitution for a numeric literal — confirm its value.
        int bestWeight = AppLovinErrorCodes.UNABLE_TO_PRECACHE_RESOURCES;
        Element bestElement = null;
        final Iterator<Element> candidates = collection.iterator();
        while (candidates.hasNext()) {
            final Element candidate = candidates.next();
            final int candidateWeight = getWeight(candidate, false);
            if (candidateWeight <= bestWeight) {
                continue;
            }
            bestWeight = candidateWeight;
            bestElement = candidate;
        }
        return bestElement;
    }

    /**
     * Collects the candidate content elements below {@code body}, in document
     * order: every element whose tag name matches NODES.
     *
     * <p>Each candidate is given a starting score that begins at 100 and is
     * halved (integer division) for every further candidate.
     *
     * @param document parsed page
     * @return the candidates in encounter order, without duplicates
     */
    private static Collection<Element> getNodes(Document document) {
        // A typed insertion-ordered set replaces the raw LinkedHashMap that was only used for its keys.
        Set<Element> candidates = new LinkedHashSet<Element>(64);
        int startScore = 100;
        for (Element element : document.select("body").select("*")) {
            if (NODES.matcher(element.tagName()).matches()) {
                candidates.add(element);
                setScore(element, startScore);
                startScore /= 2;
            }
        }
        return candidates;
    }

    /**
     * Reads the score stored in the element's {@code gravityScore} attribute.
     *
     * @return the stored score, or 0 when it cannot be read as an integer
     */
    private static int getScore(Element element) {
        int score;
        try {
            final String stored = element.attr("gravityScore");
            score = Integer.parseInt(stored);
        } catch (Exception e) {
            score = 0;
        }
        return score;
    }

    /** Returns the first 50 characters of {@code str}, or {@code str} itself when it is shorter than that. */
    private static String getSnippet(String str) {
        if (str.length() >= 50) {
            return str.substring(0, 50);
        }
        return str;
    }

    /**
     * Computes the total weight of an element: its attribute-based weight, one
     * point per ten characters of own text (rounded), and the weight
     * contributed by its children.
     *
     * @param element element to weigh
     * @param z       when true, the value of the first descendant-or-self
     *                {@code extragravityscore} attribute is added
     */
    private int getWeight(Element element, boolean z) {
        final int attributeWeight = calcWeight(element);
        final int ownTextWeight = (int) Math.round((element.ownText().length() / 100.0d) * 10.0d);
        final int childWeight = weightChildNodes(element);
        final int total = attributeWeight + ownTextWeight + childWeight;
        if (!z) {
            return total;
        }
        final Element extra = element.select("[extragravityscore]").first();
        if (extra == null) {
            return total;
        }
        return total + Integer.parseInt(extra.attr("extragravityscore"));
    }

    /** Treats an image URL as an advertisement when SHelper.count finds "ad" in it at least twice. */
    private static boolean isAdImage(String str) {
        final int adOccurrences = SHelper.count(str, "ad");
        return adOccurrences >= 2;
    }

    /**
     * Placeholder date parser: the argument is ignored and a {@link Date} at
     * epoch millisecond 0 is always returned.
     */
    private static Date parseDate(String str) {
        final long epochMillis = 0L;
        return new Date(epochMillis);
    }

    /**
     * Prepares the document for scoring. Currently this only removes
     * script, noscript and style elements (see removeScriptsAndStyles).
     */
    private static void prepareDocument(Document document) {
        removeScriptsAndStyles(document);
    }

    /**
     * Removes every {@code script}, {@code noscript} and {@code style} element
     * from the document, in that order.
     *
     * @return the same document instance
     */
    private static Document removeScriptsAndStyles(Document document) {
        final String[] tagsToDrop = {"script", "noscript", "style"};
        for (String tag : tagsToDrop) {
            for (Element unwanted : document.getElementsByTag(tag)) {
                unwanted.remove();
            }
        }
        return document;
    }

    /**
     * Currently a no-op: returns {@code str} unchanged and ignores the title
     * argument {@code str2}.
     */
    private static String removeTitleFromText(String str, String str2) {
        return str;
    }

    /**
     * Replaces the NEGATIVE pattern and remembers its source.
     * The regex is compiled before any field is touched, so an invalid pattern
     * leaves both the stored source and the compiled pattern unchanged.
     *
     * @param str regular expression source
     * @return this extractor
     * @throws java.util.regex.PatternSyntaxException when {@code str} is not a valid regex
     */
    private ArticleTextExtractor setNegative(String str) {
        final Pattern compiled = Pattern.compile(str);
        this.negativeStr = str;
        this.NEGATIVE = compiled;
        return this;
    }

    /**
     * Replaces the POSITIVE pattern and remembers its source.
     * The regex is compiled before any field is touched, so an invalid pattern
     * leaves both the stored source and the compiled pattern unchanged.
     *
     * @param str regular expression source
     * @return this extractor
     * @throws java.util.regex.PatternSyntaxException when {@code str} is not a valid regex
     */
    private ArticleTextExtractor setPositive(String str) {
        final Pattern compiled = Pattern.compile(str);
        this.positiveStr = str;
        this.POSITIVE = compiled;
        return this;
    }

    /** Stores {@code i} in the element's {@code gravityScore} attribute as decimal text. */
    private static void setScore(Element element, int i) {
        final String encoded = String.valueOf(i);
        element.attr("gravityScore", encoded);
    }

    /**
     * Replaces the UNLIKELY pattern and remembers its source.
     * The regex is compiled before any field is touched, so an invalid pattern
     * leaves both the stored source and the compiled pattern unchanged.
     *
     * @param str regular expression source
     * @return this extractor
     * @throws java.util.regex.PatternSyntaxException when {@code str} is not a valid regex
     */
    private ArticleTextExtractor setUnlikely(String str) {
        final Pattern compiled = Pattern.compile(str);
        this.unlikelyStr = str;
        this.UNLIKELY = compiled;
        return this;
    }

    /**
     * Truncates {@code str} so that the byte cost of the kept characters does
     * not exceed {@code i}. Per-char cost: 1 up to 127, 2 up to 2047, 3 up to
     * 55295, 4 for chars up to 56319, 0 for chars up to 57343, and 3 above that.
     * Copying stops at the first char that would exceed the budget.
     *
     * @param str text to truncate
     * @param i   byte budget
     * @return the longest prefix of {@code str} that fits
     */
    private static String utf8truncate(String str, int i) {
        final StringBuilder kept = new StringBuilder(i);
        final int total = str.length();
        int usedBytes = 0;
        int pos = 0;
        while (pos < total) {
            final char current = str.charAt(pos);
            final int cost;
            if (current <= 127) {
                cost = 1;
            } else if (current <= 2047) {
                cost = 2;
            } else if (current <= 55295) {
                cost = 3;
            } else if (current <= 56319) {
                // Charged in full here; the following char in the next range is free.
                cost = 4;
            } else if (current <= 57343) {
                cost = 0;
            } else {
                cost = 3;
            }
            if (usedBytes + cost > i) {
                break;
            }
            kept.append(current);
            usedBytes += cost;
            pos++;
        }
        return kept.toString();
    }

    /**
     * Computes the weight that an element's children and grandchildren
     * contribute to it, and adjusts the stored scores of some children.
     *
     * <p>Children with at least 20 characters of own text score directly;
     * grandchildren are scored the same way but count for one third. A child
     * whose class is "caption" adds a bonus, and when at least two long
     * paragraphs are present, headings add a further bonus.
     *
     * @param element the element whose descendants are evaluated
     * @return the accumulated weight
     */
    private int weightChildNodes(Element element) {
        int i = 0;
        Element element2 = null;
        ArrayList arrayList = new ArrayList(5);
        // Pass 1: direct children.
        Iterator<Element> it = element.children().iterator();
        while (it.hasNext()) {
            Element next = it.next();
            String ownText = next.ownText();
            int length = ownText.length();
            // Children with fewer than 20 characters of own text are ignored.
            if (length >= 20) {
                if (length > 200) {
                    i += Math.max(50, length / 10);
                }
                if (next.tagName().equals("h1") || next.tagName().equals("h2")) {
                    i += 30;
                } else if (next.tagName().equals("div") || next.tagName().equals("p")) {
                    // Also adds the computed value to the child's stored score.
                    i += calcWeightForChild(next, ownText);
                    if (next.tagName().equals("p") && length > 50) {
                        arrayList.add(next);
                    }
                    if (next.className().toLowerCase().equals("caption")) {
                        element2 = next;
                    }
                }
            }
        }
        // Pass 2: grandchildren; a child matching NEGATIVE is penalised instead of being descended into.
        int i2 = 0;
        Iterator<Element> it2 = element.children().iterator();
        while (it2.hasNext()) {
            Element next2 = it2.next();
            if (this.NEGATIVE.matcher(next2.id()).find() || this.NEGATIVE.matcher(next2.className()).find()) {
                i2 -= 30;
            } else {
                Iterator<Element> it3 = next2.children().iterator();
                while (it3.hasNext()) {
                    Element next3 = it3.next();
                    String ownText2 = next3.ownText();
                    int length2 = ownText2.length();
                    if (length2 >= 20) {
                        int max = length2 > 200 ? 0 + Math.max(50, length2 / 10) : 0;
                        if (next3.tagName().equals("h1") || next3.tagName().equals("h2")) {
                            max += 30;
                        } else if (next3.tagName().equals("div") || next3.tagName().equals("p")) {
                            max += calcWeightForChild(next3, ownText2);
                        }
                        i2 += max;
                    }
                }
            }
        }
        // Grandchildren count for a third of their accumulated score.
        int i3 = i + (i2 / 3);
        if (element2 != null) {
            i3 += 30;
        }
        // Pass 3: only when at least two paragraphs longer than 50 characters were seen in pass 1.
        if (arrayList.size() >= 2) {
            Iterator<Element> it4 = element.children().iterator();
            while (it4.hasNext()) {
                Element next4 = it4.next();
                // NOTE(review): these are substring tests on the tag list, so short tag names such as
                // "a", "b" or "i" also match "table;li;td;th" — confirm whether that is intended.
                if ("h1;h2;h3;h4;h5;h6".contains(next4.tagName())) {
                    i3 += 20;
                } else if ("table;li;td;th".contains(next4.tagName())) {
                    addScore(next4, -30);
                }
                if ("p".contains(next4.tagName())) {
                    addScore(next4, 30);
                }
            }
        }
        return i3;
    }

    /**
     * Appends {@code str} as a further alternative to the NEGATIVE pattern.
     *
     * @param str regular expression fragment
     * @return this extractor
     */
    public ArticleTextExtractor addNegative(String str) {
        final String extended = this.negativeStr + '|' + str;
        setNegative(extended);
        return this;
    }

    /**
     * Appends {@code str} as a further alternative to the POSITIVE pattern.
     *
     * @param str regular expression fragment
     * @return the extractor returned by setPositive
     */
    public ArticleTextExtractor addPositive(String str) {
        final String extended = this.positiveStr + '|' + str;
        return setPositive(extended);
    }

    /**
     * Appends {@code str} as a further alternative to the UNLIKELY pattern.
     *
     * @param str regular expression fragment
     * @return the extractor returned by setUnlikely
     */
    public ArticleTextExtractor addUnlikely(String str) {
        final String extended = this.unlikelyStr + '|' + str;
        return setUnlikely(extended);
    }

    /**
     * Extracts the article from the HTML in {@code str} into {@code jResult},
     * using the configured formatter, with image extraction enabled and a
     * length limit of 0.
     */
    public JResult extractContent(JResult jResult, String str) throws Exception {
        final int lengthLimit = 0;
        return extractContent(jResult, str, this.formatter, Boolean.TRUE, lengthLimit);
    }

    /**
     * Extracts the article from the HTML in {@code str} into {@code jResult},
     * using the configured formatter, with image extraction enabled and
     * {@code i} as the length limit.
     */
    public JResult extractContent(JResult jResult, String str, int i) throws Exception {
        final OutputFormatter activeFormatter = this.formatter;
        return extractContent(jResult, str, activeFormatter, Boolean.TRUE, i);
    }

    /** Extracts the article from the HTML in {@code str} into a new JResult, with a length limit of 0. */
    public JResult extractContent(String str) throws Exception {
        final JResult result = new JResult();
        return extractContent(result, str, 0);
    }

    /** Extracts the article from the HTML in {@code str} into a new JResult, with {@code i} as the length limit. */
    public JResult extractContent(String str, int i) throws Exception {
        final JResult result = new JResult();
        return extractContent(result, str, i);
    }

    /**
     * Replaces the formatter that the public extractContent overloads pass to
     * the extraction pipeline.
     *
     * @param outputFormatter formatter to use from now on
     */
    public void setOutputFormatter(OutputFormatter outputFormatter) {
        this.formatter = outputFormatter;
    }

    /**
     * Removes every element below {@code body} whose lower-cased class name or
     * id matches the NEGATIVE pattern.
     * NOTE(review): despite the method name, the UNLIKELY pattern is not consulted here.
     */
    protected void stripUnlikelyCandidates(Document document) {
        for (Element candidate : document.select("body").select("*")) {
            final String cssClass = candidate.className().toLowerCase();
            final String elementId = candidate.id().toLowerCase();
            final boolean negativeClass = this.NEGATIVE.matcher(cssClass).find();
            if (negativeClass || this.NEGATIVE.matcher(elementId).find()) {
                candidate.remove();
            }
        }
    }
}
