User:Lingzhi2/reviewsourcecheck-sb.js

Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// <nowiki>
jQuery(document).ready(function($) {
    if ((mw.config.get('wgPageName').indexOf('talk:') < 0) && (mw.config.get('wgPageName').indexOf('Talk:') < 0) && (mw.config.get('wgPageName').indexOf('Special:') < 0) && (mw.config.get('wgPageName').indexOf('Wikipedia:') < 0)) {

        // spantitles gives direct access to the title attribute of every
        // element with class "Z3988", which is used extensively below.

        var spantitles = document.getElementsByClassName("Z3988");

        // Collect the text of every table-of-contents entry and turn it into a
        // "#Section_name" anchor selector.
        var myTOC = document.getElementsByClassName("toctext");
        // var myHeadings = $("h2, h3, h4");
        var myTOCarray = [];
        for (var z = 0; z < myTOC.length; z++) {
            // Replace EVERY space, not only the first one. With a string
            // pattern, String.prototype.replace substitutes a single match,
            // so "Citations and notes" used to become "#Citations_and notes"
            // and could never equal the "#Citations_and_notes" anchor that
            // the sorting check looks for.
            var myTOCtxt = "#" + myTOC[z].innerText.replace(/ /g, "_");
            myTOCarray.push(myTOCtxt);
        }

        // The TOC is used to get the list of section headers used on the page.
        // These are reversed and checked from bottom to top while sorting, so
        // that if an article has stacked headings (for example Primary and
        // Secondary references listed under Works cited), the script won't try
        // to insert Secondary ones (lower on the page) into the sorted list of
        // Primary ones (higher on the page), which would cause numerous
        // confusing false positives in the sorting process.

        myTOCarray.reverse();

        //var citejournals = document.getElementsByClassName("citation journal");
        // var reftext = document.getElementsByClassName("reference-text");  // all sfns
        // var bookspan = document.querySelectorAll("citation book.span title");
        //var webspan = document.querySelectorAll("citation web.span title");
        // spantitle[13].title


        // first check:
        //  Hyphen in pg. range;
        //  P/PP error?
        //
        // For every link on the page, the text of the link's parent node is
        // split on ";" and each piece is scanned for "pp." / " p." page
        // references. Any problem found is appended to the parent node as a
        // <strong class=refckErr> message.

        // Escapes text taken from the page so that it is displayed literally
        // when embedded in the HTML of a message.
        var escapePageText = function(text) {
            return text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
        };

        // i, k, srctxt and spline are kept as function-scoped vars under these
        // exact names because later checks in this function use the same names.
        var links = document.links;
        for (var i = 0; i < links.length; i++) {
            var href = links[i].getAttribute('href');
            var parent = links[i].parentNode;
            var srctxt = parent.textContent;

            // index addresses the case of sfnm, whose output (i.e. the parent
            // node) includes several links as siblings: if the previous link is
            // a direct child of the same parent, that parent has already been
            // examined, so it is skipped to avoid duplicated messages.
            var index = Array.prototype.indexOf.call(parent.children, links[i - 1]);
            if (index >= 0) {
                continue;
            }

            var spline = srctxt.split(";");
            for (var k = 0; k < spline.length; k++) {
                var commacount = (spline[k].match(/,/g) || []).length;
                var myPos;
                var temptxt;

                if (spline[k].indexOf('pp.') > 0) {

                    // so output from Ucucha's script won't be grabbed and
                    // added to this output
                    spline[k] = spline[k].replace("Harv error: link to", "            ");

                    // temptxt (the 12 characters starting at "pp.") limits the
                    // hyphen test to the page numbers themselves.
                    myPos = spline[k].indexOf('pp.');
                    temptxt = spline[k].substring(myPos, myPos + 12);

                    if ((temptxt.indexOf('-') > 0) && (parent.innerHTML.indexOf('Hyphen') < 0)) {
                        // insertAdjacentHTML appends without re-parsing the
                        // existing children of the parent node.
                        parent.insertAdjacentHTML('beforeend',
                            " <strong class=refckErr> Hyphen in pg. range;  </strong>");
                    }

                    // "pp." with nothing that looks like a range or a list:
                    // no dash, no ampersand, no " and ", fewer than two commas
                    // (avoids false positives such as
                    // "Brennan, Heathcote & Lucas 1984, p. 9").
                    if ((href.indexOf('#CITEREF') === 0) && (spline[k].indexOf('–') < 0) && (spline[k].indexOf('&') < 0) && (commacount < 2) && (spline[k].indexOf('-') < 0) && (spline[k].indexOf(' and ') < 0) && (spline[k].indexOf('&ndash;') < 0)) {
                        parent.insertAdjacentHTML('beforeend',
                            " <strong class=refckErr> P/PP error? " +
                            escapePageText(temptxt) + "; </strong>");
                    }
                } else if ((href.indexOf('#CITEREF') === 0) && (spline[k].indexOf(' p.') > 0)) {
                    spline[k] = spline[k].replace("Harv error: link to", "            ");
                    myPos = spline[k].indexOf(' p.');
                    temptxt = spline[k].substring(myPos, myPos + 12);

                    // commas are counted only inside temptxt so that commas
                    // before the page reference are ignored
                    var commacount2 = (temptxt.match(/,/g) || []).length;

                    var looksLikeRange = (temptxt.indexOf('–') > 0) || (commacount2 > 0) || (temptxt.indexOf('-') > 0) || (temptxt.indexOf('&ndash;') > 0);

                    // p. 23, note 7; p. 23, n. 7; p.23, citing Smith 1989
                    // (commas irrelevant to pagination)
                    var isAnnotated = (temptxt.indexOf(', not') >= 0) || (temptxt.indexOf(', n.') >= 0) || (temptxt.indexOf(', cit') >= 0);

                    if (looksLikeRange && !isAnnotated) {
                        parent.insertAdjacentHTML('beforeend',
                            " <strong class=refckErr> P/PP error? " +
                            escapePageText(temptxt) + "; </strong>");
                    }

                    // Same duplicate guard as in the "pp." branch, so that a
                    // parent with several hyphenated pieces is flagged once.
                    if ((temptxt.indexOf('-') > 0) && (parent.innerHTML.indexOf('Hyphen') < 0)) {
                        parent.insertAdjacentHTML('beforeend',
                            " <strong class=refckErr> Hyphen in pg. range;  </strong>");
                    }
                }
            }
        }



        // second check:
        // Warning: Unexpected result – extra formatting in template?
        // Caution: Missing pagenums for book chapter?
        // Missing first name for:
        // Inconsistent use of Publisher Location
        // Missing Publisher
        // Missing ISBN
        // Pub. too early for ISBN, perhaps needs {{orig-year}};
        // Missing Identifier/control number, e.g. OCLC;
        // Missing Year/Date;
        // Missing access date;
        // Missing archive link;
        //
        // Each span.Z3988 carries the citation metadata in its title attribute
        // ("rft.*" fields). Messages are appended to the span's parent node.

        // withLocs etc. are used for "Inconsistent use of Publisher Location"
        var withLocs = false;
        var withoutLocs = false;
        var contraryLocs = false;
        var withLocsCnt = 0;
        var withoutLocsCnt = 0;
        var idArray = ["arXiv", "ASIN", "Bibcode", "doi:", "ISBN", "ISSN", "JFM", "JSTOR", "LCCN", " MR ", "OCLC", " OL ", "OSTI", "PMC", "PMID", "RFC", "SSRN", "Zbl"];

        // Appends one message to a node without re-parsing the node's
        // existing children.
        var appendCheckNote = function(node, html) {
            node.insertAdjacentHTML('beforeend', html);
        };

        for (var i = 0; i < spantitles.length; i++) {
            var spanTitle = spantitles[i].title;
            var holder = spantitles[i].parentNode;

            // there is nothing in the span title which indicates that a link
            // has been archived, so srctxt is used to catch it from textContent
            var srctxt = holder.textContent;
            var spline = spanTitle.split("rft.au=");
            var typoCk = holder.nodeName;

            if ((typoCk === "I") || (typoCk === "B")) {
                appendCheckNote(holder,
                    " <strong class=refckErr> Warning: Unexpected result – extra formatting in template? </strong>");
            }

            // both an article title and a book title: treated as a book chapter
            if ((spanTitle.indexOf("rft.atitle=") > 0) && (spanTitle.indexOf("rft.btitle=") > 0)) {
                if ((srctxt.indexOf(" pp.") < 0) && (srctxt.indexOf(" p.") < 0)) {
                    appendCheckNote(holder,
                        " <strong class=refckErr> Caution: Missing pagenums for book chapter? </strong>");
                }
            }

            if ((spanTitle.indexOf("rft.au=") > 0) && (srctxt.indexOf("et al.") < 0)) {
                for (var k = 1; k < spline.length; k++) {
                    // Only the author value itself (up to the next "&") is
                    // tested for "+". Testing the whole remainder of the title
                    // let a "+" in any later field hide a single-name author.
                    var authorName = spline[k].split("&")[0];
                    if (authorName.indexOf("+") < 0) {
                        appendCheckNote(holder,
                            " <strong class=refckErr> Missing first name for: <u>" + authorName +
                            "</u>; </strong>");
                    }
                }
            }

            // hasID: the visible citation text mentions any known identifier
            var hasID = false;
            for (var qq = 0; qq < idArray.length && !hasID; qq += 1) {
                hasID = srctxt.indexOf(idArray[qq]) > 0;
            }

            if ((spanTitle.indexOf("rft.genre=article") > 0) && !hasID) {
                appendCheckNote(holder,
                    " <strong class=refckErr> Missing identifier (ISSN, JSTOR, etc.); </strong>");
            }

            if (spanTitle.indexOf("rft.genre=book") > 0) {

                // these publishers are exempt from the location check
                if ((srctxt.indexOf("Oxford University Press") < 0) && (srctxt.indexOf("University of Calcutta") < 0) && (srctxt.indexOf("Princeton University Press") < 0) && (srctxt.indexOf("Cambridge University Press") < 0)) {

                    if (spanTitle.indexOf("rft.place") < 0) {
                        withoutLocs = true;
                        withoutLocsCnt += 1;
                        if (withLocs) {
                            contraryLocs = true;
                        }
                        if (contraryLocs) {
                            appendCheckNote(holder,
                                "<strong class=refckErr> Inconsistent use of Publisher Location (" +
                                withLocsCnt + " with; " + withoutLocsCnt + " <u>without</u>); </strong>");
                        }
                    } else {
                        withLocs = true;
                        withLocsCnt += 1;
                        if (withoutLocs) {
                            contraryLocs = true;
                            appendCheckNote(holder,
                                "<strong class=refckErr> Inconsistent use of Publisher Location (" +
                                withLocsCnt + " <u>with;</u> " + withoutLocsCnt + " without); </strong>");
                        }
                    }
                }

                if (spanTitle.indexOf("rft.pub") < 0) {
                    appendCheckNote(holder,
                        " <strong class=refckErr> Missing Publisher; </strong>");
                }

                var datePos = spanTitle.indexOf("rft.date");
                if (datePos > 0) {
                    // the four characters following "rft.date="
                    var myDate = spanTitle.slice(datePos + 9, datePos + 13);
                    if (Number(myDate) >= 1970) {
                        if (!hasID) {
                            appendCheckNote(holder,
                                " <strong class=refckErr> Missing ISBN; </strong>");
                        }
                    } else {
                        if ((spanTitle.indexOf("rft.isbn") > 0) && (srctxt.indexOf(") [") < 0)) {

                            // OK this (") [") is a huge kluge but there's
                            // nothing to indicate whether origyear is
                            // populated except by the srctxt having
                            // (pubdate) [origdate], and there's little
                            // restriction on the format of the two dates
                            appendCheckNote(holder,
                                " <strong class=refckErr> Pub. too early for ISBN, perhaps needs {{para|orig-year}}; </strong>");
                        }
                        if (!hasID) {
                            appendCheckNote(holder,
                                " <strong class=refckErr> Missing Identifier/control number, e.g. OCLC; </strong>");
                        }
                    }
                } else {
                    appendCheckNote(holder,
                        " <strong class=refckErr> Missing Year/Date; </strong>");
                }
            }

            // non-book citation with a URL somewhere in its metadata
            if ((spanTitle.indexOf("http") > 0) && (spanTitle.indexOf("rft.genre=book") < 0)) {
                if (srctxt.indexOf("rchived") < 0) {
                    appendCheckNote(holder,
                        " <strong class=refckErr> Missing archive link; </strong>");
                    if ((srctxt.indexOf("Retrieved") < 0) && (spanTitle.indexOf("rft.date") < 0)) {
                        appendCheckNote(holder,
                            " <strong class=refckErr> Missing access date; </strong>");
                    }
                }
            }
        }

        // third check:
        // sorting
        // finding duplicate author/title,
        // removing meaningless initial words,
        // handling odd date formats "(April 2007)" etc.,
        // sorting stacked reference sections
        // reverse TOC order and skipping if alreadySorted
        //
        // For each recognised reference section, a sort key is built for every
        // .citation element; the keys are compared with their sorted order and
        // a <strong class=refckWarn> message is appended where they differ.

        var refheaders = [];
        var allRefheaders = ["#Books", "#Journals", "#Articles", "#Biographies", "#Bibliography", "#References", "#Citations_and_notes", "#Literature_cited", "#Works_cited", "#Book_sources", "#Primary_sources", "#Secondary_sources", "#Sources", "#Specialized_studies"];

        // leading words that are ignored when sorting by title
        var leadingArticles = ["A ", "An ", "The "];

        // Escapes a sort key (derived from page text or an id attribute) so
        // that it is displayed literally instead of being parsed as HTML.
        var escapeSortText = function(text) {
            return String(text)
                .replace(/&/g, '&amp;')
                .replace(/</g, '&lt;')
                .replace(/>/g, '&gt;')
                .replace(/"/g, '&quot;');
        };

        // refheaders lets us sort in reverse TOC order
        for (var d = 0; d < myTOCarray.length; d++) {
            if (allRefheaders.indexOf(myTOCarray[d]) > -1) {
                refheaders.push(myTOCarray[d]);
            }
        }

        var alreadySorted = [];
        for (var r = 0; r < refheaders.length; r++) {

            var refsection = jQuery(refheaders[r]).parent().next();
            var newcites = refsection.find('.citation');

            var unsortedCites = []; // sort keys in page order
            var sortIndices = []; // index into newcites for each key
            var oldAuth = ''; // previous key, reused for |author-mask={{long dash}}

            for (var h = 0; h < newcites.length; h++) {
                var cite = newcites[h];

                if (alreadySorted.indexOf(cite) > -1) {
                    continue;
                }

                // only citations with no id, or with a CITEREF id, are sorted
                var id3 = cite.getAttribute('id');
                if (id3 && id3.indexOf('CITEREF') !== 0) {
                    continue;
                }

                // skip citations whose grandparent is a footnote (cite_note)
                var parentid = cite.parentNode.parentNode.getAttribute('id');
                if (parentid && parentid.indexOf('cite_note') > -1) {
                    continue;
                }

                // Read the text BEFORE any message is appended to this
                // citation, so the message cannot leak into the sort key.
                var citeText = cite.innerText;

                if (!id3) {
                    cite.insertAdjacentHTML('beforeend',
                        " <strong class=refckWarn> Caution: Missing <i>ref=</i> anchor?; </strong>");
                }

                var mySortTxt2;
                if (id3 == null) {
                    // no id at all: sort on the visible text
                    mySortTxt2 = citeText.replace('"', '').trim();

                    // check for empty string
                    if (mySortTxt2 === '') {
                        continue;
                    }

                    // A, An, The..
                    for (var a = 0; a < leadingArticles.length; a++) {
                        var article = leadingArticles[a];
                        if (mySortTxt2.slice(0, article.length) === article) {
                            mySortTxt2 = mySortTxt2.slice(article.length);
                            mySortTxt2 = mySortTxt2.charAt(0).toUpperCase() + mySortTxt2.slice(1);
                        }
                    }

                    // for example, in |author-mask={{long dash}}
                    if (mySortTxt2.charAt(0) === "—") {
                        mySortTxt2 = oldAuth + mySortTxt2;
                    }
                    mySortTxt2 = mySortTxt2.replace('"', '');

                    var closeParen = mySortTxt2.indexOf(")");
                    if (closeParen > 0) {
                        mySortTxt2 = mySortTxt2.substring(0, closeParen) + ")";

                        // handle (April 2006) or (04-11-2006) or anything not
                        // (YYYY): move the first 4-digit number to the front
                        // of the parenthesis. Skipped entirely when there is
                        // no such number, so nothing is removed by mistake.
                        var yearMatch = /\d{4}/.exec(mySortTxt2);
                        if (yearMatch !== null) {
                            mySortTxt2 = mySortTxt2.replace(yearMatch[0], "");
                            mySortTxt2 = mySortTxt2.replace("(", "(" + yearMatch[0] + "-");
                        }
                        mySortTxt2 = mySortTxt2.replace(" )", ")");
                    }

                    // keep at most the first five space-separated words
                    if ((mySortTxt2.match(/\s/g) || []).length > 5) {
                        var mySortPos2 = mySortTxt2.split(" ", 5).join(" ").length;
                        mySortTxt2 = mySortTxt2.substring(0, mySortPos2);
                    }
                } else {
                    // sort on the id with the CITEREF prefix removed
                    mySortTxt2 = id3.replace("CITEREF", "");
                    var firstAuthLname = citeText.split(",")[0].replace('"', '');
                    if (mySortTxt2.indexOf(firstAuthLname) > 0) {

                        // for example, A._Sanjoy2011 --> Sanjoy_A.2011
                        mySortTxt2 = firstAuthLname + "_" + mySortTxt2.replace(firstAuthLname, "");
                    }
                }

                // oldAuth is for |author-mask={{long dash}}:
                // remove parens, curly braces and em dashes
                oldAuth = mySortTxt2.replace(/[{()}]/g, '');
                oldAuth = oldAuth.replace(/—/g, "");
                oldAuth = oldAuth.trim();

                mySortTxt2 = mySortTxt2.toLowerCase();
                mySortTxt2 = mySortTxt2.replace("'", "");

                if (unsortedCites.indexOf(mySortTxt2) > -1) {
                    cite.insertAdjacentHTML('beforeend',
                        " <strong class=refckWarn> Warning: duplicate author/date: " +
                        escapeSortText(mySortTxt2) + "; </strong>");
                }
                unsortedCites.push(mySortTxt2);
                sortIndices.push(h);

                alreadySorted.push(cite);
            }

            // the same keys in the order they are expected to appear
            var sortedCites = unsortedCites.slice();
            sortedCites.sort(Intl.Collator().compare);

            for (var p = 0; p < unsortedCites.length; p++) {
                if (unsortedCites[p] != sortedCites[p]) {
                    var myPos2 = sortIndices[p];
                    var txtOut = sortedCites[p];

                    newcites[myPos2].insertAdjacentHTML('beforeend',
                        " <strong class=refckWarn> Sort error, expected: <u>" +
                        escapeSortText(txtOut) + "</u>; </strong>");
                }
            }
        }

        // First wait for mediawiki.util to load, and the page to be ready.
        // Then add "Show ref check" / "Hide ref check" links to the p-tb
        // portlet and restore the visibility the user chose last time.
        $.when(mw.loader.using('mediawiki.util'), $.ready).then(function() {
            var storageKey = 'reviewsourcecheck-state';

            // Default state: messages hidden. They are shown on load only if
            // the user's last stored choice was 'shown'.
            var isHidden = true;
            try {
                if (localStorage.getItem(storageKey) === 'shown') {
                    isHidden = false;
                }
            } catch (e) {
                // Deliberately ignored: if localStorage cannot be read, the
                // default (hidden) state is used.
            }

            // Shows or hides all messages and keeps the two portlet links in
            // step: only the link that would change the state is visible.
            var applyState = function(hidden) {
                $(".refckErr").toggle(!hidden);
                $(".refckWarn").toggle(!hidden);
                $('#ca-showCk').toggle(hidden);
                $('#ca-hideCk').toggle(!hidden);
            };

            // Remembers the chosen state; best effort only.
            var saveState = function(state) {
                try {
                    localStorage.setItem(storageKey, state);
                } catch (e) {
                    // Deliberately ignored: the toggle still works for this
                    // page view even if the choice cannot be stored.
                }
            };

            // General usage:
            mw.util.addPortletLink('p-tb', '#', 'Hide ref check', 'ca-hideCk', "Hide ref check");
            mw.util.addPortletLink('p-tb', '#', 'Show ref check', 'ca-showCk', "Show ref check");
            applyState(isHidden);

            $('#ca-hideCk').on('click', function() {
                applyState(true);
                saveState('hidden');
                // returning false stops the "#" link from being followed
                return false;
            });

            $('#ca-showCk').on('click', function() {
                applyState(false);
                saveState('shown');
                return false;
            });
        });

    }
});
// </nowiki>