// Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// This script shows, if found, the kanji and kana for an article
// It then calls another script, bindKana.js, to clean up the display of ruby
// For configuration, please see the documentation

// TODO: Reject if any base/reading has too low/high of a ratio.
// TODO: Reject if unbalanced parenthesis count
// TODO: <rb> is not actually in the WHATWG standard...

// License: CC0

/* Sample pages:
    https://en.wikipedia.org/wiki/Tamio_Kawachi - kana on wikidata
    https://en.wikipedia.org/wiki/A_Fantastic_Tale_of_Naruto - kanji from wikidata only
    https://en.wikipedia.org/wiki/What_a_Wonderful_World! - kana from wikidata only
    https://en.wikipedia.org/wiki/Asako_I_%26_II - from redirect
    https://en.wikipedia.org/wiki/Bokura_ga_Ita_(film) - interwiki to subsection
    https://ja.wikipedia.org/wiki/%E7%B4%AF - kana part of bolded title
    https://en.wikipedia.org/wiki/Bokutachi_no_Koukan_Nikki - kana not in first sentence
    https://en.wikipedia.org/wiki/Domestic_Girlfriend - first full sentence not lead
    https://ja.wikipedia.org/wiki/SCP%E8%B2%A1%E5%9B%A3 - bolded term w/ kana past first sentence
    https://en.wikipedia.org/wiki/Nuclear_fusion - different term w/ kana in lead
    https://en.wikipedia.org/wiki/Oedipus_Rex - other stuff in kana
    https://en.wikipedia.org/wiki/20th_Century_Boys - overcapturing because title is subset
    https://en.wikipedia.org/wiki/Seiza - kana not at start of parenthesis
    https://en.wikipedia.org/wiki/Indentation_style - other kana in disambiguation
    https://en.wikipedia.org/wiki/Haven%27t_You_Heard%3F_I%27m_Sakamoto - Halfwidth-fullwidth difference
    https://en.wikipedia.org/wiki/Kanji_Furutachi - kanji only
    https://en.wikipedia.org/wiki/Anata_e - hiragana only
    https://en.wikipedia.org/wiki/Anatahan_(film) - katakana only
    https://en.wikipedia.org/wiki/A.LI.CE - latin only
    https://en.wikipedia.org/wiki/0.5_mm - numeric
    https://en.wikipedia.org/wiki/Truth_Coming_Out_of_Her_Well - angle brackets
    https://en.wikipedia.org/wiki/South_of_the_Border,_West_of_the_Sun - kana contains comma
    https://en.wikipedia.org/wiki/Leap_year - multiple kana separated by comma
    https://en.wikipedia.org/wiki/Do_You_Love_Your_Mom_and_Her_Two-Hit_Multi-Target_Attacks%3F - question mark
    https://en.wikipedia.org/wiki/Comic_Magazine - exclamation point
    https://en.wikipedia.org/wiki/Tsurune - dash
    https://en.wikipedia.org/wiki/Flare_(film) - wave dash
    https://en.wikipedia.org/wiki/Dog%C3%97Police - multiplication sign
    https://en.wikipedia.org/wiki/Foreboding_(film) - spaces
    https://en.wikipedia.org/wiki/Age_12 - period in title
    https://en.wikipedia.org/wiki/Suzukake_Nanchara - very long kanji
    https://en.wikipedia.org/wiki/After_the_Rain_(manga) - kanji + hiragana
    https://en.wikipedia.org/wiki/Afro_Tanaka - kanji + katakana
    https://en.wikipedia.org/wiki/Battle_Girl:_The_Living_Dead_in_Tokyo_Bay - katakana + latin
    https://en.wikipedia.org/wiki/Calling_You_(short_story_collection) - kanji + hiragana + latin
    https://en.wikipedia.org/wiki/Ashita_no_Joe - hiragana + katakana
    https://en.wikipedia.org/wiki/Arcadia_of_My_Youth - kanji + hiragana + katakana
    https://en.wikipedia.org/wiki/Haou_Airen - special character
    https://ja.wikipedia.org/wiki/%E6%98%A0%E7%94%BB_%E8%81%B2%E3%81%AE%E5%BD%A2 - reference in between
    https://en.wikipedia.org/wiki/Ninjō - No interlanguage, but wiktionary
    https://en.wikipedia.org/wiki/Seiza - Interlanguage failed, but wiktionary
    https://en.wikipedia.org/wiki/Epsomite - No interlanguage, but wiktionary "see" Table
    https://en.wikipedia.org/wiki/Bakayaro!_I%27m_Plenty_Mad - only part of parenthesis extracted

    https://en.wikipedia.org/wiki/ORCID
    https://en.wikipedia.org/wiki/Survive_Style_5%2B - fails due to +
    https://en.wikipedia.org/wiki/Ko-Shint%C5%8D
    https://ja.wikipedia.org/wiki/Terminate_and_Stay_Resident
    https://en.wikipedia.org/wiki/Ikk%C5%8D-sh%C5%AB
    https://en.wikipedia.org/wiki/Kakegoe - doesn't find jawiki interlanguage
    https://en.wikipedia.org/wiki/Love_Live!_The_School_Idol_Movie - interpunct in reading
    https://en.wikipedia.org/wiki/Lupin_the_Third:_The_Woman_Called_Fujiko_Mine - hyphen in kanji
    https://en.wikipedia.org/wiki/Sunscreen
    https://en.wikipedia.org/wiki/Flag_of_China
    https://en.wikipedia.org/wiki/W3m
    https://en.wikipedia.org/wiki/Magnum_Collection_1999_%22Dear%22
    https://en.wikipedia.org/wiki/EC_Comics
    https://en.wikipedia.org/wiki/CJK_characters
    https://en.wikipedia.org/wiki/My_Girlfriend_is_Shobitch
    https://en.wikipedia.org/wiki/Immaculate_Conception_Cathedral,_Nagasaki - partial match
    https://en.wikipedia.org/wiki/USA-224 - または
    https://en.wikipedia.org/wiki/Milk - bad match
    https://en.wikipedia.org/wiki/Not_invented_here
*/

// Entry point. On a plain article view it reserves a placeholder inside the
// page heading and asks Wikidata for the Japanese label of the page's item.
function setup() {
    var config = mw.config;

    // Only run when viewing an article (not a specific revision via oldid=),
    // not on the main page, and not on a wiki whose content language is Japanese
    var isPlainArticleView = config.get( 'wgAction' ) === 'view'
        && config.get( 'wgIsArticle' )
        && !location.search.split('oldid=')[1]
        && !config.get("wgIsMainPage")
        && config.get("wgContentLanguage") !== "ja";
    if (!isPlainArticleView) {
        return;
    }

    // Assuming that if there's no wikidata, there're no 1:1 interlanguage links,
    // and we don't want cases where a page links to a subsection of a jawiki
    // article
    if (wikidataId === null) {
        return;
    }

    // Pick the first heading element that exists, in this order
    var headingSelectors = [
        '#firstHeading', // Vector
        '.page-heading'  // Minerva
    ];
    var header = null;
    for (var i = 0; i < headingSelectors.length && header === null; i++) {
        var candidate = $(headingSelectors[i]);
        if (candidate.length) {
            header = candidate;
        }
    }
    if (header === null) {
        console.error("showKanji-dev.js: Couldn't find a page heading. This skin ("
                      + mw.config.get( 'skin' ) + ") might not be supported.");
        return;
    }

    // Placeholder so other elements don't push it down later
    header.append("<div id='kanjiInfo' lang='ja' dir='ltr'></div>");

    // Get the Japanese label from wikidata
    // API docs: https://www.wikidata.org/w/api.php?action=help&modules=wbgetentities
    var labelQuery = {
        action: "wbgetentities",
        ids: wikidataId,
        props: "labels",
        languages: "ja",
        format: "json",
        origin: "*"
    };
    $.ajax({
        url: "https://www.wikidata.org/w/api.php",
        data: labelQuery,
        success: parseJaLabel
    });
}

// Success callback for the wbgetentities request made in setup().
// Extracts the Japanese label of the page's Wikidata item, shows it, and,
// unless the label is kana-only, goes on to look for a reading.
//
// response: parsed JSON from wbgetentities. It may lack `entities` (API
// error) or the entity may lack `labels` (e.g. a missing entity); in those
// cases there is nothing to show and the function returns quietly instead of
// throwing.
function parseJaLabel(response) {
    var entities = response ? response.entities : undefined;
    var wikidataInfo = entities ? entities[wikidataId] : undefined;
    var labels = wikidataInfo ? wikidataInfo.labels : undefined;
    var jaLabel = labels && labels.ja ? labels.ja.value : undefined;

    if (!jaLabel) {
        return;
    }

    jaLabel = jaLabel.toHalfWidth();
    console.log("showKanji-dev.js: kanji: `" + jaLabel + "`");
    // buildRegexes() must run first: displayKanji() and the check below read
    // the regexes it stores on kanjiRegexes
    buildRegexes(jaLabel);
    displayKanji(jaLabel);

    // If the Japanese title is not kana-only, get the reading
    if (!kanjiRegexes.kanaOnly.test(jaLabel)) {
        requestKana();
    }
}

// Builds the regexes used by the rest of the script and stores them on the
// shared kanjiRegexes object:
//   latinOnly / kanaOnly / hiraganaOnly / katakanaOnly - classify a string
//   lead / leadUnspaced - find "<title>...(<reading>" in a jawiki lead
//
// kanji: the (half-width normalised) Japanese label.
//
// Whatever is left of the label after removing kanji and kana (punctuation,
// Latin letters, digits...) is allowed inside the reading's character class.
function buildRegexes(kanji) {
    // Strip $kanji of all kanji and kana, adding whatever is left to the regex
    var reKanjiKana = /[\u3400-\u4DB5\u4E00-\u9FCB\uF900-\uFA6Aぁ-ゔァ-ヴー-]/g;
    var kanjiStripped = kanji.replace(reKanjiKana, "");
    // The leftovers are spliced into character classes below, so neutralise
    // the characters that are special there. Otherwise a label containing
    // `]`, `\` or `^` closes or corrupts the class and nothing matches.
    kanjiStripped = kanjiStripped.replace(/[\\\]^]/g, "\\$&");
    kanjiStripped += " ";
    // Hyphens were removed above (trailing `-` in reKanjiKana); add one back
    // escaped since it has special behavior in regex classes
    kanjiStripped += "\\-";
    // Same leftovers minus Latin letters, digits and underscore
    var kanjiAuxiliary = kanjiStripped.replace(/\w/g, "");

    kanjiRegexes.latinOnly = /^[A-Za-z0-9\-.?!/,:;@#$%&+=*'"・ ]+$/;
    kanjiRegexes.kanaOnly = new RegExp("^[ぁ-ゔァ-ヴー" + kanjiAuxiliary + "]+$");
    kanjiRegexes.hiraganaOnly = new RegExp("^[ぁ-ゔーA-Za-z" + kanjiAuxiliary + "]+$");
    kanjiRegexes.katakanaOnly = new RegExp("^[ァ-ヴーA-Za-z" + kanjiAuxiliary + "]+$");

    // Add midpoint for Latin in titles
    if (/\w/.test(kanji)) { kanjiStripped += "・"; }
    console.log("showKanji-dev.js: stripped: `" + kanjiStripped + "`");

    var leadReBase = "([ぁ-ゔァ-ヴー" + kanjiStripped + "]+)";
    var kanjiEscaped = mw.util.escapeRegExp(kanji);
    // Account for spaces, but ignore backslash and other misc characters:
    // allow an optional space after every kanji/kana/Latin character
    var reKanjiKanaLatin = /([\u3400-\u4DB5\u4E00-\u9FCB\uF900-\uFA6Aぁ-ゔァ-ヴーA-Za-z0-9])/g;
    var kanjiSpaced = kanjiEscaped.replace(/ /g, " ?");
    kanjiSpaced = kanjiSpaced.replace(reKanjiKanaLatin, "$1 ?");

    // Add kanji to regex to make sure we're not getting the reading of some
    // other term
    kanjiRegexes.leadUnspaced = new RegExp(kanjiEscaped + "[^(\n)]*?\\(" + leadReBase);
    kanjiRegexes.lead = new RegExp(kanjiSpaced + "[^(\n)]*?\\(" + leadReBase, "i"); // brittle
}

// Shows the Japanese label inside the #kanjiInfo placeholder, remembers it in
// the shared wikidataKanji variable, and tags the placeholder with a class and
// tooltip describing the script the label is written in.
//
// kanji: the Japanese label (already half-width normalised by the caller).
function displayKanji(kanji) {
    wikidataKanji = kanji;

    var info = $("#kanjiInfo");
    // Insert the label as text, never as markup: labels are user-editable and,
    // after toHalfWidth(), fullwidth angle brackets become real `<`/`>` which
    // would otherwise be parsed as tags.
    info.append($("<ruby>").text(kanji));

    // Add some classes so users can choose to not display for example
    // katakana-only kanji in their CSS
    if (kanjiRegexes.latinOnly.test(kanji)) {
        info.addClass("kanjiInfo-latin-only");
        info.prop("title", "Japanese title in Latin script");
        // Latin-only labels are hidden
        info.css("display", "none");
    } else if (kanjiRegexes.hiraganaOnly.test(kanji)) {
        info.addClass("kanjiInfo-hiragana-only");
        info.prop("title", "Japanese title in hiragana");
    } else if (kanjiRegexes.katakanaOnly.test(kanji)) {
        info.addClass("kanjiInfo-katakana-only");
        info.prop("title", "Japanese title in katakana");
    } else {
        info.prop("title", "Japanese title in kanji");
    }
}

// Asks Wikidata for every claim on the page's item; parseKanaClaim() then
// looks through them for a kana reading.
function requestKana() {
    // API docs: https://www.wikidata.org/w/api.php?action=help&modules=wbgetclaims
    // We have to wholesale get all the claims instead of just one because the
    // kana might be present as a qualifier to another claim
    var claimsQuery = {
        action: "wbgetclaims",
        entity: wikidataId,
        format: "json",
        origin: "*"
    };
    var request = {
        url: "https://www.wikidata.org/w/api.php",
        data: claimsQuery,
        success: parseKanaClaim
    };
    $.ajax(request);
}

// Success callback for the wbgetclaims request made in requestKana().
// Looks for a kana reading (P1814) on Wikidata, first as a qualifier of a
// name-like statement whose text equals the displayed label, then as a
// statement of its own. If none is found, falls back to getInterlanguage().
//
// response: parsed JSON from wbgetclaims. Statements without a `datavalue`
// and responses without `claims` are tolerated rather than throwing.
function parseKanaClaim(response) {
    var properties = {
        title: "P1476",
        nativeLabel: "P1705",
        officialName: "P1448",
        nameInNativeLanguage: "P1559"
    };
    var nameInKana = "P1814";
    var claims = (response && response.claims) || {};
    // Compare ignoring spaces, as the label and the statement may space differently
    var wantedKanji = wikidataKanji.replace(/ /g, "");

    // Value carried by a snak, or undefined when the snak has no datavalue
    function snakValue(snak) {
        return snak && snak.datavalue ? snak.datavalue.value : undefined;
    }

    // Kana qualifier of a statement, but only if the statement's own text
    // matches the label we are displaying; undefined otherwise
    function kanaQualifier(statement) {
        var mainValue = snakValue(statement.mainsnak);
        if (!mainValue || typeof mainValue.text !== "string"
            || mainValue.text.replace(/ /g, "") !== wantedKanji) {
            return undefined;
        }
        var qualifiers = statement.qualifiers && statement.qualifiers[nameInKana];
        var value = qualifiers && qualifiers.length ? snakValue(qualifiers[0]) : undefined;
        return typeof value === "string" ? value : undefined;
    }

    var kana;
    var prop; // name of the source, used for the CSS class below

    // Try getting nameInKana as a qualifier to some properties. Every
    // statement of a property is checked, not just the first one.
    for (var name in properties) {
        var statements = claims[properties[name]] || [];
        for (var i = 0; i < statements.length && !kana; i++) {
            kana = kanaQualifier(statements[i]);
        }
        if (kana) {
            prop = name;
            break;
        }
    }

    // Try getting nameInKana as a general claim
    if (!kana) {
        var kanaStatements = claims[nameInKana] || [];
        for (var j = 0; j < kanaStatements.length; j++) {
            var value = snakValue(kanaStatements[j].mainsnak);
            if (typeof value === "string" && value) {
                kana = value;
                prop = "nameInKana";
                break;
            }
        }
    }

    // We couldn't find nameInKana
    if (!kana) {
        getInterlanguage();
        return;
    }

    kana = kana.toHalfWidth();
    displayKana(kana);
    $("#kanjiInfo").addClass("kanjiInfo-wikidata");
    $("#kanjiInfo").addClass("kanjiInfo-wikidata-" + prop);
}

// Asks the local wiki for this page's interlanguage link to the Japanese
// Wikipedia. If there is one, scrapeKana() is called with the linked title
// (anchor removed); otherwise getWiktionary() is tried instead.
function getInterlanguage() {
    var apiUrl = location.origin + "/w/api.php";
    // Query by page ID rather than wgTitle: wgTitle has no namespace prefix,
    // so for pages outside the main namespace it names a different page and
    // the result could never be found under wgArticleId below.
    var pageId = mw.config.get( 'wgArticleId' );
    // Documentation: https://en.wikipedia.org/w/api.php?action=help&modules=query%2Blanglinks
    $.ajax({
        url: apiUrl,
        data: {
            action: "query",
            format: "json",
            prop: "langlinks",
            lllang: "ja",
            pageids: pageId
        },
        success: function(response) {
            // `query` is absent when the API answers with an error
            var pages = response && response.query ? response.query.pages : undefined;
            var page = pages ? pages[pageId] : undefined;
            var langlinks = page ? page.langlinks : undefined;
            if (!langlinks || !langlinks.length) {
                getWiktionary();
                return;
            }

            var jaLabel = langlinks[0]["*"];
            jaLabel = jaLabel.replace(/(.*)#.*/, "$1"); // rm anchors
            scrapeKana(jaLabel);
        }
    });
}

// Requests the plain-text start of the Japanese Wikipedia article named
// jaLabel; the response is handed to getFirstSentence().
function scrapeKana(jaLabel) {
    // Get jawiki article's lead wikitext
    // API docs: https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bextracts
    var extractQuery = {
        action: "query",
        prop: "extracts",
        format: "json",
        redirects: true,
        exintro: true,
        exsentences: 2,
        exlimit: 1,
        explaintext: true,
        titles: jaLabel,
        origin: "*"
    };
    var request = {
        url: "https://ja.wikipedia.org/w/api.php",
        data: extractQuery,
        success: getFirstSentence
    };
    $.ajax(request);
}

// Success callback for the TextExtracts request made in scrapeKana().
// Searches the start of the jawiki article for "<title>...(<reading>" using
// kanjiRegexes.lead and displays the reading. Whenever no usable reading is
// found, getWiktionary() is tried instead.
//
// response: parsed JSON from the extracts query; may lack `query` when the
// API answers with an error.
function getFirstSentence(response) {
    var pages = response && response.query ? response.query.pages : undefined;
    // Have to split parsing into two parts since jawiki pageid is unknown
    var pageId = pages ? Object.keys(pages)[0] : undefined;
    var introText = pageId !== undefined ? pages[pageId].extract : undefined;

    if (!introText) {
        console.error("showKanji-dev.js: TextExtracts failed to get a lead for the Japanese article.");
        getWiktionary();
        return;
    }

    var wikitext = introText.toHalfWidth();

    console.log("showKanji-dev.js: lead: `" + wikitext + "`");
    console.log("showKanji-dev.js: regex: `" + kanjiRegexes.lead + "`");
    console.log("showKanji-dev.js: regex (unspaced): `" + kanjiRegexes.leadUnspaced + "`");

    // The regex has exactly one capture group: the reading
    var kanaSearch = wikitext.match(kanjiRegexes.lead);
    if (!kanaSearch) {
        getWiktionary();
        return;
    }

    // Rm trailing characters
    var kana = kanaSearch[1].replace(/[・、 ]$/, "");

    // Nothing left after trimming (the capture was a lone separator)
    if (!kana) {
        getWiktionary();
        return;
    }

    // Abort if our reading is only katakana (for non-Latin) or Latin
    if ((!kanjiRegexes.latinOnly.test(wikidataKanji) && kanjiRegexes.katakanaOnly.test(kana))
        || kanjiRegexes.latinOnly.test(kana)) {
        console.log("showKanji-dev.js: throwing away reading: " + kana);
        getWiktionary();
        return;
    }

    displayKana(kana);
    $("#kanjiInfo").addClass("kanjiInfo-jawiki");
}

// Adapted from:
//     http://ilog4.blogspot.com/2015/09/javascript-convert-full-width-and-half.html
//     https://stackoverflow.com/a/20488304/1995949
//     https://en.wikipedia.org/wiki/Halfwidth_and_fullwidth_forms
// Returns a copy of the string with fullwidth ASCII variants (U+FF01-U+FF5E)
// replaced by their ordinary ASCII counterparts, and the ideographic space
// (U+3000) replaced by a regular space.
String.prototype.toHalfWidth = function() {
    // The fullwidth block mirrors ASCII 0x21-0x7E at a fixed offset of 0xFEE0
    var halfWidth = this.replace(/[\uff01-\uff5e]/g, function(ch) {
        return String.fromCharCode(ch.charCodeAt(0) - 0xFEE0);
    });
    // Written as an escape so the character can't be silently turned into a
    // plain space by an editor or copy/paste (which makes this a no-op)
    return halfWidth.replace(/\u3000/g, " ");
};

// We use the English Wiktionary because it has more terms and better structure
// Requests the section list of the English Wiktionary page named after the
// displayed label; findJapaneseSection() handles the response.
function getWiktionary() {
    // API docs: https://en.wikipedia.org/w/api.php?action=help&modules=parse
    var sectionsQuery = {
        action: "parse",
        format: "json",
        page: wikidataKanji,
        prop: "sections",
        origin: "*"
    };
    var request = {
        url: "https://en.wiktionary.org/w/api.php",
        data: sectionsQuery,
        success: findJapaneseSection
    };
    $.ajax(request);
}

// Success callback for the section-list request made in getWiktionary().
// Finds the first section titled "Japanese" and requests its rendered text,
// which parseWiktionary() then processes. Does nothing further when the page
// doesn't exist or has no such section.
function findJapaneseSection(response) {
    if (response.error) {
        console.log("showKanji-dev.js: No Wiktionary item for " + wikidataKanji);
        return;
    }

    // First section whose heading is "Japanese", if any
    var japaneseSections = response.parse.sections.filter(function(section) {
        return section.line == "Japanese";
    });
    var sectionIndex = japaneseSections.length ? japaneseSections[0].index : undefined;

    if (sectionIndex == null) {
        console.log("showKanji-dev.js: Wiktionary entry doesn't have a section titled 'Japanese'");
        return;
    }

    // API docs: https://en.wikipedia.org/w/api.php?action=help&modules=parse
    var textQuery = {
        action: "parse",
        format: "json",
        page: wikidataKanji,
        prop: "text",
        section: sectionIndex,
        origin: "*"
    };
    $.ajax({
        url: "https://en.wiktionary.org/w/api.php",
        data: textQuery,
        success: parseWiktionary
    });
}

// Success callback for the section-text request made in
// findJapaneseSection(). Pulls a reading out of the rendered Wiktionary HTML,
// either from the Japanese headword or, failing that, from the first ruby
// element marked .Jpan, and displays it. The first definition, when one is
// found, becomes the tooltip.
//
// response: parsed JSON from action=parse; may lack `parse` on an API error.
function parseWiktionary(response) {
    if (!response || !response.parse || !response.parse.text) {
        console.log("showKanji-dev.js: Wiktionary returned no text for " + wikidataKanji);
        return;
    }

    var html = response.parse.text["*"];
    var parsed = $($.parseHTML(html));

    // Wiktionary adds readings as furigana
    var headword = parsed.find(".headword:lang(ja)").first();
    var seeTable = parsed.find(".Jpan ruby").first();

    var kanji = "";
    var kana = "";
    if (headword.length) {
        // Wiktionary already binds their kana, so we have to undo the process to get
        // the constituent parts, at least with the current markup
        var childNodes = headword[0].childNodes;
        for (let i = 0; i < childNodes.length; i++) {
            if (childNodes[i].nodeName === "RUBY") {
                var ruby = $(childNodes[i]); // convert back to JQuery for convenience
                ruby.children("rp").remove();
                // Take the <rt> out first so that what remains is the base text
                kana += ruby.children("rt").detach().text();
                kanji += ruby.text();
            } else if (childNodes[i].nodeType === 3) { // "#text"
                // Plain text belongs to both the spelling and the reading
                kanji += childNodes[i].nodeValue;
                kana += childNodes[i].nodeValue;
            }
        }

        // The headword is for a different spelling than the one we display
        if (kanji !== wikidataKanji) { return; }
    } else if (seeTable.length) {
        kanji = seeTable.children("rb").text();
        kana = seeTable.children("rt").text();
    } else {
        return;
    }

    if (!kana) {
        return;
    }

    displayKana(kana);
    $("#kanjiInfo").addClass("kanjiInfo-wiktionary");

    // Extra stuff just for fun
    var definition = headword.parent().siblings("ol").children("li").first().text();
    definition = definition.split('\n', 1)[0];
    definition = definition.replace(/\[[0-9]{1,2}\]/g, "");
    // Keep the existing tooltip when no definition was found (always the case
    // when the reading came from the ruby fallback, since headword is empty)
    if (definition) {
        $("#kanjiInfo").prop("title", definition);
    }
}

// Attaches the reading as an <rt> to the <ruby> element inside #kanjiInfo,
// then loads bindKana-dev.js unless the label consists solely of kanji.
//
// kana: the reading to show.
function displayKana(kana) {
    // Insert as text, not markup: the reading comes from remote, user-editable
    // sources (Wikidata, jawiki, Wiktionary)
    $("#kanjiInfo ruby").append($("<rt>").text(kana));

    // Cleanup redundant furigana with another script
    var kanjiOnlyRe = /^[\u3400-\u4DB5\u4E00-\u9FCB\uF900-\uFA6A]+$/;
    if (!kanjiOnlyRe.test(wikidataKanji)) {
       mw.loader.load( '//en.wikipedia.org/w/index.php?title=User:Opencooper/bindKana-dev.js&action=raw&ctype=text/javascript' );
    }
}

// State shared by the functions above.

// Wikidata item ID of the current page as reported by the page config;
// setup() bails out when this is null.
var wikidataId = mw.config.get( 'wgWikibaseItemId' );
// The Japanese label being displayed; assigned in displayKanji().
var wikidataKanji;
// Regexes for classifying and matching the label; filled in by buildRegexes().
var kanjiRegexes = {};
// Hand setup to jQuery, which runs it once the document is ready.
$(setup);