/// PerfektesChaos/js/WikiSyntaxTextMod/dC.js
/// 2019-01-01 PerfektesChaos@de.wikipedia
/// Fingerprint: #0#0#
/// <nowiki>
// WikiSyntaxTextMod: Wiki specific character handling
/* global mw:true, mediaWiki:false */
/* jshint forin:false,
bitwise:true, curly:true, eqeqeq:true, latedef:true,
laxbreak:true,
nocomma:true, strict:true, undef:true, unused:true */
if ( typeof mediaWiki !== "object" ) {
   // Disconnected from MediaWiki: provide a minimal stand-in offering
   // just the members this script accesses.
   mw = { };
   mw.config = false;
   mw.libs = { WikiSyntaxTextMod: { } };
   mw.log = function () {"use strict";};
}
( function ( mw ) {
   "use strict";
   // Announce this sub-module to the resource loader (if there is one)
   // and make sure the containers used further down do exist.
   var sign    = "WikiSyntaxTextMod",
       sub     = "C",
       version = -7.11,
       ensure  = function ( host, name ) {
                    // Create host[name] as empty object unless it
                    // already is an object; return the member
                    if ( typeof host[ name ] !== "object" ) {
                       host[ name ] = { };
                    }
                    return host[ name ];
                 },
       WSTM, rls, self;
   if ( typeof mw.loader === "object" ) {
      self = "user:PerfektesChaos/" + sign + "/" + sub;
      rls  = { };
      rls[ self ] = "loading";
      mw.loader.state( rls );
   }
   WSTM = ensure( mw.libs, sign );   // isolated?
   ensure( ensure( WSTM, "w" ),  "chr" );
   WSTM.w.chr.vsn  = version;
   WSTM.w.chr.self = self;   // stays undefined without mw.loader
   ensure( WSTM, "bb" );
   ensure( WSTM, "debugging" );
} ( mw ) );
/*
Requires: JavaScript 1.3
(String.charCodeAt String.fromCharCode String.replace)
*/
//-----------------------------------------------------------------------
mw.libs.WikiSyntaxTextMod.bb.bbC = function (WSTM) {
   // Building block and run environment support:
   // provide .util.fiatObjects() unless some other module did already,
   // then ensure .debugging
   "use strict";
   var fiat;
   if (typeof WSTM.util !== "object") {
      WSTM.util = { };
   }
   if (typeof WSTM.util.fiatObjects !== "function") {
      fiat = function (adult, activate, assign) {
         // Ensure existence of at least empty object
         // Precondition:
         //    adult     -- parent object
         //    activate  -- String with name of child object
         //    assign    -- optional object with initial definition;
         //                 it becomes the child itself if the child
         //                 is not an object yet.
         //                 Components of type object are asserted
         //                 recursively; other components do not
         //                 overwrite an already existing child.
         // Postcondition:
         //    adult has been extended
         // Uses:
         //    .util.fiatObjects()  -- recursive
         var child, name, part;
         if (typeof adult[activate] !== "object") {
            adult[activate] = assign || { };
         }
         if (! assign) {
            return;
         }
         child = adult[activate];
         for (name in assign) {
            part = assign[name];
            if (typeof part === "object") {
               WSTM.util.fiatObjects(child, name, part);
            }
         }   // for name in assign
      };
      WSTM.util.fiatObjects = fiat;
   }
   WSTM.util.fiatObjects(WSTM,  "debugging",  { loud: false });
};   // .bb.bbC()
// Run once, then discard the builder
mw.libs.WikiSyntaxTextMod.bb.bbC(mw.libs.WikiSyntaxTextMod);
delete mw.libs.WikiSyntaxTextMod.bb.bbC;
//-----------------------------------------------------------------------
mw.libs.WikiSyntaxTextMod.bb.chr = function (WSTM) {
// Wiki specific character handling
// Uses:
// .util.fiatObjects()
// 2012-10-10 PerfektesChaos@de.wikipedia
"use strict";
// Make sure that .w.chr and its children .lang and .re are objects
WSTM.util.fiatObjects( WSTM,
                       "w",
                       { chr: { lang: { }, re: { } } } );
// Findings collected while filtering; an existing record is retained.
// exchange is numeric: .w.chr.filter() and .w.chr.form() store the
// lowest rare char code they encountered
WSTM.w.chr.detected = WSTM.w.chr.detected ||
                      { ampersand: false,
                        exchange:  0,
                        nbHyphen:  false,
                        ordMasc:   false,
                        tab:       false,
                        tabEOL:    false,
                        white:     false };
WSTM.w.chr.fallback = function (adjust) {
   // Does a char code belong to one of the rare ranges which are
   // candidates for substitution?
   // Precondition:
   //    adjust  -- charcode to be checked
   // Postcondition:
   //    Returns true iff charcode is candidate for substitution
   return ((adjust >= 8544   &&  adjust <= 8575)    ||   // Roman digits
           (adjust >= 10005  &&  adjust <= 10015)   ||   // Dingbats
           (adjust >= 12288  &&  adjust <= 12301)   ||   // ideographic
           (adjust >= 65279  &&  adjust <= 65374));      // BOM, fullwidth
};   // .w.chr.fallback()
WSTM.w.chr.fashion = function (adjust, address) {
   // Exchange string, replace Dingbats, fullwidth, Roman digits
   // Precondition:
   //    adjust   -- String to be manipulated
   //    address  -- last position of suspicious char within adjust
   // Postcondition:
   //    Returns modified String, exchanged undesired characters,
   //            or false if nothing has been changed
   // Uses:
   //    >  .w.chr.detected.exchange
   //    .w.chr.format()
   //    .str.setChar()
   //    .str.setString()
   var r = false,
       s = adjust,
       c,
       i,
       x;
   // Walk backwards, so a replacement of different length does not
   // disturb the positions still to be visited
   for (i = address;  i >= 0;  i--) {
      c = s.charCodeAt(i);
      if (c >= this.detected.exchange) {
         // The last range starts at 65279 (U+FEFF), in accordance
         // with .w.chr.fallback() and the BOM branch of
         // .w.chr.format(), which could not be reached otherwise
         if ((c >= 8544   &&  c <= 8575)    ||     // Roman digits
             (c >= 12288  &&  c <= 12301)   ||     // ideographic
             (c >= 65279  &&  c <= 65374)) {       // BOM, fullwidth
            x = this.format(c, s, i);
            if (x !== c) {
               // Any other result type than number or string is
               // ignored and leaves the text untouched
               if (typeof x === "number") {
                  s = WSTM.str.setChar(s, x, i);
                  r = s;
               } else if (typeof x === "string") {
                  s = WSTM.str.setString(s, i, 1, x);
                  r = s;
               }
            }
         } else if (c >= 10005  &&  c <= 10015) {   // Dingbats
            switch (c) {
               case 10005 :   // 2715
               case 10006 :   // 2716
                  c = 215;    // times
                  break;
               case 10013 :   // 271D
               case 10014 :   // 271E
               case 10015 :   // 271F
                  c = 8224;   // dagger
                  break;
            }   // switch c
            if (c < 10000) {   // has been mapped
               s = WSTM.str.setChar(s, c, i);
               r = s;
            }
         }   // rare codes
      }   // .w.chr.detected.exchange
   }   // for i
   return r;
};   // .w.chr.fashion()
WSTM.w.chr.filter = function (all) {
// Improve character encoding in string, encode, trim lines
// The source is scanned backwards in a single pass, so that removing
// or expanding characters does not shift positions not yet visited.
// Precondition:
// all -- root WikiTom to be manipulated
// Postcondition:
// Modifies all::
// >< .source -- exchanged unreadable characters
// removed trailing whitespace / control codes
// < .learnt -- true: significant encoding change
// false: possibly trailing whitespace cut
// (.learnt is only ever set to true here; it is not
// reset to false by this function)
// Characters which may occur in links and syntax are postponed.
// Uses:
// > .warn.char
// >< .w.chr.detected.exchange
// >< .w.chr.whitespace
// < .w.encountered.indent
// < .w.encountered.bidichar
// < .w.chr.detected.ampersand
// < .w.chr.detected.nbHyphen
// < .w.chr.detected.ordMasc
// < .w.chr.detected.tab
// < .w.chr.detected.tabEOL
// < .w.chr.detected.white
// .str.setString()
// .str.setChar()
// .w.chr.form()
// .w.chr.fallback()
// .w.chr.fishing()
// Requires: JavaScript 1.3 charCodeAt()
// 2016-01-24 PerfektesChaos@de.wikipedia
// d -- replacement made for which .detected.ampersand is raised below
// l -- any other significant replacement
var d = false,
l = false,
s = all.source,
c,
i,
k;
for (i = s.length - 1; i >= 0; i--) {
c = s.charCodeAt(i);
if (c < 128) { // ASCII
if (c < 32) { // control code
switch (c) {
case 10 : // newline
// NOTE(review): k has not been assigned yet in this iteration.
// At the first newline met it is undefined, so k + 1 is NaN and
// charCodeAt() reads position 0; later on it still holds the value
// left by the previous newline handling. Confirm the intended index.
if (s.charCodeAt(k + 1) === 10) {
WSTM.w.encountered.indent = true;
}
// Look backwards for the last char which is neither space nor
// control code, or for the previous newline
for (k = i-1; k >= 0; k--) {
c = s.charCodeAt(k);
if (c > 32 || c === 10) {
if (c === 61) { // '='
// Keep one whitespace char after '=', unless it is a tab
k++;
if (s.charCodeAt(k) === 9) {
k--;
}
} // template?
break; // for k
} else if (c === 9) {
this.detected.tabEOL = true;
}
} // for k
if (k < i - 1) { // trailing whitesp: minor mod
this.detected.white = true;
k++;
// Cut everything between the kept char and the newline;
// the newline moves to position k, where the scan resumes
s = WSTM.str.setString(s, k, i - k, "");
i = k;
}
break;
case 13 : // IE inserts CR before any LF
break;
case 9 : // hor tab
this.detected.tab = true;
break;
case 11 : // vertical tab
case 12 : // form feed
s = WSTM.str.setChar(s, 10, i); // newline
l = true;
break;
default: // forbidden
s = WSTM.str.setChar(s, "", i); // discard
l = true;
break;
} // switch c
} else if (c === 38) { // '&'
// Any ampersand not starting "&nbsp;" requests entity processing
if (s.substr(i + 1, 5) !== "nbsp;") {
this.detected.ampersand = true;
}
}
} else {
if (c <= 160) { // windows codepages and nbsp
// Codes 128...160 become a decimal numeric entity;
// only l is set here, .detected.ampersand is not raised
s = WSTM.str.setChar(s, "&#" + c + ";", i); // show
l = true;
} else if (c < 8193) { // common chars
switch (c) {
case 186 : // masculine ordinal indicator
this.detected.ordMasc = true;
break;
case 847 : // COMBIN GRAPHEME JOINER \x034F
c = this.form(c);
s = WSTM.str.setChar(s, c, i);
l = true;
break;
} // switch c
// Code 8193 itself is matched neither by the branch above (< 8193)
// nor by the following one (> 8193)
} else if (c > 8193 && c < 8289) { // Unicode control
// make visible
// NOTE(review): the string literals passed to setChar() in the cases
// setting d seem to hold a bare character where, judging from
// "make visible" and from d raising .detected.ampersand, an entity
// was presumably meant -- verify against the original script.
switch (c) {
case 8194 : // enSpace \u2002
case 8195 : // emSpace \u2003
case 8196 : // THREE-PER-EM SPACE \u2004
case 8197 : // FOUR-PER-EM SPACE \u2005
case 8200 : // PUNCTUATION SPACE \u2008
s = WSTM.str.setChar(s, 32, i); // space
l = true;
break;
case 8198 : // SIX-PER-EM SPACE \u2006
case 8201 : // thinsp
s = WSTM.str.setChar(s, " ", i);
d = true;
break;
case 8239 : // NARROW NO-BREAK SPACE
s = WSTM.str.setChar(s, " ", i);
d = true;
break;
case 8203 : // ZERO WIDTH SPACE used in .lang.chr.zwsp
s = WSTM.str.setChar(s, " ", i);
d = true;
break;
case 8204 : // ZERO WIDTH NON-JOINER in .lang.chr.zwnj
s = WSTM.str.setChar(s, "‌", i);
d = true;
break;
case 8205 : // ZERO WIDTH JOINER in .lang.chr.zwj
s = WSTM.str.setChar(s, "‍", i);
d = true;
break;
case 8209 : // NON-BREAKING HYPHEN \u2011
this.detected.nbHyphen = true;
break;
case 8206 : // LEFT-TO-RIGHT MARK \u200E
case 8207 : // RIGHT-TO-LEFT MARK \u200F
case 8234 : // LEFT-TO-RIGHT EMBEDDING
case 8235 : // RIGHT-TO-LEFT EMBEDDING
case 8236 : // POP DIRECTIONAL FORMATTING
case 8237 : // LEFT-TO-RIGHT OVERRIDE
case 8238 : // RIGHT-TO-LEFT OVERRIDE
WSTM.w.encountered.bidichar = true;
// fall through
case 8202 : // hair space (english typography)
case 8232 : // LINE SEPARATOR
case 8233 : // PARAGRAPH SEPARATOR
case 8287 : // MEDIUM MATHEMATICAL SPACE
case 8288 : // WORD JOINER
// Replacement (char code or string) is decided by .w.chr.form()
c = this.form(c);
s = WSTM.str.setChar(s, c, i);
l = true;
break;
} // switch c
} else if (c === 8960) { // Unicode
// \u2300 DIAMETER SIGN TODO
s = WSTM.str.setChar(s, 248, i); // ø
l = true;
// NOTE(review): charCodeAt() yields UTF-16 code units (0...65535),
// hence the second range (983040...1114111) can never match here;
// such characters arrive as surrogate pairs.
} else if ((c >= 57344 && c <= 63743) ||
(c >= 983040 && c <= 1114111)) { // Private Use
s = WSTM.str.setChar(s, "&#" + c + ";", i); // show
l = true;
// U+E000-F8FF U+F0000-10FFFF
} else if (this.fallback(c)) {
// Remember the lowest rare char code encountered
if (this.detected.exchange) {
if (this.detected.exchange > c) {
this.detected.exchange = c;
}
} else {
this.detected.exchange = c;
}
} // rare codes
} // character types
} // for i --
// Warn of configured unwanted char codes in the cleaned text
if (WSTM.warn.char) {
this.fishing(s);
}
c = s.charCodeAt(0);
while (c === 10) { // skip heading \n
s = s.substr(1);
c = s.charCodeAt(0);
this.detected.white = true;
} // while ! c
k = s.length - 1;
c = s.charCodeAt(k);
while (c === 10) { // remove trailing \n
s = s.substr(0, k);
k--;
c = s.charCodeAt(k);
this.detected.white = true;
} // while ! c
// The whitespace set contains the tab only if a tab has been seen
this.whitespace = (this.detected.tab ? " \n\t" : " \n");
if (d) {
this.detected.ampersand = true;
all.learnt = true;
} else if (l) {
all.learnt = true;
}
all.source = s;
}; // .w.chr.filter()
WSTM.w.chr.fishing = function (analyse) {
   // Warn of unwanted character codes
   // Precondition:
   //    analyse  -- string to be analysed
   // Postcondition:
   //    Issues one warning per distinct code found, with its count
   // Uses:
   //    >  .warn.char
   //    .warn.found()
   // Requires: JavaScript 1.3   charCodeAt()
   var codes = WSTM.warn.char,
       count = codes.length,
       high  = 0,
       low   = 4294967295,
       size  = analyse.length,
       code, hits, j, pos;
   // Value span of the watched codes, for cheap rejection
   for (j = 0;  j < count;  j++) {
      code = codes[j];
      if (code < low) {
         low = code;
      }
      if (code > high) {
         high = code;
      }
   }   // for j
   for (pos = 0;  pos < size;  pos++) {
      code = analyse.charCodeAt(pos);
      if (code < low  ||  code > high) {
         continue;   // for pos
      }
      j = 0;
      while (j < count  &&  codes[j] !== code) {
         j++;
      }   // while not matching
      if (j < count) {   // watched code
         hits = hits  ||  { };
         hits[code] = (typeof hits[code] === "number"  ?  hits[code]
                                                       :  0)  +  1;
      }
   }   // for pos
   if (hits) {
      for (code in hits) {
         WSTM.warn.found("char",  hits[code] + "× " + code);
      }   // for code in hits
   }
};   // .w.chr.fishing()
WSTM.w.chr.fixed = function (adjust) {
   // Check whether char entity should be kept escaped in wikisyntax
   // Precondition:
   //    adjust  -- character code, or false
   // Postcondition:
   //    Returns false if replacement not recommended, or char code;
   //            0 for the zero width chars, 32 for en/em space
   // Uses:
   //     < .w.encountered.bidichar
   if (! (adjust  &&  adjust < 65534)) {
      return false;
   }
   switch (adjust) {
      /* ML escapes */
      case 8206 :   // lrm   LEFT-TO-RIGHT MARK
      case 8207 :   // rlm   RIGHT-TO-LEFT MARK
         WSTM.w.encountered.bidichar = true;
         return false;
      case   34 :   // quot
      case   38 :   // amp
      case   39 :   // apos
      case   60 :   // lt
      case   62 :   // gt
      /* special chars to be kept */
      case  160 :   // nbsp
      case  710 :   // circ
      case  732 :   // tilde
      case 8201 :   // thinsp
      /* wikisyntax escapes */
      case   91 :   // [   lsqb
      case   93 :   // ]   rsqb
      case  123 :   // {   lcub
      case  124 :   // |   verbar
      case  125 :   // }   rcub
         return false;
      /* special chars to be deleted one day */
      case 8203 :   // zwsp   // required in .lang.chr.zwsp
      case 8204 :   // zwnj   // required in .lang.chr.zwnj
      case 8205 :   // zwj    // required in .lang.chr.zwj
         return 0;
      /* to be replaced by ASCII space */
      case 8194 :   // ensp
      case 8195 :   // emsp
         return 32;
   }   // switch adjust
   return adjust;
};   // .w.chr.fixed()
WSTM.w.chr.fixTab = function (adjust) {
   // Replace every tab char in string by a space, if any
   // Precondition:
   //    adjust  -- string to be cleaned
   // Postcondition:
   //    Returns false, if no tab char found, or modified string
   return (adjust.indexOf("\t") < 0  ?  false
                                     :  adjust.replace(/\t/g, " "));
};   // .w.chr.fixTab()
WSTM.w.chr.flat = function (adjust, assign, alert) {
   // Exchange unwanted entities with char or standardized entity form
   // Precondition:
   //    adjust  -- string "&named;" or "&#xHHHH;" or "&#NNN;"
   //    assign  -- true if named entity, false if numeric
   //    alert   -- true if inspect
   // Postcondition:
   //    Returns false if replacement not recommended,
   //            otherwise string with (single) character or entity;
   //            the empty string requests removal
   // Uses:
   //    >  .w.chr.lang.cjk
   //    >  .warn.entity
   //     < .w.encountered.bidichar
   //    .str.charEntityHTML5single()
   //    .warn.found()
   //    .str.charEntityCode()
   //    .w.chr.form()
   // Requires: JavaScript 1.3   fromCharCode()
   var r, seek;
   // NOTE(review): "&" + adjust.substr(1) merely rebuilds adjust; if
   //    .warn.found() expects HTML, "&amp;" may have been meant in
   //    both warning texts -- confirm against .warn.found()
   if (assign) {
      r = WSTM.str.charEntityHTML5single(adjust);
      if (alert
          &&  WSTM.warn.entity
          &&  WSTM.warn.entity.indexOf(adjust) >= 0) {
         WSTM.warn.found("entity",  "&" + adjust.substr(1));
      }
   } else {
      r = WSTM.str.charEntityCode(adjust);
      if (alert  &&  WSTM.warn.entity) {
         // Once .w.chr.flip() has turned the list into the string
         // "&a;&#1;...", the complete entity is to be looked up;
         // "#12" alone would be found within "&#123;" as well
         seek = (typeof WSTM.warn.entity === "string"
                 ?  "&#" + r + ";"
                 :  "#" + r);
         if (WSTM.warn.entity.indexOf(seek) >= 0) {
            WSTM.warn.found("entity",  "&#" + r + ";");
         }
      }
   }   // named or numeric
   if (r) {
      switch (r) {
         case   32 :   // ASCII space: keep escaped, standardized form
            r = "&#32;";
            break;
         // ML syntax escaped
         case   34 :   // quot
         case   38 :   // amp
         case   39 :   // apos
         case   60 :   // lt
         case   62 :   // gt
         // wikisyntax escaped
         case   35 :   // #
         case   42 :   // *
         case   58 :   // :
         case   59 :   // ;
         case   61 :   // =
         case   91 :   // [
         case   93 :   // ]
         case   95 :   // _
         case  123 :   // {
         case  124 :   // |
         case  125 :   // }
         // invisible information kept visual
         case   96 :   // grave
         case  160 :   // nbsp
         case  168 :   // uml
         case  175 :   // macr
         case  180 :   // acute
         case  184 :   // cedil
         case  710 :   // circ
         case  711 :   // caron
         case  728 :   // breve
         case  730 :   // ring
         case  731 :   // ogon
         case  732 :   // tilde
         case  733 :   // dacute
         case  173 :   // soft hyphen
         case 8201 :   // thinsp
         case 8202 :   // hair space (english typography)
         case 8209 :   // NON-BREAKING HYPHEN
         case 8239 :   // NARROW NO-BREAK SPACE
         case 8204 :   // ZERO WIDTH NON-JOINER
         case 8205 :   // ZERO WIDTH JOINER
            r = false;
            break;
         // replace by ASCII space
         case 8194 :   // ensp
         case 8195 :   // emsp
         case 8196 :   // THREE-PER-EM SPACE
         case 8197 :   // FOUR-PER-EM SPACE
         case 8200 :   // PUNCTUATION SPACE   \u2008
            r = 32;
            break;
         case 8198 :   // SIX-PER-EM SPACE: shown as thinsp entity
            r = "&thinsp;";
            break;
         // to be replaced later
         case 8232 :   // LINE SEPARATOR
         case 8233 :   // PARAGRAPH SEPARATOR
         // replacement decided by .w.chr.form()
         case  847 :   // COMBINING GRAPHEME JOINER   \x034F
         case 8203 :   // ZERO WIDTH SPACE
         case 8287 :   // MEDIUM MATHEMATICAL SPACE
         case 8288 :   // WORD JOINER
            r = this.form(r);
            break;
         case 8206 :   // LEFT-TO-RIGHT MARK   \u200E
         case 8207 :   // RIGHT-TO-LEFT MARK   \u200F
         case 8234 :   // LEFT-TO-RIGHT EMBEDDING
         case 8235 :   // RIGHT-TO-LEFT EMBEDDING
         case 8236 :   // POP DIRECTIONAL FORMATTING
         case 8237 :   // LEFT-TO-RIGHT OVERRIDE
         case 8238 :   // RIGHT-TO-LEFT OVERRIDE
            r = this.form(r);
            WSTM.w.encountered.bidichar = true;
            break;
         case 8960 :   // DIAMETER SIGN   \u2300   TODO
            r = 248;   // ø
            break;
         case 65279 :   // ZERO WIDTH NO-BREAK SPACE / BYTE ORDER MARK
            r = "";
            break;
         default:
            if (r >= 10240) {   // Braille and extra-European
               // Resolved in CJK projects only, and there limited to
               // U+2E80-FE6B.  The code has to be retained for the
               // conversion below; a boolean would never end up as
               // replacing string.
               if (WSTM.w.chr.lang.cjk  &&
                   r >= 11904  &&  r <= 65131) {
                  // keep r
               } else {
                  r = false;
               }
            } else if (r > 8193) {   // U+2001
               r = this.form(r);
            } else if (r < 160) {
               r = false;
            }
            break;
      }   // switch r
   }   // detected
   if (r) {
      if (typeof r === "number") {
         r = String.fromCharCode(r);
      }   // conversion number -> string
      if (adjust === r) {
         r = false;
      }   // unchanged
   }   // conversion
   return r;
};   // .w.chr.flat()
WSTM.w.chr.flip = function (all) {
   // Exchange and standardize undesired character entities
   // Precondition:
   //    all  -- root WikiTom to be adjusted
   // Postcondition:
   //    Modified all, if appropriate
   //    .warn.entity has been turned into one string "&a;&b;&#1;..."
   // Uses:
   //    >  .warn.char
   //    >  .w.chr.detected.ampersand
   //    >< .warn.entity
   //    .w.chr.re.fiat()
   //    .o.WikiTom().find()
   //    .o.WikiTom().fetch()
   //    .w.chr.flipper()
   //    .o.WikiTom().flip()
   var e, got, i, s, shift;
   if (this.detected.ampersand) {
      this.re.fiat();
      // Convert the list of entity names once only.  On a repeated
      // call the string built before would be iterated char by char
      // and each char wrapped as entity.
      if (WSTM.warn.entity  &&
          typeof WSTM.warn.entity !== "string") {
         s = "";
         for (i = 0;  i < WSTM.warn.entity.length;  i++) {
            s = s + "&" + WSTM.warn.entity[i] + ";";
         }   // for i
         WSTM.warn.entity = s;
      }
      if (WSTM.warn.char) {
         if (typeof WSTM.warn.entity !== "string") {
            WSTM.warn.entity = "";
         }
         for (i = 0;  i < WSTM.warn.char.length;  i++) {
            e = "&#" + WSTM.warn.char[i] + ";";
            // Do not append again what is present already,
            // e.g. from a previous run
            if (WSTM.warn.entity.indexOf(e) < 0) {
               WSTM.warn.entity = WSTM.warn.entity + e;
            }
         }   // for i
      }
      // Visit every node reported to contain '&'; after each hit
      // the search proceeds with the next node
      got = { i: 0, k: 0 };
      do {
         got = all.find("&", got.i, got.k, true, false);
         if (got) {
            s = all.fetch(got.k, got.i);
            if (s) {
               shift = this.flipper(s);
               if (shift) {
                  all.flip(got.k, got.i, s.length, shift);
               }
            }
            got.i = 0;
            got.k++;
         }
      } while (got);   // do
   }
};   // .w.chr.flip()
WSTM.w.chr.flipper = function (adjust) {
   // Exchange and standardize undesired character entity
   // Precondition:
   //    adjust  -- string to be adjusted
   //    .w.chr.re.fiat() has been run
   // Postcondition:
   //    Returns adjusted string, or false
   // Uses:
   //    >  .w.chr.re.entities
   //    .w.chr.flat()
   var m = false,
       s = adjust,
       c,
       e,
       f,
       j,
       k,
       re;
   // Pass 0: numeric entities;  pass 1: named entities
   for (k = 0;  k < 2;  k++) {
      re = this.re.entities[k];
      re.lastIndex = 0;
      f = re.exec(s);
      while (f) {
         e = f[0];
         // The match is the complete entity, therefore it is to be
         // compared with the entity text rather than the char
         if (e !== "&nbsp;") {
            c = this.flat(e,  (k === 1),  false);
            if (typeof c === "string") {
               m = true;
               j = f.index;
               s = s.substr(0, j)
                   + c
                   + s.substr(j + e.length);
               // Resume right behind the replacement.  This may be
               // the empty string; skipping one char then would miss
               // an entity following immediately.
               re.lastIndex = j + c.length;
            }
         }   // replace, except nbsp
         f = re.exec(s);
      }   // while f
   }   // for k
   return (m ? s : false);
};   // .w.chr.flipper()
WSTM.w.chr.flushChars = function (adjust) {
   // Exchange undesired chars in modifiable nodes with better ones
   // Precondition:
   //    adjust  -- WikiTom to be analyzed
   // Postcondition:
   //    Nodes are modified where suitable.
   // Uses:
   //    >  .w.chr.detected.exchange
   //    >  .o.WikiTom.TextOnly
   //    .w.chr.flushChars()  -- recursive
   //    .w.chr.fashion()
   // Requires: JavaScript 1.3   charCodeAt()
   var c,
       i,
       n,
       s;
   if (adjust.children) {
      n = adjust.children.length;
      for (i = 0;  i < n;  i++) {
         this.flushChars(adjust.children[i]);
      }   // for i
   } else if (adjust.lookup  &&
              adjust.mode <= WSTM.o.WikiTom.TextOnly) {
      s = adjust.source;
      if (s) {
         // Find the last suspicious char;
         // .w.chr.fashion() deals with everything up to that one
         for (i = s.length - 1;  i >= 0;  i--) {
            c = s.charCodeAt(i);
            if (c >= WSTM.w.chr.detected.exchange) {   // Dingbats..
               s = WSTM.w.chr.fashion(s, i);
               // .w.chr.fashion() returns false if it did not change
               // anything; this must not reach the node as new text
               if (typeof s === "string"  &&  adjust.source !== s) {
                  adjust.fresh(s);
               }
               break;   // for i
            }   // suspicious character
         }   // for i
      }
   }
};   // .w.chr.flushChars()
WSTM.w.chr.flushEntities = function (adjust) {
   // Exchange undesired entities in modifiable nodes with UCS chars
   // Precondition:
   //    adjust  -- WikiTom to be analyzed
   // Postcondition:
   //    Nodes are modified where suitable.
   // Uses:
   //    >  .o.WikiTom.TextOnly
   //    >< .w.chr.re.entities
   //    .w.chr.flushEntities()  -- recursive
   //    .w.chr.re.fiat()
   //    .w.chr.flat()
   var m = false,
       c,
       e,
       f,
       i,
       j,
       k,
       n,
       re,
       s;
   if (adjust.children) {
      n = adjust.children.length;
      for (i = 0;  i < n;  i++) {
         this.flushEntities(adjust.children[i]);
      }   // for i
   } else if (adjust.lookup  &&
              adjust.mode <= WSTM.o.WikiTom.TextOnly) {
      s = adjust.source;
      if (s  &&  s.indexOf("&") >= 0) {
         this.re.fiat();
         // Pass 0: numeric entities;  pass 1: named entities
         for (k = 0;  k < 2;  k++) {
            re = this.re.entities[k];
            re.lastIndex = 0;
            f = re.exec(s);
            while (f) {
               e = f[0];
               // The match is the complete entity, therefore it is
               // to be compared with the entity text, not the char
               if (e !== "&nbsp;") {
                  c = this.flat(e,  (k === 1),  true);
                  if (typeof c === "string") {
                     m = true;
                     j = f.index;
                     s = s.substr(0, j)
                         + c
                         + s.substr(j + e.length);
                     // Resume right behind the replacement.  This
                     // may be the empty string; skipping one char
                     // then would miss an entity following
                     // immediately.
                     re.lastIndex = j + c.length;
                  }
               }   // replace, except nbsp
               f = re.exec(s);
            }   // while f
         }   // for k
         if (m  &&  adjust.source !== s) {
            adjust.fresh(s);
         }
      }   // '&' in node
   }
};   // .w.chr.flushEntities()
WSTM.w.chr.form = function (adjust) {
// Is char code to be replaced by a more common code or string?
// Context-free mapping; a char code without own case is returned
// unchanged.
// Precondition:
// adjust -- character code
// Postcondition:
// Returns original (adjust) or replacing char code or string;
// the empty string requests removal
// Uses:
// >< .w.chr.detected.exchange
// < .w.encountered.bidichar
// .str.hexcode()
// .w.chr.fallback()
// 2016-01-24 PerfektesChaos@de.wikipedia
var c = adjust;
switch (adjust) {
case 8194 : // N-SPACE
case 8195 : // M-SPACE
case 8200 : // PUNCTUATION SPACE
c = 32; // SPC
break;
// NOTE(review): the literals assigned for 8206, 8207 and 8287 seem to
// hold a bare (partly invisible) character; for the two marks an
// entity was presumably meant -- verify against the original script.
case 8206 : // LEFT-TO-RIGHT MARK \u200E
WSTM.w.encountered.bidichar = true;
c = "‎";
break;
case 8207 : // RIGHT-TO-LEFT MARK \u200F
WSTM.w.encountered.bidichar = true;
c = "‏";
break;
// The following codes become a hexadecimal numeric entity.
// .w.encountered.bidichar is not set here for 8234...8238;
// the callers in this file set it themselves for those codes.
case 847 : // COMBINING GRAPHEME JOINER \x034F
case 8196 : // THREE-PER-EM SPACE
case 8197 : // FOUR-PER-EM SPACE
case 8198 : // SIX-PER-EM SPACE
case 8203 : // ZERO WIDTH SPACE
case 8232 : // LINE SEPARATOR -- make visible as entity first
// then remove from links, remaining as '\n'
case 8233 : // PARAGRAPH SEPARATOR
case 8234 : // LEFT-TO-RIGHT EMBEDDING
case 8235 : // RIGHT-TO-LEFT EMBEDDING
case 8236 : // POP DIRECTIONAL FORMATTING
case 8237 : // LEFT-TO-RIGHT OVERRIDE
case 8238 : // RIGHT-TO-LEFT OVERRIDE
c = "&#x" + WSTM.str.hexcode(adjust, 4, false) + ";";
break;
case 8202 : // HAIR SPACE
c = ""; // hook: en=keep de: ""
break;
case 8287 : // MEDIUM MATHEMATICAL SPACE
c = " ";
break;
case 8288 : // WORD JOINER
c = "";
break;
case 8960 : // DIAMETER SIGN \u2300 TODO
c = 248; // ø
break;
default:
// Not mapped here: if the code is a candidate for the context
// dependent exchange, remember the lowest such code encountered
if (this.fallback(c)) {
if (this.detected.exchange) {
if (this.detected.exchange > adjust) {
this.detected.exchange = adjust;
}
} else {
this.detected.exchange = adjust;
}
}
} // switch adjust
return c;
}; // .w.chr.form()
WSTM.w.chr.format = function (adjust, around, address) {
   // Is char to be replaced by a more common one in this context?
   // Precondition:
   //    adjust   -- character code
   //    around   -- entire context string (modifiable)
   //                false if no context present
   //    address  -- position of adjust within around
   // Postcondition:
   //    Returns original (adjust) or replacing char code or string
   // Uses:
   //    .str.charEntityCode()
   //    .str.charEntityHTML5single()
   //    .w.chr.fixed()
   // Requires: JavaScript 1.3   charCodeAt()
   // Remark: The mapping of ideographic punctuation and fullwidth
   //         forms is disabled; those codes are returned unchanged.
   var roman = [ "I",   "II",   "III", "IV",    // U+2160 ...
                 "V",   "VI",   "VII", "VIII",
                 "IX",  "X",    "XI",  "XII",
                 "L",   "C",    "D",   "M" ],   // ... U+216F
       cb    = 0,        // code before, 0 if none
       cf    = 0,        // code following, 0 if none
       r     = adjust,
       code, cut, k, n, seq;
   if (around) {
      n = around.length;
      if (address > 0) {   // ahead
         cb = around.charCodeAt(address - 1);
         if (cb === 59  &&  address > 4) {   // ';'  entity?
            cut = around.lastIndexOf("&",  address - 2);
            if (cut >= 0) {
               seq = around.substr(cut,  address - cut);
               if (around.charCodeAt(cut + 1) === 35) {   // '#'
                  code = WSTM.str.charEntityCode(seq);
               } else {
                  code = WSTM.str.charEntityHTML5single(seq);
                  if (code) {
                     code = this.fixed(code);
                  }
               }
               if (code) {   // neighbour is the char meant by entity
                  cb = code;
               }
            }   // entity
         }
      }   // ahead
      if (address < n - 1) {   // after
         cf = around.charCodeAt(address + 1);
         if (cf === 38  &&  address < n - 4) {   // '&'  entity?
            cut = around.indexOf(";",  address + 2);
            if (cut > address) {
               seq = around.substr(address + 1,  cut - address);
               if (around.charCodeAt(address + 2) === 35) {   // '#'
                  code = WSTM.str.charEntityCode(seq);
               } else {
                  code = WSTM.str.charEntityHTML5single(seq);
                  if (code) {
                     code = this.fixed(code);
                  }
               }
               if (code) {
                  cf = code;
               }
            }   // entity
         }
      }   // after
   }   // around
   if (cb === 32  &&  (cf === 10 || cf === 32)) {
      // Space before, space or newline after: keep as it is
      return r;
   }
   if (adjust >= 8544  &&  adjust <= 8575) {   // Roman digits
      if (cb < 11904  &&  cf < 11904  &&       // Not CJK
          typeof adjust === "number") {
         // Roman digits are permitted within CJK
         k   = adjust - 8544;
         seq = roman[ (k < 16  ?  k  :  k - 16) ];
         if (typeof seq === "string") {
            if (k >= 16) {   // U+2170 ... U+217F small
               seq = seq.toLowerCase();
            }
            // Single letter: char code;  sequence: string
            r = (seq.length === 1  ?  seq.charCodeAt(0)  :  seq);
         }
      }
   } else if (adjust === 65279) {   // U+FEFF BYTE ORDER MARK
      r = "";
   }   // adjust type
   return r;
};   // .w.chr.format()
/*
WSTM.w.chr.fullwidth = function (ahead, adjust, after, adapt) {
// Is CJK char to be replaced by ASCII in this context?
// Precondition:
// ahead -- character code just before
// adjust -- character code of interest (CJK latin)
// after -- character code just following
// adapt -- calculate ASCII from adjust; or false
// Postcondition:
// Returns original (adjust) or replacing char code
// 2012-10-18 PerfektesChaos@de.wikipedia
var r = adjust;
if ((ahead < 11904 && after !== 32) ||
(after < 11904 && ahead !== 32) ||
(ahead >= 65281 && ahead <= 65374) ||
(after >= 65281 && after <= 65374) ||
(ahead < 11904 && after < 11904 &&
(ahead !== 32 || after !== 32) &&
ahead !== 60 && ahead !== 62 &&
after !== 60 && after !== 62)) {
// Neighbour not CJK, not sole, not >ruby<
if (adapt) {
r = adjust - adapt; // calculate from ASCII
} else {
switch (adjust) {
case 12288 :
r = 32; // U+3000 SPACE
break;
case 12289 :
r = 44; // U+3001 COMMA
break;
case 12290 :
r = 46; // U+3002 PERIOD
break;
case 12295 :
r = 48; // U+3007 NUMBER ZERO
break;
case 12296 :
r = 8249; // U+3008 LEFT ANGLE BRACKET ›
break;
case 12297 :
r = 8250; // U+3009 RIGHT ANGLE BRACKET ›
break;
case 12298 :
r = 171; // U+300A LEFT DOUBLE ANGLE BRACKET «
break;
case 12299 :
r = 187; // U+300B RIGHT DOUBLE ANGLE BRACKET »
break;
case 12300 :
// r = 8968; // U+300C LEFT CORNER BRACKET ⌈
break;
case 12301 :
// r = 8971; // U+300D RIGHT CORNER BRACKET ⌋
break;
} // switch adjust
}
} // fullwidth forms permitted within CJK
return r;
}; // .w.chr.fullwidth()
*/
WSTM.w.chr.re.fiat = function () {
   // Ensure availability of regular expressions (for entities)
   // Postcondition:
   //    RegExp available:  .entities[0]  numeric,  .entities[1]  named
   // Uses:
   //    >< .w.chr.re.entities
   var named, numeric;
   if (this.entities) {
      return;   // created before
   }
   numeric = "&#(x?[0-9A-Fa-f]+|[0-9]+);";
   named   = "&[A-Za-z]+[A-Za-z123]+;";
   // Global flag: the users of these expressions maintain .lastIndex
   this.entities = [ new RegExp(numeric, "g"),
                     new RegExp(named, "g") ];
};   // .w.chr.re.fiat()
}; // .bb.chr()
mw.libs.WikiSyntaxTextMod.bb.chr(mw.libs.WikiSyntaxTextMod);
delete mw.libs.WikiSyntaxTextMod.bb.chr;
//-----------------------------------------------------------------------
( function ( WSTM ) {
   "use strict";
   // Sub-module has been loaded completely: record that in .main.bb,
   // notify a waiting main module and the resource loader / hook.
   var sub   = "C",
       label = WSTM.w.chr.self,
       vsn   = WSTM.w.chr.vsn,
       states;
   if ( typeof WSTM.main !== "object" ) {
      WSTM.main = { };
   }
   if ( ! WSTM.main.bb ) {
      WSTM.main.bb = { };
   }
   WSTM.main.bb[ sub ] = { load: true,  vsn: vsn };
   if ( typeof WSTM.main.wait === "function" ) {
      // Start on import: callback to waiting ...
      WSTM.main.wait( sub, vsn );
   }
   if ( typeof mw.loader === "object"  &&
        typeof mw.hook !== "undefined" ) {
      states = { };
      states[ label ] = "ready";
      mw.loader.state( states );
      mw.hook( "WikiSyntaxTextMod/" + sub + ".ready" )
        .fire( [ sub, vsn ] );
   }
} ( mw.libs.WikiSyntaxTextMod ) );
// Emacs
// Local Variables:
// coding: utf-8-dos
// fill-column: 80
// End:
/// EOF </nowiki> WikiSyntaxTextMod/dC.js