/*
Citation Watchlist Script – Highlights watchlist entries when questionable sources are added
(User:Harej/citation-watchlist.js)

author:  Hacks/Hackers
license: GPL 4.0
*/

/* Per-wiki configuration */

const LANGUAGE = 'en';
const FAMILY = 'wikipedia';
const endpoint = `https://${LANGUAGE}.${FAMILY}.org/w/api.php`;
const restApiEndpoint = `https://api.wikimedia.org/core/v1`;
const publicSuffixList = "Wikipedia:Citation_Watchlist/Public_Suffix_List";
const listOfLists = "Wikipedia:Citation_Watchlist/Lists";
const msgWarning = "Warning";
const msgCaution = "Caution";
const msgInspect = "Inspect";
const warnEmoji = '\u2757'; // ❗ heavy exclamation mark
const cautionEmoji = '\u270B'; // ✋ raised hand
const inspectEmoji = '\uD83D\uDD0E'; // 🔎 magnifying glass (U+1F50E as a surrogate pair)
const warnSectionHeader = "==Warn==";
const cautionSectionHeader = "==Caution==";
const inspectSectionHeader = "==Inspect==";


let publicSuffixSet = new Set();
let warnList = new Set();
let cautionList = new Set();
let inspectList = new Set();

function prependEmojiWithTooltip(element, emoji, domains, tooltipText) {
  let processedType = '';
  if (emoji === warnEmoji) {
    processedType = 'warn';
  } else if (emoji === cautionEmoji) {
    processedType = 'caution';
  } else if (emoji === inspectEmoji) {
    processedType = 'inspect';
  } else {
    console.error('Unsupported emoji type');
    return;
  }

  if (element.getAttribute(`data-processed-${processedType}`) === 'true') {
    return;
  }

  const emojiSpan = document.createElement('span');
  emojiSpan.textContent = emoji + " ";
  emojiSpan.title = tooltipText + ": " + domains.join(", ");
  element.parentNode.insertBefore(emojiSpan, element);
  element.setAttribute(`data-processed-${processedType}`, 'true');
}
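
// Illustrative call (not executed): for a watchlist row element that matched
// the warn list via example.com, the following would prepend "❗ " to the row
// with the hover text "Warning: example.com":
//
//   prependEmojiWithTooltip(rowElement, warnEmoji, ['example.com'], msgWarning);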

async function parseWatchlist() {
  // Select all containers of the watchlist links to process them individually
  const entriesContainers = document.querySelectorAll('.mw-changeslist-links');
  const revisions = [];

  let linkCounter = 0;

  for (const container of entriesContainers) {
    // Within each container, try to find a "diff" link first
    const diffLink = container.querySelector('a.mw-changeslist-diff');
    const histLink = container.querySelector('a.mw-changeslist-history');
    const prevLink = container.querySelector('a.mw-history-histlinks-previous');
    const curLink = container.querySelector('a.mw-history-histlinks-current');

    console.log(`
    Diff link: ${diffLink}
    Hist link: ${histLink}
    Prev link: ${prevLink}
    Cur link: ${curLink}
    `);

    let urlParams;

    if (diffLink) {
      linkCounter += 1;
      // If a "diff" link is found, process it
      // Parse the link's full URL; URLSearchParams alone would fold the path
      // into the first parameter's key
      urlParams = new URL(diffLink.href).searchParams;
      const diffId = await fetchPreviousRevisionId(urlParams.get('diff'));
      revisions.push({
        oldrevision: urlParams.get('diff'),
        newrevision: diffId,
        element: diffLink.parentNode.parentNode
      });
    } else if (histLink) {
      linkCounter += 1;
      // If no "diff" link is found but a "hist" link is, process the "hist" link
      urlParams = new URL(histLink.href).searchParams;
      const pageID = urlParams.get('curid');
      const firstID = await fetchFirstRevisionId(pageID);
      revisions.push({
        oldrevision: firstID,
        element: histLink.parentNode.parentNode
      });
      // At this point, check if we are on a page history rather than watchlist
    } else if (prevLink) {
      linkCounter += 1;
      urlParams = new URL(prevLink.href).searchParams;
      const diffId = await fetchPreviousRevisionId(urlParams.get('oldid'));
      revisions.push({
        oldrevision: urlParams.get('oldid'),
        newrevision: diffId,
        element: prevLink.parentNode.parentNode
      });
      // No prev link means we are at the page's first revision
    } else if (curLink) {
      linkCounter += 1;
      urlParams = new URL(curLink.href).searchParams;
      revisions.push({
        // We do not actually want to compare to the current revision; we
        // extract the oldid and treat it like a new page.
        oldrevision: urlParams.get('oldid'),
        element: curLink.parentNode.parentNode
      });
    }
  }

  // If no links of any kind were found, we are on a page history with only
  // one revision. Fetch that first (and only) revision ID via the page ID.
  if (linkCounter === 0) {
    const pageID = mw.config.get('wgArticleId');
    const firstID = await fetchFirstRevisionId(pageID);
    revisions.push({
      oldrevision: firstID,
      element: entriesContainers[0]
    });
  }

  return revisions;
}

function buildURL(params) {
  const url = new URL(endpoint);
  Object.keys(params).forEach(key => url.searchParams.append(key, params[key]));
  return url;
}
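
// Example (not executed): with the default settings above,
//   buildURL({ action: 'query', format: 'json' })
// yields https://en.wikipedia.org/w/api.php?action=query&format=json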

function getRootDomain(hostname, publicSuffixSet) {
  // Walk down the hostname's labels until a public suffix matches, then
  // return the suffix plus one label (the registrable domain). This is a
  // simplified lookup: wildcard (*) and exception (!) rules are treated as
  // if they were plain suffixes.
  const domainParts = hostname.split('.');
  for (let i = 0; i < domainParts.length; i++) {
    const candidate = domainParts.slice(i).join('.');
    if (publicSuffixSet.has(candidate) || publicSuffixSet.has(`!${candidate}`)) {
      if (i === 0) {
        return hostname; // the hostname itself is a public suffix
      }
      return domainParts.slice(i - 1).join('.');
    }
  }
  return hostname;
}
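
// Worked example (not executed), assuming the suffix set contains 'co.uk':
//   getRootDomain('news.example.co.uk', publicSuffixSet)
// tests 'news.example.co.uk', then 'example.co.uk', then 'co.uk'; 'co.uk' is
// the first hit, so the function returns one label more: 'example.co.uk'.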

function extractAddedURLs(addedParts) {
  const addedURLs = [];
  const urlRegex = /https?:\/\/[^\s<"]+/g;
  let match;
  while ((match = urlRegex.exec(addedParts)) !== null) {
    addedURLs.push(match[0]);
  }
  return addedURLs;
}
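
// Example (not executed): given the diff text
//   'added <ref>https://example.com/story</ref> here'
// extractAddedURLs returns ['https://example.com/story']; the regex stops at
// whitespace, '<', and '"'.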

async function fetchFromActionAPI(params) {
  const url = buildURL(params);

  try {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Network response was not ok: ${response.statusText}`);
    }
    return await response.json();
  } catch (error) {
    console.error('Error fetching data from MediaWiki API:', error);
    throw error;
  }
}

async function fetchPublicSuffixList() {
  const pslUrl = `https://${LANGUAGE}.${FAMILY}.org/wiki/${publicSuffixList}?action=raw`;
  try {
    const response = await fetch(pslUrl);
    const content = await response.text();
    const suffixSet = new Set();
    const lines = content.split('\n');
    for (const line of lines) {
      if (line.trim() && !line.trim().startsWith('//')) {
        suffixSet.add(line.trim());
      }
    }
    return suffixSet;
  } catch (error) {
    console.error("Error fetching Public Suffix List:", error);
    return new Set();
  }
}
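
// The on-wiki copy is expected to follow the Public Suffix List format:
// '//' comment lines are skipped and each remaining non-blank line is one
// rule, e.g.:
//
//   // ===BEGIN ICANN DOMAINS===
//   com
//   co.uk
//   *.ck
//   !www.ck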

async function fetchDiffFromAPI(apiUrl) {
  // Called with either a bare revision URL (expected to return { source })
  // or a compare URL (expected to return { diff: [...] })
  try {
    const response = await fetch(apiUrl);
    const data = await response.json();
    return data["source"] || data["diff"];
  } catch (error) {
    console.error('Error fetching API content:', error);
    return null;
  }
}

async function fetchDiffAndProcess(revision) {
  // Note: parseWatchlist stores the revision under review in
  // revision.oldrevision and its parent in revision.newrevision, so this
  // comparison runs from the newer revision back to the older one.
  let apiUrl = `${restApiEndpoint}/${FAMILY}/${LANGUAGE}/revision/${revision.oldrevision}`;
  if (revision.newrevision !== undefined) {
    apiUrl += `/compare/${revision.newrevision}`;
  }
  const diff = await fetchDiffFromAPI(apiUrl);
  let addedURLs = [];

  if (Array.isArray(diff)) { // actual diffs are arrays; new pages are strings
    // Since the comparison runs from the newer revision back to its parent,
    // "from" lines (types 2 and 4) belong to the newer revision and "to"
    // lines (types 1 and 5) to the parent. Type 3 represents changes within
    // a line; extracting URL changes from those would be harder.
    let fromURLs = [];
    let toURLs = [];

    for (const diffLine of diff) {
      const lineURLs = extractAddedURLs(diffLine.text);
      // 'lineURL', not 'URL', so the global URL constructor is not shadowed
      for (const lineURL of lineURLs) {
        if (diffLine.type === 2 || diffLine.type === 4) {
          fromURLs.push(lineURL);
        } else if (diffLine.type === 1 || diffLine.type === 5) {
          toURLs.push(lineURL);
        }
      }
    }

    // URLs unique to the newer revision are the ones that were added
    const toURLSet = new Set(toURLs);
    addedURLs = fromURLs.filter(url => !toURLSet.has(url));
  } else {
    addedURLs = extractAddedURLs(diff);
  }

  console.log(`Old revision: ${revision.oldrevision}
New revision: ${revision.newrevision}
API URL: ${apiUrl}
Revision element: ${revision.element.innerHTML}
Added URLs: ${addedURLs.join(' ')}`);

  const matchedWarnDomains = [];
  const matchedCautionDomains = [];
  const matchedInspectDomains = [];

  // Record each matched domain once, at its most severe level (warn takes
  // precedence over caution, which takes precedence over inspect)
  for (const url of addedURLs) {
    const hostname = new URL(url).hostname;
    const domain = getRootDomain(hostname, publicSuffixSet);

    if (warnList.has(domain) && !matchedWarnDomains.includes(domain)) {
      matchedWarnDomains.push(domain);
    } else if (cautionList.has(domain) && !matchedCautionDomains.includes(domain)) {
      matchedCautionDomains.push(domain);
    } else if (inspectList.has(domain) && !matchedInspectDomains.includes(domain)) {
      matchedInspectDomains.push(domain);
    }
  }

  if (matchedWarnDomains.length > 0) {
    prependEmojiWithTooltip(revision.element, warnEmoji, matchedWarnDomains, msgWarning);
  }
  if (matchedCautionDomains.length > 0) {
    prependEmojiWithTooltip(revision.element, cautionEmoji, matchedCautionDomains, msgCaution);
  }
  if (matchedInspectDomains.length > 0) {
    prependEmojiWithTooltip(revision.element, inspectEmoji, matchedInspectDomains, msgInspect);
  }
}

async function fetchAndOrganizeDomainLists(pageName) {
  const params = {
    action: 'query',
    prop: 'revisions',
    titles: pageName,
    rvprop: 'content',
    rvslots: '*',
    format: 'json',
    origin: '*'
  };

  try {
    const data = await fetchFromActionAPI(params);
    const page = data.query.pages;
    const pageId = Object.keys(page)[0];
    const content = page[pageId].revisions[0].slots.main['*'];

    // Local sets (they shadow the globals); runScript() merges them into
    // the global lists
    const warnList = new Set();
    const cautionList = new Set();
    const inspectList = new Set();

    let currentList = null;

    const lines = content.split('\n');
    for (let line of lines) {
      if (line.trim() === warnSectionHeader) {
        currentList = warnList;
      } else if (line.trim() === cautionSectionHeader) {
        currentList = cautionList;
      } else if (line.trim() === inspectSectionHeader) {
        currentList = inspectList;
      }

      if (line.startsWith('*') && currentList) {
        const domain = line.substring(1).trim();
        currentList.add(domain);
      }
    }

    return {
      warnList,
      cautionList,
      inspectList
    };
  } catch (error) {
    console.error('Error fetching or parsing the page content:', error);
    throw error;
  }
}
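
// A domain list page is expected to look roughly like the wikitext below
// (section headers written without spaces; hypothetical domains shown):
//
//   ==Warn==
//   * unreliable.example
//   ==Caution==
//   * questionable.example
//   ==Inspect==
//   * lookcloser.example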

async function fetchPreviousRevisionId(oldid) {
  const params = {
    action: 'query',
    prop: 'revisions',
    revids: oldid,
    rvprop: 'ids',
    format: 'json',
    origin: '*'
  };

  try {
    const data = await fetchFromActionAPI(params);
    const pages = data.query.pages;
    const pageId = Object.keys(pages)[0];
    const revisions = pages[pageId].revisions;

    if (revisions && revisions.length > 0) {
      return revisions[0].parentid; // Get the parent revision ID
    } else {
      throw new Error('No revisions found for the given revision ID.');
    }
  } catch (error) {
    console.error('Error fetching previous revision ID:', error);
    return null;
  }
}

async function fetchFirstRevisionId(pageID) {
  const params = {
    action: 'query',
    pageids: pageID,
    prop: 'revisions',
    rvlimit: 1,
    rvdir: 'newer',
    format: 'json',
    origin: '*'
  };

  try {
    const data = await fetchFromActionAPI(params);
    const pages = data.query.pages;
    const pageId = Object.keys(pages)[0];
    const revisions = pages[pageId].revisions;

    if (revisions && revisions.length > 0) {
      return revisions[0].revid;
    } else {
      throw new Error('No revisions found for this page.');
    }
  } catch (error) {
    console.error('Error fetching first revision ID:', error);
    return null;
  }
}

async function fetchDomainListPages(pageName) {
  const params = {
    action: 'query',
    prop: 'revisions',
    titles: pageName,
    rvprop: 'content',
    rvslots: '*',
    format: 'json',
    origin: '*'
  };

  try {
    const data = await fetchFromActionAPI(params);
    const page = data.query.pages;
    const pageId = Object.keys(page)[0];
    const content = page[pageId].revisions[0].slots.main['*'];

    const pageTitles = [];
    const lines = content.split('\n');
    for (let line of lines) {
      if (line.startsWith('* [[')) {
        const match = line.match(/\[\[([^\]]+)\]\]/); // Matches the first instance of [[Page Title]]
        if (match) {
          pageTitles.push(match[1]);
        }
      }
    }

    return pageTitles;
  } catch (error) {
    console.error('Error fetching or parsing the page content:', error);
    throw error;
  }
}
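
// The list-of-lists page is expected to hold one wikilink bullet per domain
// list page, e.g.:
//
//   * [[Wikipedia:Citation Watchlist/Example list]]
//
// Only the first [[Page Title]] on each '* [[' line is captured.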

async function checkPageExists(pageName) {
  const params = {
    action: 'query',
    titles: pageName,
    format: 'json',
    origin: '*'
  };

  try {
    const data = await fetchFromActionAPI(params);
    const page = data.query.pages;
    const pageId = Object.keys(page)[0];
    return pageId !== "-1"; // Page exists if pageId is not "-1"
  } catch (error) {
    console.error('Error checking page existence:', error);
    return false;
  }
}
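
// Example response shape for a missing page (the Action API keys missing
// pages under "-1"):
//
//   { "query": { "pages": { "-1": { "title": "Some page", "missing": "" } } } }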

async function runScript() {
  publicSuffixSet = await fetchPublicSuffixList();
  if (publicSuffixSet.size === 0) {
    console.error('Public Suffix List loading failed');
    return;
  }

  const listPages = await fetchDomainListPages(listOfLists);
  for (const pageName of listPages) {
    const exists = await checkPageExists(pageName);
    if (exists) {
      const lists = await fetchAndOrganizeDomainLists(pageName);
      // Merge this page's domains into the global sets
      lists.warnList.forEach(domain => warnList.add(domain));
      lists.cautionList.forEach(domain => cautionList.add(domain));
      lists.inspectList.forEach(domain => inspectList.add(domain));
    }
  }

  const watchlistRevisions = await parseWatchlist();
  for (const revision of watchlistRevisions) {
    await fetchDiffAndProcess(revision);
  }
}

runScript()
  .then(() => console.log('Citation Watchlist script loaded'))
  .catch(error => console.error('Citation Watchlist script failed:', error));