User:Phlsph7/AddCitationNeededTagsToUnreferencedParagraphs.js

Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
/* Script to automatically add citation-needed tags to unreferenced paragraphs */

// <nowiki>
// Read the wikitext from the edit form.
// Returns the current value of the element with the id 'wpTextbox1'.
function getWikitext(){
	return document.getElementById('wpTextbox1').value;
}

// Write wikitext into the edit form.
// wikitext: the text that becomes the new value of the element with the id 'wpTextbox1'
function setWikitext(wikitext){
	document.getElementById('wpTextbox1').value = wikitext;
}

// Add a note about the inserted tags to the edit summary field (id 'wpSummary').
// Text that is already in the field is kept. The note is appended to it:
// - directly, if the field is empty or its text already ends in whitespace
//   (presumably the case for an auto-filled section prefix)
// - after '; ' otherwise, so that it does not run into text typed by the user
// Nothing happens if the field does not exist or already contains the note.
function setEditSummary(){
	const summaryNote = '"Citation needed" tags were added to unreferenced paragraphs, see [[WP:V]]';
	const input = document.getElementById('wpSummary');
	
	// without a summary field there is nothing to update
	if(!input){
		return;
	}
	
	const existingSummary = input.value;
	
	// do not repeat the note when the script is run more than once
	if(existingSummary.includes(summaryNote)){
		return;
	}
	
	const needsSeparator = existingSummary.length > 0 && !/\s$/.test(existingSummary);
	input.value = existingSummary + (needsSeparator ? '; ' : '') + summaryNote;
}

// Notify the user about the result of a run.
// counter: number of tags that were added
// originalWikitext: the wikitext before the tags were added; it is searched (case-insensitively)
//                   for citation-needed tags and for maintenance templates
// The message is passed to mw.notify. It states how many tags were added, how many
// citation-needed tags the original text had and, if any were found, which of the templates
// "unreferenced", "unreferenced section" and "more citations needed" the page already contains.
function notify(counter, originalWikitext){
	// all tokens below are lower case, so the text is converted once
	const lowerWikitext = originalWikitext.toLowerCase();
	
	// utility function to count the occurrences of a token
	function countToken(token){
		return lowerWikitext.split(token).length - 1;
	}
	
	// utility function to check for tokens
	function hasToken(tokens){
		return tokens.some((token) => lowerWikitext.includes(token));
	}
	
	// Count of citation needed tags
	const cnTagTokens = ['{{cn', '{{citation needed', '{{fact'];
	let originalCnTagCount = 0;
	for(const token of cnTagTokens){
		originalCnTagCount += countToken(token);
	}
	let message = `Added ${counter} tags. The original text had ${originalCnTagCount} tags.`;
	
	// Info about preexisting templates: the label shown to the user and the tokens that identify it
	const templateChecks = [
		{label: 'unreferenced', tokens: ['{{unreferenced}}', '{{unreferenced|', '{{unreferenced |']},
		{label: 'unreferenced section', tokens: ['{{unreferenced section']},
		{label: 'more citations needed', tokens: ['{{more citations needed']}
	];
	const foundTemplates = templateChecks
		.filter((check) => hasToken(check.tokens))
		.map((check) => check.label);
	
	// the leading space keeps this sentence apart from the previous one
	if(foundTemplates.length > 0){
		message += ` The page already contains the following templates: ${foundTemplates.join(', ')}`;
	}
	
	mw.notify(message);
}

// Build an array that tells, for every line of the wikitext, which main section it is in.
// lines: the wikitext split into individual lines
// Returns an array with one entry per line: the title of the last main headline found at or
// before that line, or the empty string for lines before the first main headline (the lead).
function getSectionLocations(lines){
	// a main headline starts with "==" and contains exactly four "=" characters in total
	const isMainHeadline = (text) => text.startsWith('==') && text.split('=').length - 1 === 4;
	
	// the title is what remains after removing every "==" and the surrounding whitespace
	const getHeadline = (text) => text.replace(/==/g, '').trim();
	
	const titles = [];
	let activeSection = '';
	for(let index = 0; index < lines.length; index++){
		const text = lines[index];
		
		// a main headline starts a new section, and the headline itself already belongs to it
		if(isMainHeadline(text)){
			activeSection = getHeadline(text);
		}
		titles.push(activeSection);
	}
	
	return titles;
}

// Look up the section title for a line.
// sectionLocations: array with one section title per line (as built by getSectionLocations)
// lineNumber: index of the line in that array
// Returns the entry stored at that index.
function getSectionTitle(sectionLocations, lineNumber){
	const title = sectionLocations[lineNumber];
	return title;
}

// Add citation-needed tags to the wikitext of the edit form.
// The wikitext is processed line by line. A line gets a tag appended if none of the exclusion
// criteria in excludeLine apply and it contains none of the reference tokens. If at least one
// tag was added, the textbox and the edit summary are updated; the user is notified either way.
function addCnTags(){
	// lines shorter than this are not checked
	const minimumLength = 200;
	
	// first characters of non-paragraph lines, like templates or lists
	const excludedFirstCharacters = ['=', '{', '}', '*', '#', ':', '|', ' ', '<', '!'];
	
	// characters that indicate that the last sentence of a line is finished
	const includedLastCharacters = ['.', '!', '?', "'", '"', '’', '”', '»'];
	
	// sections for which references are not relevant; the empty string is the lead section.
	// The titles are in lower case because section titles are compared case-insensitively,
	// so that variants like "Further Reading" or "External Links" are excluded as well.
	const excludedSections = ['', 'further reading', 'see also', 'external links', 'references', 'bibliography', 'notes', 'selected publications', 'selected works', 'plot', 'synopsis'];
	
	// tokens of citation-needed tags that may already be present in a line
	const cnTokens = ['{{citation', '{{cn', '{{fact'];
	
	// the following reference tokens indicate that the paragraph contains a reference
	const referenceTokens = ['<ref ', '<ref>', '</ref', '{{sfn', '{{harv', '{{r ', '{{r|', '{{r}}', '{{rma ', '{{rma|', '{{ran ', '{{ran|'];
	
	// the tag that is appended; the literal is split up so that it is not automatically
	// converted by wikipedia (the resulting string is the same)
	const cnTag = '{{sub' + 'st:cn}}';
	
	// utility function to check whether a lower-case line contains one of the (lower-case) tokens
	function containsAny(lowerCaseLine, tokens){
		return tokens.some((token) => lowerCaseLine.includes(token));
	}
	
	// utility function to exclude lines
	// all the criteria under which a line should not be checked for references
	function excludeLine(sectionLocations, lines, lineNumber){
		const line = lines[lineNumber];
		
		// minimum length
		if(line.length < minimumLength){
			return true;
		}
		
		// the first character is checked to exclude non-paragraph lines, like templates or lists
		if(excludedFirstCharacters.includes(line[0])){
			return true;
		}
		
		// the last sentence should be finished to ensure that it is not continued in the next line
		const trimmedLine = line.trimEnd();
		if(!includedLastCharacters.includes(trimmedLine[trimmedLine.length - 1])){
			return true;
		}
		
		// references are not relevant for certain sections
		const sectionTitle = getSectionTitle(sectionLocations, lineNumber);
		if(typeof sectionTitle === 'string' && excludedSections.includes(sectionTitle.toLowerCase())){
			return true;
		}
		
		// exclude gallery items (like in [[Visual art of Singapore]] and [[Abraham Lincoln]])
		const lowerCaseLine = line.toLowerCase();
		if(lowerCaseLine.startsWith('file:') || lowerCaseLine.startsWith('image:')){
			return true;
		}
		
		// exclude lines that already have citation-needed tags
		return containsAny(lowerCaseLine, cnTokens);
	}
	
	// get the wikitext and split it into individual lines
	const originalWikitext = getWikitext();
	const lines = originalWikitext.split('\n');
	const sectionLocations = getSectionLocations(lines);
	let counter = 0;
	
	// loop through the lines
	for(let i = 0; i < lines.length; i++){
		// skip the rest of the iteration if the exclusion criteria apply
		if(excludeLine(sectionLocations, lines, i)){
			continue;
		}
		
		// skip lines that contain a reference
		if(containsAny(lines[i].toLowerCase(), referenceTokens)){
			continue;
		}
		
		// the line has no references: add a citation-needed tag and count
		lines[i] = lines[i].trimEnd() + cnTag;
		counter++;
	}
	
	if(counter > 0){
		// the citation-needed tags were added to the lines, now they are converted back to wikitext
		setWikitext(lines.join('\n'));
		setEditSummary();
	}
	notify(counter, originalWikitext);
}

// anonymous main function
// Adds a link to the toolbox that runs addCnTags when clicked. The link is only added
// on edit pages in the allowed namespaces.
(function(){
	// restrict script to mainspace, userspace, and draftspace
	const allowedNamespaces = [0, 2, 118];
	if(!allowedNamespaces.includes(mw.config.get('wgNamespaceNumber'))){
		return;
	}
	
	// restrict to the edit page
	const allowedActions = ['edit', 'submit'];
	if(!allowedActions.includes(mw.config.get('wgAction'))){
		return;
	}
	
	// add a link to the toolbox once mediawiki.util is loaded and the document is ready
	$.when(mw.loader.using('mediawiki.util'), $.ready).then(function(){
		const portletLink = mw.util.addPortletLink('p-tb', '#', 'Add "Citation needed" tags');
		
		// addPortletLink may return null (e.g. if the toolbox portlet is not available);
		// in that case there is no link to attach the handler to
		if(!portletLink){
			return;
		}
		
		// run the main function when the link is clicked
		portletLink.onclick = function(e){
			e.preventDefault();
			addCnTags();
		};
	});
})();
// </nowiki>