// ==UserScript==
// @name OldSpeak
// @namespace http://www.fourmilab.ch/webtools/greasemonkey/OldSpeak/
// @description Translates legacy media Newspeak to plain English
// @include *
// @version 1.1
// @homepage http://www.fourmilab.ch/webtools/greasemonkey/

/*
			OldSpeak.user.js
	    by John Walker  -- http://www.fourmilab.ch/

    This Firefox Greasemonkey (http://www.greasespot.net/) user script
    (also usable with other browsers with a compatible user script
    facility) allows you to translate words or phrases in the body
    text of HTML/XHTML documents to alternative words or phrases,
    optionally with automatic modification for plural or other
    variant forms.  This allows you to convert Orwellian Newspeak
    in legacy media and other documents to old-fashioned plain
    English.

    You can add any words you wish translated to the "deftrans"
    call below.  If you require additional variant forms, add their
    expansions to the pluralise() function below it in the code.

    By default, the script is applied to all sites.  If you'd like to
    restrict it to specific sites (for example, one or more legacy media
    outlets), use the Tools/Greasemonkey/Manage User Scripts menu
    item and edit the Included Pages and Excluded Pages accordingly.

    You can debug patterns and the behaviour of this script by setting
    the variables "brackets" and/or "showchanges" nonzero below.
    If "brackets" is nonzero, all changed words will be enclosed in
    {braces}.  If "showchanges" is nonzero, the original text and
    its replacement will be shown, separated by a Unicode right
    arrow character.

*/

/*  This script is derived from the Profanity Filter:
	http://userscripts.org/scripts/show/7286
    which in turn is based upon the Jmaxxz Vulgar Word Blocker
	http://userscripts.org/scripts/show/2287
    and the "Dumb Quotes" script:
	http://diveintogreasemonkey.org/casestudy/dumbquotes.html
    in Mark Pilgrim's "Dive into Greasemonkey":
	http://diveintogreasemonkey.org/
*/

// ==/UserScript==
(function() {

    //  Configuration parameters

    /*  Debugging aid: when "brackets" is nonzero, every replaced
        phrase is wrapped in {braces} in the output.  */
    var brackets = 0;

    /*  Debugging aid: when "showchanges" is nonzero, each replacement
        is preceded by the original text and a Unicode right arrow.  */
    var showchanges = 0;

    /*  Parallel rule tables filled in by deftrans().  Entry i of each
        array belongs to the same rule: the key exactly as written
        (including any leading "="), the regular expression compiled
        from it, and the replacement text.  */
    var pattern = [];
    var matches = [];
    var translations = [];

    /*	Define the transformations.  The transformations are defined
	as key:value pairs.  The key is a regular expression which
	matches the word or phrase.  If the first character of the key
	is "=", the match will be case-sensitive, otherwise it will be
	case-insensitive.  The value is the replacement word.  If the
	pattern contains matching for plural or other variant forms,
	you must add code to the pluralise() function to recognise the
	variant match pattern and transform the replacement.  Any
	grouping within these patterns *must* be done with a (?: ... )
	specification to avoid interfering with the capture from the
	text string.

	You must be careful not to define a transformation whose
	replacement is matched by the pattern, for example:
	    "sense": "nonsense",    // DON'T DO THIS!
	as this will cause a CPU loop in the script.  */

    /*  Register the active translation rules.  deftrans() is declared
        as a function further down in this closure, so it is hoisted
        and can be called here ahead of its definition.  */
    deftrans({
	//  Appeals to authority
	//  (a key ending in "s?" lets pluralise() add a plural ending
	//  to the replacement when the matched text ends in "s")
	"experts?": "idiot",
	"analysts?": "moron",
	"politicians?": "crook",
	//  Leading "=" makes this rule case-sensitive
	"=(?:Study|Report|Analysis)\\:": "Propaganda:",

	/*  The following terms are predefined for those who
	    may wish to include them in the words and phrases
	    translated, and also serve as a template for
	    creating new replacements.  Note that the key
	    values are strings from which regular expressions
	    will be formed, not regular expressions themselves.
	    Consequently, when you use the regular expression
	    escape character "\", you must use a double
	    backslash ("\\") to force the character in the
	    string literal.
	    
	    To enable one or more of these transformations,
	    simply remove the "//" from the start of the line
	    containing it.  */

	//  Issue euphemism
//	"affirmative\\s+action": "group quotas",
//	"global\\s+warming": "climate change",
//	"gun control": "victim disarmament",
//	"public\\s+assistance": "welfare",
//	"public\\s+schools?": "government school",

	//  Political spin
//	"liberals?": "collectivist",
//	"right[\\s\\-]?wing": "individualist",

	//  Political correctness
//	"african[\\s\\-]+americans?": "black",
//	"anti[\\s\\-]+choice": "anti-abortion",
//	"disabilit(?:y|ies)": "handicap",
//	"disabled": "handicapped",
//	"fetus": "unborn child",
//	"fetuses": "unborn children",
//	"gays?": "homosexual",
//	"(?:government|public)[\\s\\-]+funds?": "taxpayer fund",
//	"(?:government|public)[\\s\\-]+funded": "taxpayer funded",
//	"hearing[\\s\\-]+impaired": "deaf",
//	"homeless\\s+persons?": "bum",
//	"homeless\\s+people": "bums",
//	"native\\s+americans?": "Indian",
//	"pro[\\s\\-]+choice": "pro-abortion",
//	"pro[\\s\\-]+life": "anti-abortion",
//	"progressives?": "collectivist",
//	"rainforests?": "jungle",
//	"reproductive\\s+rights": "right to abortion",
//	"root\\s+causes?": "excuse",
//	"trial\\s+lawyers?": "ambulance chaser",
//	"undocumented\\s+workers?": "illegal alien",
//	"(?:vision|visually)[\\s\\-]+impaired": "blind",
//	"wetlands?": "swamp",
    });

    //	Build the table of replacements

    function deftrans(replacements) {
        /*  Compile each key:value pair of "replacements" into the
            parallel pattern / matches / translations tables.

            replacements -- object mapping a regular expression source
                            string (optionally prefixed with "=" to
                            request case-sensitive matching) to its
                            replacement text.

            Only the object's own properties are used, so enumerable
            properties that some other script may have added to
            Object.prototype are never mistaken for rules.  */
        var hasOwn = Object.prototype.hasOwnProperty;

        for (var word in replacements) {
            if (!hasOwn.call(replacements, word)) {
                continue;
            }

            var caseSensitive = word.charAt(0) === "=";
            var source = caseSensitive ? word.substring(1) : word;

            //  The key is stored as written; pluralise() inspects it
            pattern.push(word);

            /*  The phrase must be delimited on both sides by a
                non-letter.  The trailing delimiter also excludes the
                right arrow so that the original text echoed by the
                "showchanges" option is not translated again.  */
            matches.push(new RegExp(
                "([^a-zA-Z])(" + source + ")([^a-zA-Z\u2192])",
                caseSensitive ? "g" : "gi"));

            translations.push(replacements[word]);
        }
    }

    // Perform the replacements

    /*  Index of the rule currently being applied.  transform() sets
        it before each replace() call; repmatch() and pluralise() read
        it to find the matching entries of the pattern and
        translations tables.  */
    var curpat;     	    // Hidden argument to repmatch()

    /*	If pattern has a plural variant and the word matched
	conforms to it, pluralise the replacement word.  Note
	that the replacement word need not use the same
	pluralisation convention as the matched word.  */

    function pluralise(word, replacement) {
        /*  word        -- the text actually matched in the document
            replacement -- the singular replacement for the current rule
            Returns the replacement, pluralised when the current rule
            (pattern[curpat]) ends in one of the recognised optional
            plural forms and "word" used that plural form.

            Plural endings applied to the replacement:
                consonant + "y"             ->  "ies"  (bully, bullies)
                "s", "x", "z", "ch", "sh"   ->  add "es" (fox, foxes)
                anything else               ->  add "s"
            A vowel before a final "y" keeps the "y" (attorney,
            attorneys).  */
        var rule = pattern[curpat];

        var matchedPlural =
            (/\(\?:y\|ies\)$/.test(rule) && /ies$/i.test(word)) ||
            (/s\(\?:es\)\?$/.test(rule) && /ses$/i.test(word)) ||
            (/s\?$/.test(rule) && /s$/i.test(word));

        if (!matchedPlural) {
            return replacement;
        }

        if (/[^aeiouAEIOU]y$/.test(replacement)) {
            return replacement.slice(0, -1) + "ies";
        }
        if (/(?:s|x|z|ch|sh)$/.test(replacement)) {
            return replacement + "es";
        }
        return replacement + "s";
    }

    /*  Replace matched sequence with possibly pluralised and
        case conforming replacement.  */

    function repmatch(matched, before, word, after) {
        /*  Callback for String.replace().
            matched -- the complete match (unused)
            before  -- the delimiter character preceding the phrase
            word    -- the phrase that matched the current rule
            after   -- the delimiter character following the phrase
            Returns the text to substitute: both delimiters are kept
            and the phrase is replaced by the current rule's
            translation, pluralised and re-cased to follow "word".  */
        var upperCased = function(m) { return m.toUpperCase(); };
        var replacement = pluralise(word, translations[curpat]);

        if (/^[A-Z]/.test(word)) {
            //  The matched text starts with a capital letter
            if (/^[^a-z]+$/.test(word)) {
                //  No lower case letters at all: all-caps replacement
                replacement = replacement.toUpperCase();
            } else if (/^[A-Z]([a-z]*\s[A-Z])+/.test(word)) {
                /*  Multi-word match with every word capitalised:
                    capitalise every word of the replacement.  */
                replacement = replacement.replace(/(?:^.|\s[a-z])/g, upperCased);
            } else {
                //  Otherwise capitalise only the first letter
                replacement = replacement.replace(/^./, upperCased);
            }
        }

        //  Optional debugging decorations
        var opening = brackets ? "{" : "";
        var closing = brackets ? "}" : "";
        var trace = showchanges ? (word + "\u2192") : "";

        return before + opening + trace + replacement + closing + after;
    }

    function transform(s) {
        /*  Run every rule over "s" once, in table order, and return
            the result.  curpat is advanced before each replace() call
            because repmatch() uses it to find the rule in effect.  */
        var text = s;
        curpat = 0;
        while (curpat < matches.length) {
            text = text.replace(matches[curpat], repmatch);
            curpat += 1;
        }
        return text;
    }
    
    //	We only modify HTML/XHTML documents
    if (document.contentType &&
    	(!(document.contentType.match(/html/i)))) {
    	return;
    }
    
    //	We only modify documents retrieved with HTTP/HTTPS
    if (document.URL &&
        (!(document.URL.match(/^https?:/i)))) {
        return;
    }

    // Replace in document title

    if (document.title) {
	var t = transform(" " + document.title + " ");
	document.title = t.substring(1, t.length - 1)
    }

    // Replace in body copy

    var textnodes = document.evaluate("//body//text()", document, null,
	    XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, null);
	for (var i = 0; i < textnodes.snapshotLength; i++) {
	    node = textnodes.snapshotItem(i);
	    /* Test whether this text node appears within a
	       <style>, <script>, or <textarea> container.
	       If so, it is not actual body text and must
	       be left alone to avoid wrecking the page. */
	    if (node.parentNode.tagName != "STYLE" &&
		node.parentNode.tagName != "TEXTAREA" &&
		node.parentNode.tagName != "SCRIPT") {
		/* Many documents have large numbers of empty text nodes.
		   By testing for them, we avoid running all of our
		   regular expressions over a target which they can't
		   possibly match. */
		if (!(node.data.match(/^\s*$/))) {
		    var s = " " + node.data + " ";
		    var d = s;
		    do {
			var od = d;
			d = transform(d);
		    } while (od != d);
		    if (s != d) {
			node.data = d.substring(1, d.length - 1);
		    }
		}
	    }
	}

})();
