User:Wmahan/despam.js
From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Firefox/Mozilla/Safari: hold down Shift while clicking Reload (or press Ctrl-Shift-R), Internet Explorer: press Ctrl-F5, Opera/Konqueror: press F5.
// despam.js -- helps find and remove external-link spam.
//
// Three modes, chosen from the current URL when the page loads:
//   1. Special:Linksearch results: adds a "despam" link next to each hit.
//   2. The despam tool page (despamPage): scans a page's history for the
//      edit by one of the given users that added the link.
//   3. An edit page opened with fakeaction=rmlink: removes lines containing
//      the link and fills in the edit summary.

// title of the wiki page that hosts the history-scan output
var despamPage = 'User:Wmahan/despam';
var query_php = '/w/query.php';
var wiki = 'http://en.wikipedia.org/wiki/';
var wPage = 'http://en.wikipedia.org/w/index.php';
var tab = ' ';

// number of history entries to fetch at a time
var revPage = 50;
// stop after searching this many revisions
var maxRevs = 5000;

// state shared between doDespam, fetchHistory and processHistory
var http_request;
var rvoffset;
// page being scanned; kept separate from despamPage (the tool's own title)
var despamTargetPage, despamUrl, despamUserRe;
// element that log() appends to; log() is a no-op for output while unset
var logDiv;

// messages indicating success or failure in removing a link
var removeSuccess = '<span style="color: #008000">link removed</span>\n';
var removeFailure = '<span style="color: #a00000">unable to remove link</span>\n';

// escape "text" so it can be inserted into HTML as plain text
// or as a quoted attribute value
function despamEscapeHtml(text) {
  return String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

// backslash-escape every regex metacharacter in "text"; when
// "keepStar" is true, '*' is left alone so it can act as a wildcard
function despamEscapeRegExp(text, keepStar) {
  var special = keepStar ? /[.+?^${}()|[\]\\]/g : /[.*+?^${}()|[\]\\]/g;
  return String(text).replace(special, '\\$&');
}

// decode one query-string component; falls back to unescape() for
// input that is not valid UTF-8 percent-encoding (e.g. legacy %uXXXX)
function despamDecode(text) {
  try {
    return decodeURIComponent(text);
  } catch (e) {
    return unescape(text);
  }
}

// parse parameters passed in the query string;
// returns an object mapping each decoded key to its decoded value
function parseParams() {
  var query = window.location.search.substring(1);
  var params = {};
  var qlist = query.split('&');
  for (var i = 0; i < qlist.length; i++) {
    var pos = qlist[i].indexOf('=');
    // pairs with no '=' or with an empty key are ignored
    if (pos > 0) {
      var key = despamDecode(qlist[i].substring(0, pos));
      var val = despamDecode(qlist[i].substring(pos + 1));
      params[key] = val;
    }
  }
  return params;
}

// save the edit page and close this window
function saveAndClose() {
  window.onunload = function() {
    window.close();
  };
  document.getElementById('wpSave').click();
}

// click the diff button on the edit page
function doDiff() {
  document.getElementById('wpDiff').click();
}

// on the edit page, remove the link "url", noting in the
// edit summary that it was added by "user" on "timestamp";
// does nothing unless the summary field exists and is empty
function removeLink(url, user, timestamp) {
  var es = document.getElementById('wpSummary');
  if (!es || es.value != '') {
    return;
  }

  var minor = document.getElementById('wpMinoredit');
  if (minor) {
    minor.checked = true;
  }

  var tb = document.getElementById('wpTextbox1');
  var cont = document.getElementById('content');

  // build the log area, including the buttons that call
  // the global saveAndClose() and doDiff()
  logDiv = document.createElement('div');
  logDiv.id = 'despamLog';
  // XXX
  logDiv.innerHTML = "<hr />\n<a name=\"despam\"></a>\n"
    + "<h2>Removing link</h2><br />\n"
    + "<input type='button' value='Save page and close window'"
    + " onclick='saveAndClose()' /> ***"
    + " <input type='button' value='Show changes'"
    + " onclick='doDiff()' />\n";
  cont.appendChild(logDiv);

  // turn url into a regex that matches it literally
  var urlPattern = despamEscapeRegExp(url, false);

  // the deleted wikitext is escaped because log() writes HTML
  var cb = function(a) {
    log('deleted line: <span style="color: #c00000">'
      + despamEscapeHtml(a) + '</span>');
    return '\n';
  };

  // *[http://www.example.com link] plus any following text
  var re1 = new RegExp('\\n\\**[^\\n]*\\[' + urlPattern
    + '[^\\n]*?\\][^\\n]*?\\n', 'g');
  var newText = tb.value.replace(re1, cb);

  if (newText != tb.value) {
    log(removeSuccess);
    tb.value = newText;
    es.value = 'rm linkspam by [[Special:Contributions/' + user + '|'
      + user + ']] on ' + timestamp;
  } else {
    log(removeFailure);
  }
  // jump to the log area
  document.location.href = '#despam';
}

// write the log message "msg" (an HTML fragment) to the logging area;
// callers must escape any untrusted text they include
function log(msg) {
  var div = document.createElement('div');
  div.innerHTML = msg; // XXX
  if (logDiv) {
    logDiv.appendChild(div);
  }
}

// fetch "url" with a synchronous (blocking) call
// and return the response text
function sync_fetch(url) {
  log('fetching ' + despamEscapeHtml(url) + '...');
  var request = new XMLHttpRequest();
  request.open("GET", url, false); // synchronous
  request.send(null);
  return request.responseText;
}

// Check whether the link "url" is in "page" revision
// "afterid", but not in "beforeid"
function wasLinkAdded(page, url, beforeid, afterid) {
  var qpage = wiki + encodeURIComponent(page) + '?action=raw&oldid=';

  var beforeText = sync_fetch(qpage + beforeid);
  if (beforeText.indexOf(url) != -1) {
    // link already existed; it was not added in the next edit
    return false;
  }

  // XXX a missing link in "afterid" doesn't consider vandalism
  var afterText = sync_fetch(qpage + afterid);
  return afterText.indexOf(url) != -1;
}

// examine the list of history entries "results"
// for edits by a user matching "userRe" that add
// the link "url"; returns true when the edit is found, false when
// the search is over, and nothing when another page of history
// has been requested
function processHistory(results, url, page, userRe) {
  var pages = results ? results['pages'] : null;
  var info;
  for (var key in pages) {
    // XXX hack to get first element of associative array
    info = pages[key];
    break;
  }
  var revs = info ? info['revisions'] : null;
  if (!revs) {
    log('<span style="color: #aa0000">No match found!</span>');
    return false;
  }

  var found = false;
  var curuser, afterid, beforeid;
  // "i" is the revision being examined and "prevI" the entry that
  // follows it in the list, used as the revision before the edit
  var i = -1;
  for (var prevI in revs) {
    if (i == -1) {
      i = prevI;
      continue;
    }
    curuser = revs[i]['user'];
    if (curuser.match(userRe)) {
      // found a possible match
      afterid = revs[i]['revid'];
      beforeid = revs[prevI]['revid'];
      log('checking possible match: ' + despamEscapeHtml(curuser)
        + ' on ' + despamEscapeHtml(revs[i]['timestamp']));
      if (wasLinkAdded(page, url, beforeid, afterid)) {
        found = true;
        break;
      }
      log(tab + 'no match');
    }
    i = prevI;
  }

  if (found) {
    var timestamp = revs[i]['timestamp'];
    var title = encodeURIComponent(page);
    // '&amp;' keeps the query separators from being read as HTML
    // entities (e.g. "&times") when the string is parsed as HTML
    log('found addition of link by ' + despamEscapeHtml(curuser)
      + ' on ' + despamEscapeHtml(timestamp)
      + ' (<a target="_blank" href="' + wPage + '?title=' + title
      + '&amp;diff=' + encodeURIComponent(afterid)
      + '&amp;oldid=' + encodeURIComponent(beforeid) + '">diff</a>, <a href="'
      + wPage + '?title=' + title + '&amp;action=edit&amp;fakeaction=rmlink'
      + '&amp;user=' + encodeURIComponent(curuser)
      + '&amp;timestamp=' + encodeURIComponent(timestamp)
      + '&amp;url=' + encodeURIComponent(url) + '">remove link</a>)');
    return true;
  }

  rvoffset += revPage;
  if (rvoffset > maxRevs || revs.length < revPage) {
    // we reached the end without finding anything
    log('<span style="color: #aa0000">No match found!</span>');
    return false;
  }
  // go on to the next page
  fetchHistory();
}

// set everything up and start fetching pages of history entries;
// "users" is a ':'-separated list in which '*' stands for digits.
// returns false when the user list cannot be turned into a regex
function doDespam(url, page, users) {
  // escape regex characters, then turn wildcards into regexes
  var pattern = despamEscapeRegExp(users, true).replace(/\*/g, '\\d+');

  // remove extraneous spaces and drop empty entries; an empty
  // alternative would match every user
  var pieces = pattern.split(':');
  var userlist = [];
  for (var i = 0; i < pieces.length; i++) {
    var entry = pieces[i].replace(/ +/g, ' ').replace(/(^ +| +$)/g, '');
    if (entry != '') {
      userlist.push(entry);
    }
  }

  var userRe = null;
  if (userlist.length > 0) {
    try {
      userRe = new RegExp('(' + userlist.join('|') + ')');
    } catch (e) {
      userRe = null;
    }
  }
  if (!userRe) {
    log('Error: invalid user list');
    log('debugging info: (' + despamEscapeHtml(userlist.join('|')) + ')');
    return false;
  }

  var nicePage = page.replace(/_/g, ' ');
  var target = page.replace(/ /g, '_');
  var title = encodeURIComponent(target);
  log('<h2>Scanning history for <a href="' + wiki + title + '">'
    + despamEscapeHtml(nicePage) + '</a> (<a href="' + wPage + '?title='
    + title + '&amp;action=history">history</a>)</h2>\n');

  rvoffset = 0;
  despamTargetPage = target;
  despamUrl = url;
  despamUserRe = userRe;
  http_request = new XMLHttpRequest();
  fetchHistory();
}

// request the next page of history entries for despamTargetPage,
// starting at rvoffset, and pass the result to processHistory
function fetchHistory() {
  log('fetching history entries #' + rvoffset + ' through #'
    + (rvoffset + revPage));

  // fetch the query page
  var qpage = query_php + '?what=revisions&format=json&rvlimit=' + revPage
    + '&rvoffset=' + rvoffset
    + '&titles=' + encodeURIComponent(despamTargetPage);

  http_request.open("GET", qpage, true);
  http_request.onreadystatechange = function () {
    if (http_request.readyState != 4) {
      return;
    }
    if (http_request.status != 200) {
      log('<span style="color: #aa0000">There was a problem querying the page history.</span>');
      return false;
    }
    var results;
    try {
      // NOTE(review): eval() runs whatever the server returns; if the
      // response is strict JSON this should become JSON.parse -- confirm
      results = eval("(" + http_request.responseText + ")");
    } catch (e) {
      log('<span style="color: #aa0000">There was a problem querying the page history.</span>');
      return false;
    }
    processHistory(results, despamUrl, despamTargetPage, despamUserRe);
  };
  http_request.send(null);
  return true;
}

// onclick handler for the "despam" links added to Linksearch results:
// opens the despam tool page for link "url" on "page", using the
// users typed into the despamUsers input; always returns false so
// the link itself is not followed
function despamClick(url, page) {
  var toolUrl = '/wiki/' + despamPage;
  var input = document.getElementById('despamUsers');
  var users = input ? input.value : '';
  if (users == '') {
    alert('No user name or IP address was entered');
    return false;
  }
  window.open(toolUrl + '?url=' + encodeURIComponent(url)
    + '&page=' + encodeURIComponent(page)
    + '&users=' + encodeURIComponent(users));
  return false;
}

// look at the current URL and start whichever mode applies
function despamInit() {
  var href = document.location.href;
  var params;

  if ((href.indexOf(':Linksearch') != -1 || href.indexOf('%3ALinksearch') != -1)
      && href.indexOf('target=') != -1) {
    var textLabel = 'IPs or usernames for despam (e.g. <i>SpamUser</i>, <i>192.168.0.*</i>):<br />';
    var cont = document.getElementById("content");
    var html = cont.innerHTML; // XXX un-DOM

    // b = the whole entry, c = text of its first link, d = text of its
    // second link; despamClick receives c as the url and d as the page
    var re = new RegExp('<li>(<a href.*?>(.*?)</a>.*?<a .*?>(.*?)</a>)</li>', 'g');
    var quoteChars = new RegExp('([\'\\\\])', 'g');
    var skipPages = new RegExp('(talk|user):', 'i');
    var cb = function(a, b, c, d) {
      if (d.match(skipPages)) {
        return a; // skip entry
      }
      // escape quotes: backslashes for the JavaScript string literal,
      // &quot; so a double quote cannot end the onclick attribute
      c = c.replace(quoteChars, '\\$1').replace(/"/g, '&quot;');
      d = d.replace(quoteChars, '\\$1').replace(/"/g, '&quot;');
      return '<li>' + b + ' [<a href="" onclick="return despamClick(\''
        + c + '\',\'' + d + '\')">despam</a>]</li>';
    };

    var div = document.createElement('div');
    div.innerHTML = textLabel; // XXX
    var inp = document.createElement('input');
    inp.type = 'text';
    inp.id = 'despamUsers';
    inp.size = 40;
    div.appendChild(inp);

    cont.innerHTML = html.replace(re, cb);

    // put the input box just before the first list on the page
    var firstList = document.getElementsByTagName('ol')[0];
    if (firstList && firstList.parentNode) {
      firstList.parentNode.insertBefore(div, firstList);
    } else {
      var bc = document.getElementById('bodyContent');
      if (bc) {
        bc.appendChild(div);
      }
    }
  } else if (href.indexOf(despamPage) != -1 && href.indexOf('action=') == -1) {
    logDiv = document.getElementById('despamLog');
    params = parseParams();
    if (params['url'] && params['page'] && params['users']) {
      doDespam(params['url'], params['page'], params['users']);
    } else {
      log('<span style="color: #aa0000">The url, page, or users parameter was missing.</span>');
    }
  } else if (href.indexOf('action=edit&fakeaction=rmlink') != -1) {
    params = parseParams();
    if (params['url'] && params['user'] && params['timestamp']) {
      removeLink(params['url'], params['user'], params['timestamp']);
    }
  }
}

// register despamInit to run on page load; the guards keep the script
// from throwing where addOnloadHook is not defined
if (typeof addOnloadHook === 'function') {
  addOnloadHook(despamInit);
} else if (typeof window !== 'undefined' && window.addEventListener) {
  window.addEventListener('load', despamInit, false);
}