20,741
edits
m (→The script: expose the regex solver via the UI) |
|||
Line 446: | Line 446: | ||
// @name:it Instant-Cquotes | // @name:it Instant-Cquotes | ||
// @license public domain | // @license public domain | ||
// @version 0. | // @version 0.39 | ||
// @date 2016-05-05 | // @date 2016-05-05 | ||
// @description Automatically converts selected FlightGear mailing list and forum quotes into post-processed MediaWiki markup (i.e. cquotes). | // @description Automatically converts selected FlightGear mailing list and forum quotes into post-processed MediaWiki markup (i.e. cquotes). | ||
Line 509: | Line 509: | ||
// TODO: move to GreaseMonkey/UI host | // TODO: move to GreaseMonkey/UI host | ||
// prevent conflicts with jQuery used on webpages: https://wiki.greasespot.net/Third-Party_Libraries#jQuery | // prevent conflicts with jQuery used on webpages: https://wiki.greasespot.net/Third-Party_Libraries#jQuery | ||
// http://stackoverflow.com/a/5014220 | |||
this.$ = this.jQuery = jQuery.noConflict(true); | this.$ = this.jQuery = jQuery.noConflict(true); | ||
Line 575: | Line 576: | ||
validate: function(host) { | validate: function(host) { | ||
if( | if (host.get_persistent('startup.disable_validation',false)) return; | ||
if(Environment.scriptEngine !== "Greasemonkey") | |||
console.log("NOTE: This script has not been tested with script engines other than GreaseMonkey recently!"); | console.log("NOTE: This script has not been tested with script engines other than GreaseMonkey recently!"); | ||
var dependencies = [ | var dependencies = [ | ||
{name:'jQuery', test: function() {} }, | {name:'jQuery', test: function() {} }, | ||
{name:'genetic.js', test: function() {} } | {name:'genetic.js', test: function() {} }, | ||
{name:'synaptic', test: function() {} }, | |||
]; | ]; | ||
}, | |||
[].forEach.call(dependencies, function(dep) { | |||
console.log("Checking for dependency:"+dep.name); | |||
var status=false; | |||
try { | |||
dep.test.call(undefined); | |||
status=true; | |||
} | |||
catch(e) { | |||
status=false; | |||
} | |||
finally { | |||
var success = (status)?'==> success':'==> failed'; | |||
console.log(success); | |||
return status; | |||
} | |||
}); | |||
}, // validate | |||
// this contains unit tests for checking crucial APIs that must work for the script to work correctly | // this contains unit tests for checking crucial APIs that must work for the script to work correctly | ||
Line 650: | Line 671: | ||
// To see this working, you need to package up the whole file as a firefox xpi using "jpm xpi" | // To see this working, you need to package up the whole file as a firefox xpi using "jpm xpi" | ||
// and then start the whole thing via "jpm run", to do that, you also need a matching package.json (i.e. via jpm init) | // and then start the whole thing via "jpm run", to do that, you also need a matching package.json (i.e. via jpm init) | ||
// ALSO: you will have to explicitly install any dependencies using jpm | |||
FirefoxAddon: { | FirefoxAddon: { | ||
init: function() { | init: function() { | ||
Line 668: | Line 690: | ||
}, | }, | ||
registerConfigurationOption: function() { | registerConfigurationOption: function(name, callback, hook) { | ||
// https://developer.mozilla.org/en-US/Add-ons/SDK/Tutorials/Add_a_Context_Menu_Item | // https://developer.mozilla.org/en-US/Add-ons/SDK/Tutorials/Add_a_Context_Menu_Item | ||
console.log(" | console.log("config menu support n/a in firefox mode"); | ||
// https://developer.mozilla.org/en-US/Add-ons/SDK/Tutorials/Using_third-party_modules_%28jpm%29 | |||
var menuitems = require("menuitem"); | |||
var menuitem = menuitems.Menuitem({ | |||
id: "clickme", | |||
menuid: "menu_ToolsPopup", | |||
label: name, | |||
onCommand: function() { | |||
console.log("menuitem clicked:"); | |||
callback(); | |||
}, | |||
insertbefore: "menu_pageInfo" | |||
}); | |||
}, | }, | ||
Line 680: | Line 714: | ||
label: "Instant Cquote", | label: "Instant Cquote", | ||
context: contextMenu.SelectionContext(), | context: contextMenu.SelectionContext(), | ||
// https://developer.mozilla.org/en/Add-ons/SDK/Guides/Two_Types_of_Scripts | |||
// https://developer.mozilla.org/en-US/Add-ons/SDK/Guides/Content_Scripts | |||
contentScript: 'self.on("click", function () {' + | contentScript: 'self.on("click", function () {' + | ||
' var text = window.getSelection().toString();' + | ' var text = window.getSelection().toString();' + | ||
Line 686: | Line 722: | ||
onMessage: function (selectionText) { | onMessage: function (selectionText) { | ||
console.log(selectionText); | console.log(selectionText); | ||
instantCquote(selectionText); | |||
} | } | ||
}); | }); | ||
Line 706: | Line 743: | ||
return default_value;}, | return default_value;}, | ||
set_persistent: function(key, value) { | set_persistent: function(key, value) { | ||
console.log("persistence stubs not yet filled in !"); | console.log("firefox persistence stubs not yet filled in !"); | ||
}, | }, | ||
Line 717: | Line 754: | ||
clipboard.set(content); | clipboard.set(content); | ||
} //set_clipboard | } //set_clipboard | ||
}, // end of FireFox addon config | }, // end of FireFox addon config | ||
Line 921: | Line 959: | ||
// the first thing we need to do is to determine what APIs are available | // the first thing we need to do is to determine what APIs are available | ||
// and store everything in a Host hash, which is used for API lookups | // and store everything in a Host hash, which is subsequently used for API lookups | ||
// the Host hash contains all platform/browser-specific APIs | // the Host hash contains all platform/browser-specific APIs | ||
var Host = Environment.getHost(); | var Host = Environment.getHost(); | ||
Environment.validate(Host); // this checks the obtained host to see if all required dependencies are available | |||
Host.init(); // run environment specific initialization code (e.g. logic for GreaseMonkey setup) | Host.init(); // run environment specific initialization code (e.g. logic for GreaseMonkey setup) | ||
Line 1,060: | Line 1,099: | ||
enabled: true, | enabled: true, | ||
type: 'archive', | type: 'archive', | ||
event: 'document.onmouseup', // when to invoke the event handler | event: 'document.onmouseup', // when to invoke the event handler (not used atm) | ||
event_handler: null, // the event handler to be invoked | event_handler: null, // the event handler to be invoked (not used atm) | ||
url_reg: /https:\/\/forum\.flightgear\.org\/.*/, | url_reg: /https:\/\/forum\.flightgear\.org\/.*/, | ||
content: { | content: { | ||
xpath: '', //TODO: this must be added for downloadPosting() to work | xpath: '', //TODO: this must be added for downloadPosting() to work, or it cannot extract contents | ||
selection: getSelectedHtml, | selection: getSelectedHtml, | ||
idStyle: /p[0-9]{6}/, | idStyle: /p[0-9]{6}/, | ||
Line 1,093: | Line 1,132: | ||
url: 'https://forum.flightgear.org/viewtopic.php?f=18&p=284108#p284108', | url: 'https://forum.flightgear.org/viewtopic.php?f=18&p=284108#p284108', | ||
author: 'mickybadia', | author: 'mickybadia', | ||
date: '', | date: 'May 3rd, 2016', | ||
title: 'OSM still PNG maps' | title: 'OSM still PNG maps' | ||
}, | }, | ||
Line 1,099: | Line 1,138: | ||
url: 'https://forum.flightgear.org/viewtopic.php?f=19&p=284120#p284120', | url: 'https://forum.flightgear.org/viewtopic.php?f=19&p=284120#p284120', | ||
author: 'Thorsten', | author: 'Thorsten', | ||
date: '', | date: 'May 3rd, 2016', | ||
title: 'Re: FlightGear\'s Screenshot Of The Month MAY 2016' | title: 'Re: FlightGear\'s Screenshot Of The Month MAY 2016' | ||
}, | }, | ||
Line 1,105: | Line 1,144: | ||
url: 'https://forum.flightgear.org/viewtopic.php?f=71&t=29279&p=283455#p283446', | url: 'https://forum.flightgear.org/viewtopic.php?f=71&t=29279&p=283455#p283446', | ||
author: 'Hooray', | author: 'Hooray', | ||
date: '', | date: 'Apr 25th, 2016', | ||
title: 'Re: Best way to learn Canvas?' | title: 'Re: Best way to learn Canvas?' | ||
}, | }, | ||
Line 1,111: | Line 1,150: | ||
url: 'https://forum.flightgear.org/viewtopic.php?f=4&t=1460&p=283994#p283994', | url: 'https://forum.flightgear.org/viewtopic.php?f=4&t=1460&p=283994#p283994', | ||
author: 'bugman', | author: 'bugman', | ||
date: '', | date: 'May 2nd, 2016', | ||
title: 'Re: eurofighter typhoon' | title: 'Re: eurofighter typhoon' | ||
} // add other tests below | } // add other tests below | ||
Line 1,118: | Line 1,157: | ||
author: { | author: { | ||
xpath: 'div/div[1]/p/strong/a/text()', | xpath: 'div/div[1]/p/strong/a/text()', | ||
transform: [] | transform: [] // no transformations applied | ||
}, | }, | ||
title: { | title: { | ||
xpath: 'div/div[1]/h3/a/text()', | xpath: 'div/div[1]/h3/a/text()', | ||
transform: [] | transform: [] // no transformations applied | ||
}, | }, | ||
date: { | date: { | ||
Line 1,177: | Line 1,216: | ||
<li><a href="#about">About</a></li> | <li><a href="#about">About</a></li> | ||
</ul> | </ul> | ||
<div id="selection">This tab contains your extracted and post-processed selection | <div id="selection">This tab contains your extracted and post-processed selection, converted to proper wikimedia markup, including proper attribution. | ||
<div id="content"/> | <div id="content"> | ||
<label for="template_select">Select a template</label> | |||
<select name="template_select" id="template_select"> | |||
<option>default</option> | |||
<option>cquote</option> | |||
</select> | |||
</div> | |||
<div id="options"> | <div id="options"> | ||
<b>Note this is work-in-progress, i.e. not yet fully functional</b><br/> | <b>Note this is work-in-progress, i.e. not yet fully functional</b><br/> | ||
Line 1,229: | Line 1,276: | ||
<select name="template_select" id="template_select"> | <select name="template_select" id="template_select"> | ||
<option>default</option> | <option>default</option> | ||
<option>cquote</option> | |||
</select> | </select> | ||
Line 1,240: | Line 1,288: | ||
<button id="evolve_regex">Evolve regex</button><p/> | <button id="evolve_regex">Evolve regex</button><p/> | ||
<button id="test_perceptron">Test Perceptron</button><p/> | <button id="test_perceptron">Test Perceptron</button><p/> | ||
<div id="output">< | <div id="output"> | ||
<table id="results"> | |||
<thead> | |||
<tr> | |||
<th>Generation</th> | |||
<th>Fitness</th> | |||
<th>Expression</th> | |||
<th>Result</th> | |||
</tr> | |||
</thead> | |||
<tbody> | |||
</tbody> | |||
</table> | |||
<!-- | |||
<textarea id="devel_output" lines="10"></textarea><p/> | <textarea id="devel_output" lines="10"></textarea><p/> | ||
--> | |||
</div> | </div> | ||
</div> | </div> | ||
Line 1,255: | Line 1,319: | ||
var evolve_regex = $('div#development button#evolve_regex', markup); | var evolve_regex = $('div#development button#evolve_regex', markup); | ||
evolve_regex.click(function() { | evolve_regex.click(function() { | ||
alert("Evolve regex"); | //alert("Evolve regex"); | ||
evolve_expression_test(); | |||
}); | }); | ||
Line 1,265: | Line 1,330: | ||
// add dynamic elements to each tab | // add dynamic elements to each tab | ||
// NOTE: this affects all template selectors, on all tabs | |||
$('select#template_select', markup).change(function() { | |||
UI.alert("Sorry, templates are not yet fully implemented (WIP)"); | |||
}); | |||
var help = $('#helpButton', markup); | var help = $('#helpButton', markup); | ||
Line 1,290: | Line 1,360: | ||
// TODO: Currently, this is hard-coded, but should be made customizable via the "articles" tab at some point ... | // TODO: Currently, this is hard-coded, but should be made customizable via the "articles" tab at some point ... | ||
var articles = [ | var articles = [ | ||
// NOTE: category must match an existing <optgroup> | // NOTE: category must match an existing <optgroup> above, title must match an existing wiki article | ||
{category:'support', name:'Frequently asked questions', url:''}, | {category:'support', name:'Frequently asked questions', url:''}, | ||
{category:'support', name:'Asking for help', url:''}, | {category:'support', name:'Asking for help', url:''}, | ||
Line 1,296: | Line 1,366: | ||
{category:'news', name:'Next changelog', url:''}, | {category:'news', name:'Next changelog', url:''}, | ||
{category:'release', name:'Release plan/Lessons learned', url:''}, // TODO: use wikimedia template | {category:'release', name:'Release plan/Lessons learned', url:''}, // TODO: use wikimedia template | ||
{category:'develop', name:'Nasal library', url:''} | {category:'develop', name:'Nasal library', url:''}, | ||
{category:'develop', name:'Canvas Snippets', url:''}, | |||
]; | ]; | ||
Line 1,599: | Line 1,670: | ||
// The main function | // The main function | ||
// TODO: split up, so that we can reuse the code elsewhere | // TODO: split up, so that we can reuse the code elsewhere | ||
function instantCquote() { | function instantCquote(sel) { | ||
var profile = getProfile(); | var profile = getProfile(); | ||
// TODO: use config hash here | // TODO: use config hash here | ||
var selection = document.getSelection() | var selection = document.getSelection(), | ||
post_id=0; | post_id=0; | ||
Line 1,624: | Line 1,691: | ||
Host.dbLog('Selection is not valid, aborting function'); | Host.dbLog('Selection is not valid, aborting function'); | ||
return; | return; | ||
} | } | ||
try { | |||
transformationLoop(profile, post_id); | |||
} | |||
catch(e) { | |||
UI.alert("Transformation loop:\n"+e.message); | |||
} | |||
} // instantCquote | |||
// TODO: this needs to be refactored so that it can be also reused by the async/AJAX mode | // TODO: this needs to be refactored so that it can be also reused by the async/AJAX mode | ||
// to extract fields in the background (i.e. move to a separate function) | // to extract fields in the background (i.e. move to a separate function) | ||
function transformationLoop(profile, post_id) { | |||
var output = {}, field; | |||
Host.dbLog("Starting extraction/transformation loop"); | Host.dbLog("Starting extraction/transformation loop"); | ||
for (field in profile) { | for (field in profile) { | ||
Line 1,648: | Line 1,723: | ||
var outputPlain = createCquote(output); | var outputPlain = createCquote(output); | ||
outputText(outputPlain, output); | outputText(outputPlain, output); | ||
} // transformationLoop() | |||
Line 1,658: | Line 1,733: | ||
for (var profile in CONFIG) { | for (var profile in CONFIG) { | ||
if (CONFIG[profile].type != 'archive' || !CONFIG[profile].enabled ) continue; // skip the wiki entry, because it's not an actual archive that we need to test | if (CONFIG[profile].type != 'archive' || !CONFIG[profile].enabled ) continue; // skip the wiki entry, because it's not an actual archive that we need to test | ||
// should really be moved to downloadPosting() | |||
if (CONFIG[profile].content.xpath === '') console.log("xpath for content extraction is empty, cannot procedurally extract contents"); | |||
for (var test in CONFIG[profile].tests) { | for (var test in CONFIG[profile].tests) { | ||
var required_data = CONFIG[profile].tests[test]; | var required_data = CONFIG[profile].tests[test]; | ||
Line 1,958: | Line 2,034: | ||
OUTPUT.msgbox(msg); | OUTPUT.msgbox(msg); | ||
} | } | ||
}// ############# | } | ||
// ############# | |||
// # Utilities # | // # Utilities # | ||
// ############# | // ############# | ||
Line 2,143: | Line 2,221: | ||
function evolve_expression_test() { | |||
try { | |||
var genetic = Genetic.create(); | var genetic = Genetic.create(); | ||
Line 2,157: | Line 2,237: | ||
function randomString(len) { | function randomString(len) { | ||
var text = ""; | var text = ""; | ||
var charset = "\\abcdefghijklmnopqrstuvwxyz0123456789 ()<>*.,"; | var charset = "\\abcdefghijklmnopqrstuvwxyz0123456789[] ()<>*.,"; | ||
for(var i=0;i<len;i++) | for(var i=0;i<len;i++) | ||
text += charset.charAt(Math.floor(Math.random() * charset.length)); | text += charset.charAt(Math.floor(Math.random() * charset.length)); | ||
Line 2,227: | Line 2,307: | ||
var validExp = 0.1; | var validExp = 0.1; | ||
var hasToken = 0.1; | var hasToken = 0.1; | ||
var t = this.userData.tests[0].haystack; | |||
//var regex = new RegExp(this.userData.solution); | |||
//var output = t.match( new RegExp("From: (.*) <.*@.*>"))[1]; | |||
// TODO: use search & match for improving the fitness | |||
if (0) | |||
try { | |||
var regex = new RegExp(entity); | |||
var output = t.search( regex); | |||
validExp = 10; | |||
} | |||
catch(e) { | |||
validExp = 2; | |||
} | |||
var i; | var i; | ||
for (i=0;i<entity.length;++i) { | for (i=0;i<entity.length;++i) { | ||
Line 2,239: | Line 2,337: | ||
return fitness + (1*validExp + 1* hasToken); | return fitness; // + (1*validExp + 1* hasToken); | ||
}; | }; | ||
Line 2,246: | Line 2,344: | ||
return pop[0].entity != this.userData["solution"]; | return pop[0].entity != this.userData["solution"]; | ||
}; | }; | ||
genetic.notification = function(pop, generation, stats, isFinished) { | genetic.notification = function(pop, generation, stats, isFinished) { | ||
Line 2,258: | Line 2,356: | ||
if (pop != 0 && value == this.last) | if (pop != 0 && value == this.last) | ||
return; | return; | ||
var solution = []; | var solution = []; | ||
var i; | var i; | ||
for (i=0;i<value.length;++i) { | for (i=0;i<value.length;++i) { | ||
var diff = value.charCodeAt(i) - this.last.charCodeAt(i); | |||
var style = "background: transparent;"; | |||
if (diff > 0) { | |||
style = "background: rgb(0,200,50); color: #fff;"; | |||
} else if (diff < 0) { | |||
style = "background: rgb(0,100,50); color: #fff;"; | |||
} | |||
solution.push("<span style=\"" + style + "\">" + value[i] + "</span>"); | |||
} | |||
var t = this.userData.tests[0].haystack; | |||
//console.log("haystack is:"+t); | |||
// "From: John Doe <John@do...> - 2020-07-02 17:36:03", needle: "John Doe"}, /From: (.*) <.*@.*>/ | |||
var regex = new RegExp(this.userData.solution); | |||
//var output = t.match( new RegExp("From: (.*) <.*@.*>"))[1]; | |||
// TODO: use search & match for improving the fitness | |||
var output = t.search( new RegExp(value)); | |||
var buf = ""; | |||
buf += "<tr>"; | |||
buf += "<td>" + generation + "</td>"; | |||
buf += "<td>" + pop[0].fitness.toPrecision(5) + "</td>"; | |||
buf += "<td>" + solution.join("") + "</td>"; | |||
buf += "<td>" + output + "</td>"; | |||
buf += "</tr>"; | |||
$("#results tbody").prepend(buf); | |||
this.last = value; | |||
}; | |||
/* | |||
genetic.notification2 = function(pop, generation, stats, isFinished) { | |||
function lerp(a, b, p) { | |||
return a + (b-a)*p; | |||
} | |||
var value = pop[0].entity; | |||
this.last = this.last||value; | |||
if (pop != 0 && value == this.last) | |||
return; | |||
var solution = []; | |||
var i; | |||
for (i=0;i<value.length;++i) { | |||
solution.push(value[i]); | solution.push(value[i]); | ||
} | } | ||
Line 2,270: | Line 2,418: | ||
this.last = value; | this.last = value; | ||
}; | }; | ||
*/ | |||
var config = { | var config = { | ||
"iterations": 4000 | "iterations": 4000 | ||
Line 2,279: | Line 2,426: | ||
, "crossover": 0.3 | , "crossover": 0.3 | ||
, "mutation": 0.4 | , "mutation": 0.4 | ||
, "skip": | , "skip": 30 // notifications | ||
//, "webWorkers": false | //, "webWorkers": false | ||
}; | }; | ||
Line 2,291: | Line 2,438: | ||
var regexTests = [ | var regexTests = [ | ||
{haystack: "From: John Doe <John@do...> - 2020-07-02 17:36:03", needle: "John Doe"}, | {haystack: "From: John Doe <John@do...> - 2020-07-02 17:36:03", needle: "John Doe"}, | ||
{haystack: "From: Marc Twain <Marc@ta...> - 2010-01-03 07:36:03", needle: "Marc Twain"}, | {haystack: "From: Marc Twain <Marc@ta...> - 2010-01-03 07:36:03", needle: "Marc Twain"}, | ||
{haystack: "From: George W. Bush <GWB@wh...> - 2055-11-11 17:33:13", needle: "George W. Bush"} | {haystack: "From: George W. Bush <GWB@wh...> - 2055-11-11 17:33:13", needle: "George W. Bush"} | ||
Line 2,297: | Line 2,444: | ||
// the regex we want to evolve | // the regex we want to evolve | ||
var solution = " | var solution = "From: (.*) <.*@.*>"; | ||
// let's assume, we'd like to evolve a regex expression like this one | // let's assume, we'd like to evolve a regex expression like this one | ||
Line 2,305: | Line 2,452: | ||
}; | }; | ||
genetic.evolve(config, userData); | |||
//console.log("genetic.js is loaded and working, but disabled for now"); | |||
console.log("genetic.js is loaded and working, but disabled for now"); | |||
Line 2,318: | Line 2,462: | ||
console.log("genetic.js error:\n" +e.message); | console.log("genetic.js error:\n" +e.message); | ||
} // catch | } // catch | ||
} // evolve_expression_test() | |||
if(0) //TODO: expose via development tab | |||
if(0) | |||
try { | try { | ||
// https://github.com/cazala/synaptic | // https://github.com/cazala/synaptic | ||
Line 2,363: | Line 2,508: | ||
myPerceptron.activate([1,1]); // 0.02128894618097928 | myPerceptron.activate([1,1]); // 0.02128894618097928 | ||
console.log("Syntaptic loaded"); | console.log("Syntaptic loaded"); | ||
} catch(e) { | } catch(e) { | ||
UI.alert(e.message); | UI.alert(e.message); | ||
}</syntaxhighlight> | } | ||
</syntaxhighlight> | |||
{{Appendix}} | {{Appendix}} |