// ==UserScript== // @name Extract Special Characters // @namespace http://github.com/kba/ // @include https://github.com/UB-Mannheim/ocr-gt-tools/wiki/Special-Characters // @include https://github.com/UB-Mannheim/ocr-gt-tools/wiki/Error-Tags // @description Extract special character data from ocr-gt-tools wiki // @version 1 // @require https://code.jquery.com/jquery-2.2.3.min.js // @require https://cdnjs.cloudflare.com/ajax/libs/z-schema/3.17.0/ZSchema-browser.js // @grant GM_addStyle // @grant GM_setClipboard // @downloadURL https://update.greasyfork.icu/scripts/18942/Extract%20Special%20Characters.user.js // @updateURL https://update.greasyfork.icu/scripts/18942/Extract%20Special%20Characters.meta.js // ==/UserScript== /*globals GM_addStyle */ /*globals ZSchema */ var CSS = ` pre.schema-error { background: #a00; color: white; white-space: pre-wrap; } div#glyph-bar { font-size: x-large; position:fixed; bottom: 0; height: 48px; border: 2px solid black; background: white; width: 100%; } div#glyph-bar .left * { float: left; } div#glyph-bar .right * { float: right; } div#glyph-bar * { height: 100%; font-size: x-large; } div#glyph-bar input[type='text'] { font-family: "Garamond", "Bookman", serif; } div#schema-bar { position: fixed; z-index: 3000; top: 0; background: #900; color: white !important; width: 100%; font-size: x-large; height: 48px; border: 2px solid black; } div#schema-invalid { display: none; } div#schema-invalid a { display: inline-block; color: white !important; float: none; margin: 0 2px; } `; var SCHEMAS = { 'Special-Characters': { 'type': 'object', "additionalProperties": false, 'properties': { 'id': { 'type': 'string', 'pattern': '^[a-z0-9-]+$', }, 'sample': { 'type': 'array', 'items': { 'type': 'string', 'pattern': '^
 
`); $("#glyph-input").on('keyup', function(e) { var $input = $("#glyph-input"); var from = $input[0].selectionStart; var to = $input[0].selectionEnd; if (from == to) { from -= 1; } $('#glyph-propose').empty(); var $propose = $('#glyph-propose'); var val = $input.val(); var chosen = val.substring(from, to); console.log(chosen, from, to); $.each(scraped, function() { var glyphDesc = this; if (glyphDesc.baseLetter.indexOf(chosen) === -1) { return; } $.each(glyphDesc.sample, function(i, sample) { $propose.append($(sample) .on('click', function(e) { e.preventDefault(); $input.val(val.substr(0, from) + glyphDesc.recognition + val.substr(to)); })); }); }); }); }, 'Error-Tags': function(scraped) { window.alert('Not Implemented'); } }; function scrapeJsonFromWikiPage(schema) { var parsed = {}; var validator = new ZSchema(); var h2s = $(".markdown-body h2").get(); for (var i = 0; i < h2s.length; i++) { var $h2 = $(h2s[i]); var thingDesc = {}; var thingId = $h2.text().trim(); parsed[thingId] = thingDesc; var lis = $h2.next('ul').find('li').get(); for (var j = 0; j < lis.length; j++) { var liHtml = $(lis[j]).html(); var colonIndex = liHtml.indexOf(':'); var varName = liHtml.substring(0, colonIndex) .toLowerCase() .replace(/[^a-z0-9]+/g, '_') .replace(/_([a-z])/g, function(orig, ch) { return ch.toUpperCase(); }) .replace(/^_|_$/, ''); // console.log(`Parsing '${varName}'`); var rawValue = liHtml.substring(colonIndex + 1).trim(); if (schema.properties[varName] && schema.properties[varName].type === 'array') { thingDesc[varName] = rawValue.split(/\s*;\s*/); } else if (schema.properties[varName] && schema.properties[varName].type === 'number') { thingDesc[varName] = parseFloat(rawValue); } else if (/[A-Z][a-z]$/.test(varName)) { var lang = varName.substr(-2).toLowerCase(); varName = varName.substring(0, varName.length - 2); thingDesc[varName] = thingDesc[varName] || {}; thingDesc[varName][lang] = rawValue; } else { thingDesc[varName] = rawValue; } } console.log([thingDesc, schema]); if (!validator.validate(thingId, schema.properties.id)) { showError(thingId, validator.getLastErrors()); } if (!validator.validate(thingDesc, schema)) { showError(thingId, validator.getLastErrors()); } } return parsed; }; function escapeHTML(str) { var entityMap = { "&": "&", "<": "<", ">": ">", '"': '"', "'": ''', "/": '/' }; return String(str).replace(/[&<>"'\/]/g, function(s) { return entityMap[s]; }); } function showError(faultyId, err) { $(`h2:contains('${faultyId}')`).append( `
${escapeHTML(JSON.stringify(err, null, 2))}
`); $("#schema-invalid").show().append( `[${ $("#schema-invalid a").length + 1}]`); } $(function() { GM_addStyle(CSS); $("body").prepend( `
!! INVALID
`); var wikiPage = window.location.href.replace(/.*\//, '').replace(/#.*$/, ''); var schema = SCHEMAS[wikiPage]; var scraped = scrapeJsonFromWikiPage(schema); ON_LOAD[wikiPage](scraped); $("#copy-schema").on('click', function() { GM_setClipboard(JSON.stringify(SCHEMAS[schema], null, 2)); window.alert("Copied JSON schema to clipboard"); }); $("#copy-json").on('click', function() { GM_setClipboard(JSON.stringify(scraped, null, 2)); window.alert("Copied JSON schema to clipboard"); }); });