// ==UserScript==
// @name         大众点评评论
// @namespace    http://tampermonkey.net/
// @version      0.2
// @description  get comments of dianping
// @author       You
// @match        http://www.dianping.com/shop*
// @match        https://www.dianping.com/shop*
// @icon         https://www.google.com/s2/favicons?domain=dianping.com
// @grant        none
// @require      https://greasyfork.org/scripts/388505-html2canvas-min/code/html2canvasmin.js?version=724511
// @require      https://unpkg.com/tesseract.js@2.1.0/dist/tesseract.min.js
// @downloadURL  none
// ==/UserScript==

/* global html2canvas Tesseract */

// CSS selectors for the page elements this script interacts with.
const moreBtnClass = '.fold';
const lessBtnClass = '.unfold';
const commentClass = '.review-words';

(function() {
    'use strict';

    // Shorthand for document.querySelectorAll; returns a static NodeList.
    const $ = document.querySelectorAll.bind(document);

    /**
     * Renders one element to a canvas with html2canvas and runs Tesseract OCR
     * (language 'chi_sim') on the result.
     *
     * Failures from html2canvas or Tesseract reject the returned promise, so
     * callers can observe them instead of waiting on a promise that never settles.
     *
     * @param {Element} elm - Element to rasterize and recognize.
     * @returns {Promise<string>} The text reported by Tesseract for the element.
     */
    const renderCmt = async (elm) => {
        const canvas = await html2canvas(elm, {
            allowTaint: true,
            scale: 2,
            useCORS: true,
        });
        console.log('start to recognize');
        const res = await Tesseract.recognize(canvas, 'chi_sim', {
            langPath: 'https://raw.githubusercontent.com/naptha/tessdata/gh-pages/4.0.0_best/',
        });
        console.log(res);
        return res.data.text;
    };

    /**
     * Runs OCR over every element matching `commentClass`, one at a time.
     *
     * Elements are processed sequentially (each OCR run is awaited before the
     * next one starts) and each result is logged as soon as it is available.
     *
     * @returns {Promise<string[]>} Recognized texts in document order; an empty
     *     array when no element matches.
     */
    const getAllCommentCanvas = async () => {
        const comments = $(commentClass);
        const ret = [];
        // Iterate the NodeList itself so we never index one past its end.
        for (const [i, cmt] of comments.entries()) {
            const res = await renderCmt(cmt);
            console.log('res', i, res);
            ret.push(res);
        }
        return ret;
    };

    // Floating trigger button pinned to the bottom-right corner of the page.
    const btn = document.createElement('button');
    btn.textContent = '开始采集';
    btn.style.position = 'fixed';
    btn.style.right = '0';
    btn.style.bottom = '0';
    document.body.appendChild(btn);

    btn.onclick = () => {
        // Click every "fold" control, then make both the fold and unfold
        // controls transparent — presumably so they do not show up in the
        // rendered canvas that is fed to OCR.
        $(moreBtnClass).forEach((b) => {
            b.click();
            b.style.opacity = '0';
        });
        $(lessBtnClass).forEach((l) => {
            l.style.opacity = '0';
        });

        getAllCommentCanvas()
            .then((texts) => {
                console.log('comments', texts);
            })
            .catch(console.error);
    };
})();