// ==UserScript==
// @name         鸿科经纬 题目爬虫 (v8.0 - 导出为表格)
// @namespace    http://tampermonkey.net/
// @version      8.0
// @description  直接在当前页面循环抓取题目,实时打印到控制台,并可随时停止爬取并保存为CSV表格文件。
// @author       Gemini
// @match        http://yun.hotmatrix.cn/*
// @match        http://eshopcourse.hotmatrix.cn/*
// @grant        GM_addStyle
// @grant        GM_download
// @icon         https://www.google.com/s2/favicons?sz=64&domain=hotmatrix.cn
// @downloadURL  none
// ==/UserScript==

(function() {
    'use strict';

    // Everything below drives the page DOM; do nothing when there is none.
    if (typeof document === 'undefined') {
        return;
    }

    // --- Global control state ---

    // True while the crawl loop is running; cleared by stopCrawling() to
    // request a cooperative stop.
    let isCrawling = false;
    // Question number (string) -> scraped question record, in crawl order.
    const allQuestions = new Map();

    // --- Utilities ---

    const log = (message, ...args) => console.log(`[题目爬虫 v8.0] ${message}`, ...args);
    const err = (message) => console.error(`[题目爬虫 v8.0] ${message}`);
    const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    // --- UI ---

    /**
     * Syncs the start/stop buttons with the crawl state.
     * While crawling, the start button is disabled and the stop button is
     * shown and enabled; otherwise the reverse. The start button label is
     * always reset to its idle text.
     * @param {boolean} crawling - whether a crawl is in progress
     */
    function updateButtonState(crawling) {
        const startButton = document.getElementById('crawlStartButton');
        const stopButton = document.getElementById('crawlStopButton');
        if (!startButton || !stopButton) {
            return;
        }
        startButton.disabled = crawling;
        stopButton.disabled = !crawling;
        stopButton.style.display = crawling ? 'inline-block' : 'none';
        startButton.textContent = '开始爬取题目';
    }

    // --- Data export ---

    /**
     * Serializes question records to CSV text (header row plus one row per
     * question, rows joined with "\n").
     * Only options keyed A-D are exported; other option keys are ignored.
     * @param {Array} data - question records ({number, type, title, options, foundAnswer})
     * @returns {string} CSV text
     */
    function convertToCSV(data) {
        const headers = ['题号', '类型', '题目', '选项A', '选项B', '选项C', '选项D', '答案'];
        const optionKeys = ['A', 'B', 'C', 'D'];

        // Quotes a field when it contains a delimiter, a quote, or any line
        // break character (CR as well as LF), doubling embedded quotes.
        const escapeCSV = (field) => {
            if (field === null || field === undefined) {
                return '';
            }
            const str = String(field);
            if (/[",\r\n]/.test(str)) {
                return `"${str.replace(/"/g, '""')}"`;
            }
            return str;
        };

        const csvRows = [headers.join(',')];
        for (const question of data) {
            // Tolerate records without an options object.
            const options = question.options ?? {};
            const row = [
                question.number,
                question.type,
                question.title,
                ...optionKeys.map((key) => options[key] ?? ''),
                question.foundAnswer,
            ];
            csvRows.push(row.map(escapeCSV).join(','));
        }
        return csvRows.join('\n');
    }

    /**
     * Downloads the question records as a CSV file.
     * Tries GM_download first and falls back to a temporary <a download>
     * link if calling it throws. Does nothing (besides logging) when there
     * is no data. Shows an alert once the download has been requested.
     * @param {Array} data - question records
     * @param {string} filename - target file name, including the .csv suffix
     */
    function saveData(data, filename) {
        if (!data || data.length === 0) {
            log('没有可保存的数据。');
            return;
        }

        const csvData = convertToCSV(data);
        // Leading BOM so that Excel detects the file as UTF-8.
        const blob = new Blob([`\uFEFF${csvData}`], { type: 'text/csv;charset=utf-8;' });
        // A single object URL shared by both download paths, released once
        // the chosen path is done with it.
        const blobUrl = URL.createObjectURL(blob);
        const releaseUrl = () => URL.revokeObjectURL(blobUrl);

        log(`准备下载 ${data.length} 道题目到表格文件...`);

        try {
            GM_download({
                url: blobUrl,
                name: filename,
                saveAs: true,
                onload: () => {
                    log('文件已开始下载。');
                    releaseUrl();
                },
                onerror: (error) => {
                    err(`GM_download 下载失败: ${error.error}`);
                    releaseUrl();
                },
            });
        } catch {
            err('GM_download 不可用,使用备用下载方法。');
            const link = document.createElement('a');
            link.href = blobUrl;
            link.download = filename;
            document.body.appendChild(link);
            link.click();
            document.body.removeChild(link);
            // Release on a later tick rather than synchronously, so the
            // browser has picked up the blob before it is freed.
            setTimeout(releaseUrl, 1000);
        }

        alert(`操作完成!即将下载包含 ${data.length} 道题目的表格文件 (CSV)。`);
    }

    // --- Core crawl logic ---

    /**
     * Polls #num until it shows a non-empty number different from
     * oldQuestionNumber. Polls up to 10 times, one second apart.
     * @param {Document} doc - document to query
     * @param {string} oldQuestionNumber - number of the question just scraped
     * @returns {Promise<boolean>} true once a new question is shown; false on
     *   timeout or when crawling was stopped meanwhile
     */
    async function waitForQuestionChange(doc, oldQuestionNumber) {
        log(`等待题目从 ${oldQuestionNumber} 更新...`);
        for (let attempts = 0; attempts < 10; attempts++) {
            if (!isCrawling) {
                return false;
            }
            const numEl = doc.querySelector('#num');
            const currentNum = numEl ? numEl.innerText.trim() : '';
            if (currentNum && currentNum !== oldQuestionNumber) {
                log(`新题目 ${currentNum} 已加载。`);
                // Short extra pause before the caller reads the page again.
                await delay(300);
                return true;
            }
            await delay(1000);
        }
        err("等待新题目加载超时。");
        return false;
    }

    /**
     * Requests a cooperative stop: clears the crawl flag and disables the
     * stop button. The crawl loop notices the flag and then saves.
     */
    function stopCrawling() {
        log("用户请求停止,将在当前题目处理完毕后终止...");
        isCrawling = false;
        const stopButton = document.getElementById('crawlStopButton');
        if (stopButton) {
            stopButton.disabled = true;
        }
    }

    /**
     * Scrapes the question currently shown inside the .rightCon container.
     * A missing #type element or options <ul> is tolerated (empty type /
     * no options); a missing title element is not.
     * @param {Element} rightCon - the question container
     * @param {string} number - the question number already read from #num
     * @returns {?Object} {number, type, title, options, foundAnswer}, or null
     *   when the title element is missing
     */
    function extractQuestion(rightCon, number) {
        const titleEl = rightCon.querySelector('.titleDetail p');
        if (!titleEl) {
            return null;
        }
        const typeEl = rightCon.querySelector('#type');
        const optionsUl = rightCon.querySelector('.titleDetail ul');
        const optionItems = optionsUl ? Array.from(optionsUl.querySelectorAll('li')) : [];

        let cleanTitle = titleEl.innerText;
        let answer = '未找到';

        // Preferred source: a red span inside the title carrying the answer
        // letters; its text is stripped from the exported title.
        const answerSpan = titleEl.querySelector('span[style*="color:red"]');
        if (answerSpan) {
            const answerMatch = answerSpan.innerText.match(/正确答案:\s*([A-Z]+)/);
            if (answerMatch) {
                answer = answerMatch[1];
                cleanTitle = cleanTitle.replace(answerSpan.innerText, '').trim();
            }
        }

        // Fallback: the option(s) marked .selected. Every selected letter is
        // joined so multi-select questions are not cut down to one letter.
        if (answer === '未找到') {
            const selectedLetters = optionItems
                .filter((li) => li.matches('li.selected'))
                .map((li) => li.querySelector('button')?.innerText.trim())
                .filter(Boolean)
                .join('');
            answer = selectedLetters || '未找到';
        }

        // Option key is the <button> text, option value the <i> text.
        const options = {};
        for (const li of optionItems) {
            const key = li.querySelector('button')?.innerText.trim();
            const value = li.querySelector('i')?.innerText.trim();
            if (key && value) {
                options[key] = value;
            }
        }

        return {
            number,
            type: typeEl ? typeEl.innerText.trim() : '',
            title: cleanTitle,
            options,
            foundAnswer: answer,
        };
    }

    /**
     * Crawls questions on the current page: scrape, click #next, wait for
     * the number to change, repeat. Ends when the user stops it, the page
     * structure is missing, a question number repeats, #next is missing or
     * disabled, or the next question does not load. Whatever was collected
     * is always saved on exit.
     */
    async function startCrawling() {
        if (isCrawling) {
            return;
        }
        isCrawling = true;
        allQuestions.clear();
        updateButtonState(true);

        try {
            const docContext = document;
            while (isCrawling) {
                await delay(200);

                const rightCon = docContext.querySelector('.rightCon');
                if (!rightCon) {
                    log('找不到题目主容器 .rightCon,爬取结束。');
                    break;
                }

                const numEl = rightCon.querySelector('#num');
                const currentQuestionNumber = numEl ? numEl.innerText.trim() : null;
                if (!currentQuestionNumber) {
                    err('页面结构不完整,找不到题目编号。');
                    break;
                }

                // Seeing a number again means the crawl has wrapped around.
                if (allQuestions.has(currentQuestionNumber)) {
                    log(`检测到重复题目编号 ${currentQuestionNumber},爬取结束。`);
                    break;
                }

                const startButton = document.getElementById('crawlStartButton');
                if (startButton) {
                    startButton.textContent = `正在爬取第 ${currentQuestionNumber} 题...`;
                }

                const questionData = extractQuestion(rightCon, currentQuestionNumber);
                if (!questionData) {
                    err('页面结构不完整,找不到题目内容。');
                    break;
                }
                allQuestions.set(currentQuestionNumber, questionData);

                console.groupCollapsed(`[题目 ${questionData.number}] ${questionData.title.substring(0, 40)}...`);
                console.log("题目详情:", questionData);
                console.groupEnd();

                const nextButton = docContext.querySelector('#next');
                if (!nextButton || nextButton.disabled) {
                    log('找不到可点击的“下一题”按钮,爬取结束。');
                    break;
                }

                log("点击 '下一题'。");
                nextButton.click();
                if (!await waitForQuestionChange(docContext, currentQuestionNumber)) {
                    log("点击下一题后题目未更新或被用户中断,爬取结束。");
                    break;
                }
            }
        } catch (error) {
            err(`爬取过程中发生严重错误: ${error.message}`);
            alert(`爬取过程中发生严重错误,请按F12打开控制台查看详情。\n错误: ${error.message}`);
        } finally {
            isCrawling = false;
            // Restore the buttons before saving so that a failure while
            // saving cannot leave the UI stuck in the crawling state.
            updateButtonState(false);
            saveData(Array.from(allQuestions.values()), '鸿科经纬-题目.csv');
        }
    }

    /**
     * Injects the start/stop buttons and their styles into the page.
     * Does nothing if the start button already exists.
     */
    function setupUI() {
        if (document.getElementById('crawlStartButton')) {
            return;
        }

        const startButton = document.createElement('button');
        startButton.id = 'crawlStartButton';
        startButton.textContent = '开始爬取题目';
        startButton.addEventListener('click', startCrawling);

        const stopButton = document.createElement('button');
        stopButton.id = 'crawlStopButton';
        stopButton.textContent = '停止并保存';
        stopButton.addEventListener('click', stopCrawling);
        // Hidden until a crawl starts.
        stopButton.style.display = 'none';

        GM_addStyle(` #crawlStartButton, #crawlStopButton { position: fixed; right: 20px; z-index: 10000; padding: 10px 15px; color: white; border: none; border-radius: 5px; cursor: pointer; font-size: 16px; box-shadow: 0 4px 8px rgba(0,0,0,0.2); transition: background-color 0.3s, color 0.3s; } #crawlStartButton { top: 100px; background-color: #4CAF50; } #crawlStartButton:hover { background-color: #45a049; } #crawlStartButton:disabled { background-color: #A5A5A5; color: #E0E0E0; cursor: not-allowed; } #crawlStopButton { top: 155px; background-color: #f44336; } #crawlStopButton:hover { background-color: #d32f2f; } #crawlStopButton:disabled { background-color: #A5A5A5; color: #E0E0E0; cursor: not-allowed; } `);

        document.body.appendChild(startButton);
        document.body.appendChild(stopButton);
        log("UI已加载,等待用户操作。");
    }

    // Build the UI as soon as the DOM is available.
    if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', setupUI);
    } else {
        setupUI();
    }
})();