// ==UserScript== // @name csdn2md - 批量下载CSDN文章为Markdown // @namespace http://tampermonkey.net/ // @version 1.0.2 // @description 下载CSDN文章为Markdown格式,支持专栏批量下载。CSDN排版经过精心调教,最大程度支持CSDN的全部Markdown语法:KaTeX内联公式、KaTeX公式块、图片、内联代码、代码块、Bilibili视频控件、有序/无序/任务/自定义列表、目录、注脚、加粗斜体删除线下滑线高亮、内容居左/中/右、引用块、链接、快捷键(kbd)、表格、上下标、甘特图、UML图、FlowChart流程图 // @author ShizuriYuki // @match https://*.csdn.net/* // @icon https://g.csdnimg.cn/static/logo/favicon32.ico // @grant none // @run-at document-end // @license PolyForm Strict License 1.0.0 https://polyformproject.org/licenses/strict/1.0.0/ // @supportURL https://github.com/Qalxry/csdn2md // @downloadURL none // ==/UserScript== (function () { "use strict"; // 创建悬浮窗 const floatWindow = document.createElement("div"); floatWindow.style.position = "fixed"; floatWindow.style.bottom = "20px"; floatWindow.style.right = "20px"; floatWindow.style.padding = "10px"; floatWindow.style.backgroundColor = "rgba(0, 0, 0, 0.7)"; floatWindow.style.color = "#fff"; floatWindow.style.borderRadius = "5px"; floatWindow.style.boxShadow = "0 2px 5px rgba(0, 0, 0, 0.5)"; floatWindow.style.zIndex = "9999"; // 创建下载按钮 const downloadButton = document.createElement("button"); downloadButton.textContent = "下载CSDN文章为Markdown\n(支持专栏和文章页面,推荐使用typora打开下载的Markdown)"; downloadButton.style.textAlign = "center"; downloadButton.style.padding = "5px 10px"; downloadButton.style.border = "none"; downloadButton.style.backgroundColor = "#4CAF50"; downloadButton.style.color = "white"; downloadButton.style.borderRadius = "3px"; downloadButton.style.cursor = "pointer"; // 按钮点击事件 downloadButton.addEventListener("click", runMain); // 将按钮添加到悬浮窗 floatWindow.appendChild(downloadButton); document.body.appendChild(floatWindow); /** * 将 SVG 图片转换为 Base64 编码的字符串。 * @param {string} text - SVG 图片的文本内容。 * @returns {string} - Base64 编码的字符串。 */ function svgToBase64(svgText) { const uint8Array = new TextEncoder().encode(svgText); const binaryString = uint8Array.reduce((data, byte) => data + String.fromCharCode(byte), ""); return btoa(binaryString); } /** * 压缩HTML内容,移除多余的空白和换行符。 * @param {string} html - 输入的HTML字符串。 * @returns {string} - 压缩后的HTML字符串。 */ function shrinkHtml(html) { return html .replace(/>\s+<") // 去除标签之间的空白 .replace(/\s{2,}/g, " ") // 多个空格压缩成一个 .replace(/^\s+|\s+$/g, ""); // 去除首尾空白 } /** * 清除字符串中的特殊字符。 * @param {*} str * @returns */ function clearSpecialChars(str) { return str.replace(/[\s]{2,}/g, "").replace(/[\u200B-\u200F\u202A-\u202E\u2060-\u206F\uFEFF\u00AD\u034F\u061C\u180E\u2800\u3164\uFFA0\uFFF9-\uFFFB]/g, ""); } /** * 将 HTML 内容转换为 Markdown 格式。 * @param {Element} html - HTML 内容。 * @returns {string} - 转换后的 Markdown 字符串。 */ function htmlToMarkdown(html) { const htype_map = { 一级标题: 1, 二级标题: 2, 三级标题: 3, 四级标题: 4, 五级标题: 5, 六级标题: 6, }; // // Create a DOM parser // const document = new JSDOM(html).window.document; // const content = document.getElementById("content_views"); // Create a DOM parser // const parser = new DOMParser(); // const doc = parser.parseFromString(html, 'text/html'); // const content = doc.getElementById('content_views'); // Directly use the input HTML content const content = html; let markdown = ""; // 辅助函数,用于转义特殊的 Markdown 字符 const escapeMarkdown = (text) => { // return text.replace(/([\\`*_\{\}\[\]()#+\-.!])/g, "\\$1").trim(); return text.trim(); }; /** * 递归处理 DOM 节点并将其转换为 Markdown。 * @param {Node} node - 当前的 DOM 节点。 * @param {number} listLevel - 当前列表嵌套级别。 * @returns {string} - 节点的 Markdown 字符串。 */ function processNode(node, listLevel = 0) { let result = ""; const ELEMENT_NODE = 1; const TEXT_NODE = 3; const COMMENT_NODE = 8; switch (node.nodeType) { case ELEMENT_NODE: switch (node.tagName.toLowerCase()) { case "h1": case "h2": case "h3": case "h4": case "h5": case "h6": { // 解析 id 里的 url 编码,如 %E4%B8%80%E7%BA%A7%E6%A0%87%E9%A2%98 -> 一级标题 if (node.getAttribute("id")) { const htype = decodeURIComponent(node.getAttribute("id")); result += `${"#".repeat(htype_map[htype])} ${node.textContent.trim()}\n\n`; } else { const htype = Number(node.tagName[1]); result += `${"#".repeat(htype)} ${node.textContent.trim()}\n\n`; } } break; case "p": { const style = node.getAttribute("style"); if (node.getAttribute("id") === "main-toc") { result += `**目录**\n\n[TOC]\n\n`; break; } let text = processChildren(node, listLevel); if (style) { if (style.includes("padding-left")) { break; } if (style.includes("text-align:center")) { text = `
${text}
\n\n`; } else if (style.includes("text-align:right")) { text = `
${text}
\n\n`; } else if (style.includes("text-align:justify")) { text = `
${text}
\n\n`; } else { text += "\n\n"; } } else { text += "\n\n"; } result += text; } break; case "strong": case "b": result += ` **${processChildren(node, listLevel).trim()}** `; break; case "em": case "i": result += ` *${processChildren(node, listLevel).trim()}* `; break; case "u": result += ` ${processChildren(node, listLevel).trim()} `; break; case "s": case "strike": result += ` ~~${processChildren(node, listLevel).trim()}~~ `; break; case "a": { const node_class = node.getAttribute("class"); if (node_class && node_class.includes("footnote-backref")) { break; } const href = node.getAttribute("href") || ""; const text = processChildren(node, listLevel); result += ` [${text}](${href}) `; } break; case "img": { const src = node.getAttribute("src") || ""; const alt = node.getAttribute("alt") || ""; const cls = node.getAttribute("class") || ""; // const width = node.getAttribute("width") || ""; // const height = node.getAttribute("height") || ""; // 获取实际渲染的宽度和高度 const computedStyle = window.getComputedStyle(node); const width = parseFloat(computedStyle.width); const height = parseFloat(computedStyle.height); if (cls.includes("mathcode")) { result += `$$\n${alt}\n$$`; } else { if (src.includes("#pic_center")) { result += "\n\n"; } else { result += " "; } if (width && height) { // result += `${alt}`; result += `${alt}`; } else { result += `![${alt}](${src})`; } } } break; case "ul": result += processList(node, listLevel, false); break; case "ol": result += processList(node, listLevel, true); break; case "blockquote": { const text = processChildren(node, listLevel) .trim() .split("\n") .map((line) => (line ? `> ${line}` : "> ")) .join("\n"); result += `${text}\n\n`; } break; case "pre": { const codeNode = node.querySelector("code"); if (codeNode) { const className = codeNode.className || ""; const languageMatch = className.match(/language-(\w+)/); const language = languageMatch ? languageMatch[1] : ""; // const codeText = codeNode.textContent.replace(/^\s+|\s+$/g, ''); // result += `\`\`\`${language}\n${codeText}\n\`\`\`\n\n`; result += `\`\`\`${language}\n${processCodeBlock(codeNode)}\`\`\`\n\n`; } else { console.warn("Code block without element:", node.outerHTML); const codeText = node.textContent.replace(/^\s+|\s+$/g, ""); result += `\`\`\`\n${codeText}\n\`\`\`\n\n`; } } break; case "code": { const codeText = node.textContent; result += ` \`${codeText}\` `; } break; case "hr": if (node.getAttribute("id") !== "hr-toc") { result += `---\n\n`; } break; case "br": result += ` \n`; break; case "table": result += processTable(node) + "\n\n"; break; // case 'iframe': // { // const src = node.getAttribute('src') || ''; // const iframeHTML = node.outerHTML.replace('>', ' style="width: 100%; aspect-ratio: 2;">'); // Ensure proper closing // result += `${iframeHTML}\n\n`; // } // break; case "div": { const className = node.getAttribute("class") || ""; if (className.includes("csdn-video-box")) { // Handle video boxes or other specific divs // result += `
${processChildren(node, listLevel)}
\n\n`; // 不递归处理了,直接在这里进行解析 const iframe = node.querySelector("iframe"); const src = iframe.getAttribute("src") || ""; const title = node.querySelector("p").textContent || ""; const iframeHTML = iframe.outerHTML.replace( ">", ' style="width: 100%; aspect-ratio: 2;">' ); // Ensure video box is full width result += `
${title}${iframeHTML}
\n\n`; } else if (className.includes("toc")) { const customTitle = node.querySelector("h4").textContent || ""; result += `**${customTitle}**\n\n[TOC]\n\n`; } else { result += processChildren(node, listLevel); } } break; case "span": { const node_class = node.getAttribute("class"); if (node_class) { if (node_class.includes("katex--inline")) { // class="katex-mathml" const mathml = clearSpecialChars(node.querySelector(".katex-mathml").textContent); const katex_html = clearSpecialChars(node.querySelector(".katex-html").textContent); // result += ` $${mathml.replace(katex_html, "")}$ `; if (mathml.startsWith(katex_html)) { result += ` $${mathml.replace(katex_html, "")}$ `; } else { // 字符串切片,去掉 mathml 开头等同长度的 katex_html,注意不能用 replace,因为 katex_html 里的字符顺序可能会变 result += ` $${mathml.slice(katex_html.length)}$ `; } break; } else if (node_class.includes("katex--display")) { const mathml = clearSpecialChars(node.querySelector(".katex-mathml").textContent); const katex_html = clearSpecialChars(node.querySelector(".katex-html").textContent); // result += `$$\n${mathml.replace(katex_html, "")}\n$$\n\n`; if (mathml.startsWith(katex_html)) { result += `$$\n${mathml.replace(katex_html, "")}\n$$\n\n`; } else { // 字符串切片,去掉 mathml 开头等同长度的 katex_html,注意不能用 replace,因为 katex_html 里的字符顺序可能会变 result += `$$\n${mathml.slice(katex_html.length)}\n$$\n\n`; } break; } } const style = node.getAttribute("style") || ""; if (style.includes("background-color") || style.includes("color")) { result += `${processChildren(node, listLevel)}`; } else { result += processChildren(node, listLevel); } } break; case "kbd": result += ` ${node.textContent} `; break; case "mark": result += ` ${processChildren(node, listLevel)} `; break; case "sub": result += `${processChildren(node, listLevel)}`; break; case "sup": { const node_class = node.getAttribute("class"); if (node_class && node_class.includes("footnote-ref")) { result += `[^${node.textContent}]`; } else { result += `${processChildren(node, listLevel)}`; } } break; case "svg": { const style = node.getAttribute("style"); if (style && style.includes("display: none")) { break; } // 必须为 foreignObject 里的 div 添加属性 xmlns="http://www.w3.org/1999/xhtml" ,否则 typora 无法识别 const foreignObjects = node.querySelectorAll("foreignObject"); for (const foreignObject of foreignObjects) { const divs = foreignObject.querySelectorAll("div"); divs.forEach((div) => { div.setAttribute("xmlns", "http://www.w3.org/1999/xhtml"); }); } // 检查是否有 style 标签存在于 svg 元素内,如果有,则需要将 svg 元素转换为 img 元素,用 Base64 编码的方式显示。否则直接返回 svg 元素 if (node.querySelector("style")) { const base64 = svgToBase64(node.outerHTML); // result += `SVG Image`; result += `![SVG Image](data:image/svg+xml;base64,${base64})\n\n`; } else { result += `
${node.outerHTML}
\n\n`; } } break; case "section": // 这个是注脚的内容 { const node_class = node.getAttribute("class"); if (node_class && node_class.includes("footnotes")) { result += processFootnotes(node); } } break; case "input": // 仅处理 checkbox 类型的 input 元素 if (node.getAttribute("type") === "checkbox") { result += `[${node.checked ? "x" : " "}] `; } break; case "dl": // 自定义列表,懒得解析了,直接用 html 吧 result += `${shrinkHtml(node.outerHTML)}\n\n`; break; default: result += processChildren(node, listLevel); result += "\n\n"; break; } break; case TEXT_NODE: result += escapeMarkdown(node.textContent); break; case COMMENT_NODE: // Ignore comments break; default: break; } return result; } /** * 处理给定节点的子节点。 * @param {Node} node - 父节点。 * @param {number} listLevel - 当前列表嵌套级别。 * @returns {string} - 子节点拼接后的 Markdown 字符串。 */ function processChildren(node, listLevel) { let text = ""; node.childNodes.forEach((child) => { text += processNode(child, listLevel); }); return text; } /** * 处理列表元素 (