Tampermonkey脚本问题:MathJax数学公式识别与排版修复求助
问题描述
我在辅导机构的学习管理系统(LMS)中使用Tampermonkey脚本,将系统内的普通文本数学表达式转换为LaTeX/MathJax格式(例如把4X2=x2-2x转为$4 \times 2 = x^2 - 2x$)。该脚本由Claude生成,目前存在文本识别不准确和公式排版换行导致可读性差的问题,请求修复建议。当前完整脚本代码如下:
// ==UserScript==
// @name         Smart Math Auto-Renderer
// @namespace    http://tampermonkey.net/
// @version      9.2
// @description  Detect math by indicators and render with MathJax
// @match        *://*/*
// @grant        none
// ==/UserScript==

// Userscript entry point: injects CSS, configures and loads MathJax from a CDN,
// then rewrites plain-text math on the page into TeX-delimited text for MathJax.
(function() {
    'use strict';

    // Add CSS to prevent wrapping in math
    const style = document.createElement('style');
    style.textContent = ` .MathJax, [class*="MJX"], mjx-container { white-space: nowrap !important; overflow-x: auto !important; } `;
    document.head.appendChild(style);

    // MathJax reads this global configuration object when its script loads.
    // NOTE(review): this assignment overwrites any window.MathJax the page itself defined.
    window.MathJax = {
        tex: {
            inlineMath: [['$', '$'], ['\\(', '\\)']],
            displayMath: [['$$', '$$'], ['\\[', '\\]']],
            processEscapes: true,
            processEnvironments: true
        },
        options: {
            skipHtmlTags: ['script', 'noscript', 'style', 'textarea', 'pre', 'code']
        },
        startup: {
            // Run MathJax's default page-ready handling, then schedule a conversion pass 1 s later.
            pageReady: () => {
                return MathJax.startup.defaultPageReady().then(() => {
                    console.log('✓ MathJax loaded');
                    setTimeout(convertMath, 1000);
                });
            }
        }
    };

    // Load the MathJax v3 TeX-to-CHTML bundle asynchronously.
    let script = document.createElement('script');
    script.src = 'https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js';
    script.async = true;
    document.head.appendChild(script);

    // Text nodes that convertMath has already queued for replacement.
    const processedNodes = new WeakSet();

    /**
     * Heuristic check for whether a piece of text may contain math.
     * @param {string} text - Text to inspect.
     * @returns {boolean} false for text starting with one of the known labels;
     *   otherwise true when any indicator pattern matches.
     */
    function hasMathIndicators(text) {
        if (/^(Solution:|Select one:|The correct answer is:|Given that)/.test(text)) {
            return false;
        }
        // NOTE(review): /=/, /:/ and /X/ match on a single character, so ordinary
        // prose containing a colon or a capital X also counts as "math" here.
        const indicators = [
            /=/,
            /\d+\s*[+\-*/×]\s*\d+/,
            /\d+%/,
            /\d+\/\d+/,
            /[a-z]\s*[+\-*/×=]\s*\d+/i,
            /\d+\s*[a-z]/i,
            /\^/,
            /:/,
            /X/
        ];
        return indicators.some(pattern => pattern.test(text));
    }

    /**
     * Rewrite a plain-text expression into TeX through an ordered chain of regex
     * replacements. Each step sees the output of the previous one, so the order
     * of the statements is part of the behavior.
     * @param {string} math - Plain-text expression.
     * @returns {string} The rewritten text, whitespace-collapsed and trimmed.
     */
    function processMathPart(math) {
        // Insert spaces before capitals (for camelCase splitting)
        math = math.replace(/([a-z])([A-Z])/g, '$1 $2');

        // Insert space between letter and number
        // NOTE(review): this turns "x2" into "x 2" before the power rules below run.
        math = math.replace(/([a-z])(\d)/gi, '$1 $2');

        // Insert space between number and capital letter
        math = math.replace(/(\d)([A-Z])/g, '$1 $2');

        // Convert "of" to proper spacing when between % and variable
        math = math.replace(/%\s*of\s*([a-z0-9])/gi, '% \\text{ of } $1');

        // Convert ALL X to times FIRST (before other replacements)
        math = math.replace(/X/g, '\\times');

        // Convert lowercase x to times when between numbers
        math = math.replace(/(\d)\s*x\s*(\d)/gi, '$1 \\times $2');

        // Convert ALL / to fractions
        math = math.replace(/(\d+)\/\(([^)]+)\)/g, '\\frac{$1}{$2}');
        math = math.replace(/(\d+)\s*\/\s*(\d+)/g, '\\frac{$1}{$2}');
        math = math.replace(/(\d+)\/([a-z])/gi, '\\frac{$1}{$2}');

        // Convert : to fractions (ratios)
        math = math.replace(/(\d+)\s*:\s*(\d+)/g, '\\frac{$1}{$2}');

        // Convert × symbol
        math = math.replace(/×/g, '\\times');

        // Handle powers
        // The character class [a-wyz] excludes the letter x from both rules.
        math = math.replace(/([a-wyz])\^(\d+)/gi, '$1^{$2}');
        math = math.replace(/([a-wyz])2(?=\s|[+\-=)\]]|$)/gi, '$1^2');

        // Handle % symbol
        math = math.replace(/(\d+)%/g, '$1\\%');

        // Rs currency
        math = math.replace(/Rs\.?\s*(\d+)/gi, '\\text{Rs}$1');
        math = math.replace(/Rs\.?\s*([a-z])/gi, '\\text{Rs}$1');

        // Units
        math = math.replace(/(\d+)\s*g(?=\s|$)/gi, '$1\\text{ g}');
        math = math.replace(/(\d+)\s*kg(?=\s|$)/gi, '$1\\text{ kg}');
        math = math.replace(/\s+(apples|liters?|l|meters?)(?=\s|$|[,.])/gi, '\\text{ $1}');

        // Clean up spacing
        math = math.replace(/\s+/g, ' ').trim();

        return math;
    }

    /**
     * Convert one line of text to TeX, leaving purely descriptive text untouched.
     * @param {string} text - A single line of text.
     * @returns {string} The input unchanged when it consists only of letters,
     *   whitespace and , . ' (no digit, no "="); otherwise processMathPart(text).
     */
    function convertToLatex(text) {
        // Don't process pure descriptive text
        if (/^[A-Z][a-z\s,.']+$/i.test(text) && !/\d/.test(text) && !text.includes('=')) {
            return text;
        }
        return processMathPart(text);
    }

    /**
     * Scan the text nodes under document.body, wrap lines that look like math in
     * $...$ or $$...$$, replace each changed text node with a <span>, and ask
     * MathJax to typeset the page when at least one node was replaced.
     */
    function convertMath() {
        console.log('🔍 Scanning...');

        const walker = document.createTreeWalker(
            document.body,
            NodeFilter.SHOW_TEXT,
            {
                // Skip nodes already handled and text inside script/style/code/pre or MathJax output.
                acceptNode: (node) => {
                    if (processedNodes.has(node)) return NodeFilter.FILTER_REJECT;
                    if (node.parentElement.closest('script, style, code, pre, .MathJax, [class*="MJX"]')) {
                        return NodeFilter.FILTER_REJECT;
                    }
                    return NodeFilter.FILTER_ACCEPT;
                }
            }
        );

        let node;
        // Replacements are collected first and applied after the walk has finished.
        const replacements = [];

        while (node = walker.nextNode()) {
            let text = node.textContent.trim();
            if (text.length < 3) continue;
            if (processedNodes.has(node)) continue;

            // Skip labels
            if (/^(Solution:|Select one:|[a-d]\.|The correct answer is:|Correct|Incorrect|Mark|Flag|Question)/.test(text)) {
                continue;
            }

            if (hasMathIndicators(text)) {
                const lines = text.split('\n');
                const processedLines = lines.map(line => {
                    line = line.trim();
                    if (line.length < 3) return line;
                    // Lines already starting with a TeX delimiter are left as they are.
                    if (line.startsWith('$')) return line;

                    // Skip answer options
                    if (/^[a-d]\.\s+/.test(line)) return line;

                    // Skip pure text sentences
                    if (/^[A-Z][a-z\s,.']+[^=\d]$/.test(line)) return line;

                    if (hasMathIndicators(line)) {
                        const latex = convertToLatex(line);
                        // Display math for equations with =
                        if (line.includes('=') && /\d/.test(line)) {
                            return `$$${latex}$$`;
                        } else if (/\d/.test(line)) {
                            return `$${latex}$`;
                        }
                    }
                    return line;
                });

                const newText = processedLines.join('\n');
                if (newText !== text) {
                    replacements.push({node, newText});
                    processedNodes.add(node);
                }
            }
        }

        console.log(`📝 Found ${replacements.length} expressions`);

        replacements.forEach(({node, newText}) => {
            const span = document.createElement('span');
            // NOTE(review): newText originates from page text and is assigned through
            // innerHTML, so any markup characters it contains are parsed as HTML.
            span.innerHTML = newText.replace(/\n/g, '<br>');
            node.parentElement.replaceChild(span, node);
        });

        if (window.MathJax && window.MathJax.typesetPromise && replacements.length > 0) {
            console.log('🎨 Rendering...');
            MathJax.typesetPromise().then(() => {
                console.log('✓ Complete');
            }).catch(err => console.error('❌ Error:', err));
        }
    }

    // Schedule the first scan 1 s after the DOM is ready (or 1 s from now if it already is).
    if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', () => {
            setTimeout(convertMath, 1000);
        });
    } else {
        setTimeout(convertMath, 1000);
    }

    // Re-scan after DOM changes, debounced to 500 ms after the last mutation.
    let debounceTimer;
    const observer = new MutationObserver(() => {
        clearTimeout(debounceTimer);
        debounceTimer = setTimeout(convertMath, 500);
    });

    // Start observing 2 s after the script runs.
    setTimeout(() => {
        observer.observe(document.body, {
            childList: true,
            subtree: true
        });
    }, 2000);
})();
修复建议
1. 解决公式换行可读性问题
原脚本将换行转为<br>强制拆分公式,导致MathJax无法正确渲染连续表达式。修改方案:
- 移除换行转`<br>`的逻辑,保留原始文本换行
- 优化CSS确保块级公式独占一行,内联公式不折行
// Replaces the original CSS.
// MathJax v3 marks display-mode containers with display="true" (not "block"),
// so that is the attribute value the block-formula selector has to match.
style.textContent = `
    .MathJax, [class*="MJX"], mjx-container {
        white-space: nowrap !important;
        overflow-x: auto !important;
    }
    mjx-container[display="true"] {
        display: block !important;
        margin: 1em 0 !important;
    }
`;

// Replaces the node-swap logic inside replacements.forEach.
replacements.forEach(({ node, newText }) => {
    const span = document.createElement('span');
    // Assign as plain text: line breaks stay as they are (no <br>), and nothing
    // taken from the page text is parsed as HTML.
    span.textContent = newText;
    node.parentElement.replaceChild(span, node);
});
2. 提升文本识别准确性
(1)优化数学表达式检测逻辑
原检测规则过于宽松,容易误判纯文本。修改为仅当文本包含数字/变量+数学运算符组合时才判定为数学表达式:
/**
 * Decide whether a piece of text looks like a math expression.
 *
 * Text counts as math only when a number or variable appears together with a
 * math operator; a lone "=", ":" or "X" is not enough.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} false for text that starts with a known label
 *   (case-insensitive); otherwise true when any math pattern matches.
 */
function hasMathIndicators(text) {
    // Label lines are never treated as math, whatever follows the label.
    if (/^(Solution:|Select one:|The correct answer is:|Given that)/i.test(text)) {
        return false;
    }
    const mathPatterns = [
        /\d+\s*[+\-*/×=^%:]\s*\d+/,      // number, operator, number
        /[a-zA-Z]\s*[+\-*/×=^]\s*\d+/,   // variable, operator, number
        /\d+\s*[+\-*/×=^]\s*[a-zA-Z]/,   // number, operator, variable
        /[a-zA-Z]\^[0-9]+/,              // explicit power
        /\d+%/,                          // percentage
        /\d+\/\d+/,                      // fraction
        // Multiplication typed as x/X between two numbers ("4X2", "3 x 5").
        // Without this, such lines are never handed to the converter even though
        // it knows how to turn them into \times.
        /\d\s*[xX]\s*\d/
    ];
    return mathPatterns.some((pattern) => pattern.test(text));
}
(2)修复变量平方识别错误
原脚本仅处理[a-wyz]2,漏掉了最常用的x2情况,修改正则覆盖所有字母后的数字:
// Replaces the original power handling.
// Explicit exponents: x^2 -> x^{2}
math = math.replace(/([a-zA-Z])\^(\d+)/g, '$1^{$2}');
// Implicit exponents: x2 -> x^{2}.
// Only a stand-alone single letter is treated as a variable: the letter must be
// at the start of the text or follow a character that is neither a letter nor a
// backslash. This keeps words and LaTeX commands intact (Rs50 and \times2 are
// left alone instead of becoming Rs^{50} and \times^{2}).
// NOTE: this rule has to run before any step that inserts a space between a
// letter and a digit; once "x2" has become "x 2" it no longer matches.
math = math.replace(/(^|[^a-zA-Z\\])([a-zA-Z])(\d+)(?=\s|[+\-=)\]]|$)/g, '$1$2^{$3}');
(3)区分乘号x和变量x
原脚本会把所有大写X无条件转为乘号,容易把变量误判为乘号;修改后仅将数字之间的x转为乘号:
// Replaces the original lowercase-x handling.
// x/X is a multiplication sign only when it sits between two digits. The
// right-hand digit is matched with a lookahead so it is not consumed; that way
// chained products such as 2x3x4 have every x converted, not just the first.
// NOTE: if the script still contains a blanket replace(/X/g, '\\times') step,
// remove it, otherwise every capital X is converted before this rule runs.
math = math.replace(/(\d)\s*x\s*(?=\d)/gi, '$1 \\times ');
// A lone x (as in x+5 or 3x) is kept as a variable and is not converted.
3. 避免重复处理和误处理
(1)增强节点过滤逻辑
添加更多禁止处理的元素类型,避免修改页面上的非目标内容:
// TreeWalker filter: accept only text nodes that are safe to rewrite.
acceptNode: (node) => {
    if (processedNodes.has(node)) return NodeFilter.FILTER_REJECT;

    const parent = node.parentElement;
    // A text node without an element parent cannot be swapped out later.
    if (!parent) return NodeFilter.FILTER_REJECT;

    // Skip non-content elements, editable text areas, existing MathJax output and
    // containers the page already marks as math or answer options. noscript and
    // textarea are listed so the walker agrees with MathJax's own skipHtmlTags.
    const forbiddenParent = parent.closest(
        'script, noscript, style, textarea, code, pre, ' +
        '.MathJax, [class*="MJX"], mjx-container, ' +
        '.math, .equation, .answer-option'
    );
    return forbiddenParent ? NodeFilter.FILTER_REJECT : NodeFilter.FILTER_ACCEPT;
}
(2)调整防抖和观察器延迟
降低重复处理频率,避免页面频繁渲染:
// Handle of the pending debounced scan, if any.
let debounceTimer;

// Collapse a burst of DOM mutations into a single convertMath call that runs
// 1 second after the last mutation (debounce interval lengthened to 1 s).
function scheduleConversion() {
    clearTimeout(debounceTimer);
    debounceTimer = setTimeout(convertMath, 1000);
}

const observer = new MutationObserver(scheduleConversion);

// characterData: true also reports changes to text content.
const observerOptions = { childList: true, subtree: true, characterData: true };

// Start observing 3 seconds after the script runs (initial delay lengthened to 3 s).
setTimeout(() => observer.observe(document.body, observerOptions), 3000);
内容的提问来源于stack exchange,提问作者chickfeet08




