feat: ocr识别功能

6 months ago · d6c53c9c77
parent 23c0570a79
commit d6c53c9c77
1 changed files with 327 additions and 0 deletions
--- a/demoHtml/ocr.html
+++ b/demoHtml/ocr.html
@@ -0,0 +1,327 @@
+<!doctype html>
+<html lang="zh-CN">
+    <head>
+        <meta charset="UTF-8" />
+        <meta
+            name="viewport"
+            content="width=device-width, initial-scale=1.0"
+        />
+        <title>网络日志Header提取工具</title>
+        <script src="https://cdn.jsdelivr.net/npm/tesseract.js@4/dist/tesseract.min.js"></script>
+        <style>
+            body {
+                font-family: Arial, sans-serif;
+                max-width: 800px;
+                margin: 0 auto;
+                padding: 20px;
+                line-height: 1.6;
+            }
+            h1 {
+                color: #333;
+                text-align: center;
+            }
+            .container {
+                display: flex;
+                flex-direction: column;
+                gap: 20px;
+            }
+            .upload-section {
+                border: 2px dashed #ccc;
+                padding: 20px;
+                text-align: center;
+                border-radius: 5px;
+                background-color: #f9f9f9;
+            }
+            .upload-section:hover {
+                border-color: #aaa;
+            }
+            #imagePreview {
+                max-width: 100%;
+                margin-top: 10px;
+                display: none;
+            }
+            .result-section {
+                margin-top: 20px;
+            }
+            .header-item {
+                display: flex;
+                align-items: center;
+                margin-bottom: 10px;
+                padding: 10px;
+                background-color: #f0f0f0;
+                border-radius: 5px;
+            }
+            .header-label {
+                font-weight: bold;
+                min-width: 180px;
+            }
+            .header-value {
+                flex-grow: 1;
+                word-break: break-all;
+                padding: 0 10px;
+            }
+            .copy-btn {
+                background-color: #4caf50;
+                color: white;
+                border: none;
+                padding: 8px 12px;
+                border-radius: 4px;
+                cursor: pointer;
+                transition: background-color 0.3s;
+            }
+            .copy-btn:hover {
+                background-color: #45a049;
+            }
+            .progress {
+                margin-top: 10px;
+                height: 20px;
+                background-color: #e0e0e0;
+                border-radius: 4px;
+                overflow: hidden;
+                display: none;
+            }
+            .progress-bar {
+                height: 100%;
+                background-color: #4caf50;
+                width: 0%;
+                transition: width 0.3s;
+            }
+            #statusText {
+                margin-top: 5px;
+                font-size: 14px;
+                color: #666;
+            }
+            .instructions {
+                background-color: #e7f3fe;
+                border-left: 6px solid #2196f3;
+                padding: 10px 15px;
+                margin-bottom: 20px;
+            }
+        </style>
+    </head>
+    <body>
+        <h1>网络日志Header提取工具</h1>
+
+        <div class="container">
+            <div class="instructions">
+                <h3>使用说明：</h3>
+                <p>1. 上传包含网络请求日志的图片（截图或照片）</p>
+                <p>2. 系统将自动识别图片中的文本</p>
+                <p>3. 从识别结果中提取以下Header值：</p>
+                <ul>
+                    <li>bkatimestamp</li>
+                    <li>bkatimestamptoke</li>
+                    <li>brcpEaSessionTicket</li>
+                    <li>brcpEaDeviceId</li>
+                </ul>
+                <p>4. 点击复制按钮可以复制对应的值</p>
+            </div>
+
+            <div
+                class="upload-section"
+                id="dropArea"
+            >
+                <p>拖放图片到这里，或点击选择图片</p>
+                <input
+                    type="file"
+                    id="fileInput"
+                    accept="image/*"
+                    style="display: none"
+                />
+                <button onclick="document.getElementById('fileInput').click()">选择图片</button>
+                <img
+                    id="imagePreview"
+                    alt="预览图"
+                />
+            </div>
+
+            <div
+                class="progress"
+                id="progressBar"
+            >
+                <div
+                    class="progress-bar"
+                    id="progressBarInner"
+                ></div>
+            </div>
+            <div id="statusText"></div>
+
+            <div
+                class="result-section"
+                id="resultSection"
+                style="display: none"
+            >
+                <h2>提取结果</h2>
+                <div id="headerResults"></div>
+            </div>
+        </div>
+
+        <script>
+            //保证页面先加载完毕再执行下面代码
+            window.onload = function () {
+                // 全局变量
+                let imageFile = null;
+
+                // DOM元素
+                const fileInput = document.getElementById('fileInput');
+                const dropArea = document.getElementById('dropArea');
+                const imagePreview = document.getElementById('imagePreview');
+                const progressBar = document.getElementById('progressBar');
+                const progressBarInner = document.getElementById('progressBarInner');
+                const statusText = document.getElementById('statusText');
+                const resultSection = document.getElementById('resultSection');
+                const headerResults = document.getElementById('headerResults');
+
+                // 事件监听器
+                fileInput.addEventListener('change', handleFileSelect);
+                dropArea.addEventListener('dragover', handleDragOver);
+                dropArea.addEventListener('drop', handleDrop);
+
+                // 处理文件选择
+                function handleFileSelect(e) {
+                    const file = e.target.files[0];
+                    if (file && file.type.match('image.*')) {
+                        processImageFile(file);
+                    }
+                }
+
+                // 处理拖放
+                function handleDragOver(e) {
+                    e.preventDefault();
+                    e.stopPropagation();
+                    dropArea.style.borderColor = '#4CAF50';
+                }
+
+                function handleDrop(e) {
+                    e.preventDefault();
+                    e.stopPropagation();
+                    dropArea.style.borderColor = '#ccc';
+
+                    const file = e.dataTransfer.files[0];
+                    if (file && file.type.match('image.*')) {
+                        processImageFile(file);
+                    }
+                }
+
+                // 处理图片文件
+                function processImageFile(file) {
+                    imageFile = file;
+
+                    // 显示预览
+                    const reader = new FileReader();
+                    reader.onload = function (e) {
+                        imagePreview.src = e.target.result;
+                        imagePreview.style.display = 'block';
+                    };
+                    reader.readAsDataURL(file);
+
+                    // 开始OCR处理
+                    performOCR(file);
+                }
+
+                // 执行OCR识别
+                function performOCR(imageFile) {
+                    progressBar.style.display = 'block';
+                    statusText.textContent = '正在识别图片中的文本...';
+
+                    Tesseract.recognize(
+                        imageFile,
+                        'eng+chi_sim', // 使用英文和简体中文识别
+                        {
+                            logger: (m) => updateProgress(m),
+                        }
+                    )
+                        .then(({ data: { text } }) => {
+                            statusText.textContent = '识别完成，正在提取Header信息...';
+                            extractHeaders(text);
+                        })
+                        .catch((err) => {
+                            console.error(err);
+                            statusText.textContent = '识别过程中出错: ' + err.message;
+                        });
+                }
+
+                // 更新进度
+                function updateProgress(message) {
+                    if (message.status === 'recognizing text') {
+                        progressBarInner.style.width = `${message.progress * 100}%`;
+                    }
+
+                    if (message.status) {
+                        statusText.textContent = `状态: ${message.status}`;
+                    }
+                }
+
+                // 从文本中提取header信息
+                function extractHeaders(text) {
+                    console.log('提取Header信息: ', text);
+                    // 定义要提取的header键
+                    const headerKeys = ['bkatimestamp', 'bkatimestamptoke', 'brcpEaSessionTicket', 'brcpEaDeviceId'];
+
+                    // 初始化结果对象
+                    const results = {};
+                    headerKeys.forEach((key) => {
+                        results[key] = '';
+                    });
+
+                    // 尝试从文本中提取每个header
+                    headerKeys.forEach((key) => {
+                        // 构造正则表达式，匹配header键和值
+                        // 考虑到日志可能有多种格式，使用更灵活的正则
+                        const regex = new RegExp(`${key}[\\s:=]+([^\\s\\n]+)`, 'i');
+                        debugger;
+                        const match = text.match(regex);
+
+                        if (match && match[1]) {
+                            results[key] = match[1].replace(/[",;]/g, ''); // 清理常见干扰字符
+                        }
+                    });
+
+                    // 显示结果
+                    displayResults(results);
+                }
+
+                // 显示提取结果
+                function displayResults(results) {
+                    headerResults.innerHTML = '';
+
+                    for (const [key, value] of Object.entries(results)) {
+                        const itemDiv = document.createElement('div');
+                        itemDiv.className = 'header-item';
+
+                        const labelSpan = document.createElement('span');
+                        labelSpan.className = 'header-label';
+                        labelSpan.textContent = key;
+
+                        const valueSpan = document.createElement('span');
+                        valueSpan.className = 'header-value';
+                        valueSpan.textContent = value || '未找到';
+
+                        const copyBtn = document.createElement('button');
+                        copyBtn.className = 'copy-btn';
+                        copyBtn.textContent = '复制';
+                        copyBtn.onclick = () => {
+                            if (value) {
+                                navigator.clipboard.writeText(value).then(() => {
+                                    copyBtn.textContent = '已复制!';
+                                    setTimeout(() => {
+                                        copyBtn.textContent = '复制';
+                                    }, 2000);
+                                });
+                            }
+                        };
+
+                        itemDiv.appendChild(labelSpan);
+                        itemDiv.appendChild(valueSpan);
+                        itemDiv.appendChild(copyBtn);
+
+                        headerResults.appendChild(itemDiv);
+                    }
+
+                    resultSection.style.display = 'block';
+                    statusText.textContent = '提取完成！';
+                }
+            };
+        </script>
+    </body>
+</html>