feat: ocr识别功能

master
LCJ-MinYa 6 months ago
parent 23c0570a79
commit d6c53c9c77

@ -0,0 +1,327 @@
<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8" />
<meta
name="viewport"
content="width=device-width, initial-scale=1.0"
/>
<title>网络日志Header提取工具</title>
<script src="https://cdn.jsdelivr.net/npm/tesseract.js@4/dist/tesseract.min.js"></script>
<style>
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 20px;
line-height: 1.6;
}
h1 {
color: #333;
text-align: center;
}
.container {
display: flex;
flex-direction: column;
gap: 20px;
}
.upload-section {
border: 2px dashed #ccc;
padding: 20px;
text-align: center;
border-radius: 5px;
background-color: #f9f9f9;
}
.upload-section:hover {
border-color: #aaa;
}
#imagePreview {
max-width: 100%;
margin-top: 10px;
display: none;
}
.result-section {
margin-top: 20px;
}
.header-item {
display: flex;
align-items: center;
margin-bottom: 10px;
padding: 10px;
background-color: #f0f0f0;
border-radius: 5px;
}
.header-label {
font-weight: bold;
min-width: 180px;
}
.header-value {
flex-grow: 1;
word-break: break-all;
padding: 0 10px;
}
.copy-btn {
background-color: #4caf50;
color: white;
border: none;
padding: 8px 12px;
border-radius: 4px;
cursor: pointer;
transition: background-color 0.3s;
}
.copy-btn:hover {
background-color: #45a049;
}
.progress {
margin-top: 10px;
height: 20px;
background-color: #e0e0e0;
border-radius: 4px;
overflow: hidden;
display: none;
}
.progress-bar {
height: 100%;
background-color: #4caf50;
width: 0%;
transition: width 0.3s;
}
#statusText {
margin-top: 5px;
font-size: 14px;
color: #666;
}
.instructions {
background-color: #e7f3fe;
border-left: 6px solid #2196f3;
padding: 10px 15px;
margin-bottom: 20px;
}
</style>
</head>
<body>
<h1>网络日志Header提取工具</h1>
<div class="container">
<div class="instructions">
<h3>使用说明:</h3>
<p>1. 上传包含网络请求日志的图片(截图或照片)</p>
<p>2. 系统将自动识别图片中的文本</p>
<p>3. 从识别结果中提取以下Header值</p>
<ul>
<li>bkatimestamp</li>
<li>bkatimestamptoke</li>
<li>brcpEaSessionTicket</li>
<li>brcpEaDeviceId</li>
</ul>
<p>4. 点击复制按钮可以复制对应的值</p>
</div>
<!-- Upload area: the page script listens for drag events on #dropArea and for 'change' on #fileInput. -->
<div
  class="upload-section"
  id="dropArea"
>
  <p>拖放图片到这里,或点击选择图片</p>
  <!-- The native file input is hidden; the button below opens its picker. -->
  <input
    type="file"
    id="fileInput"
    accept="image/*"
    style="display: none"
  />
  <!-- Explicit type: a button without one defaults to type="submit". -->
  <button type="button" onclick="document.getElementById('fileInput').click()">选择图片</button>
  <!-- Hidden by CSS until the script assigns a src and sets display to block. -->
  <img
    id="imagePreview"
    alt="预览图"
  />
</div>
<div
class="progress"
id="progressBar"
>
<div
class="progress-bar"
id="progressBarInner"
></div>
</div>
<!-- Status line rewritten by the page script during OCR; role="status" makes it a polite live region so updates are announced. -->
<div id="statusText" role="status"></div>
<div
class="result-section"
id="resultSection"
style="display: none"
>
<h2>提取结果</h2>
<div id="headerResults"></div>
</div>
</div>
<script>
//保证页面先加载完毕再执行下面代码
window.onload = function () {
// Closure-level state: the most recently chosen image (assigned in processImageFile).
let imageFile = null;
// DOM handles shared by the functions in this closure.
const fileInput = document.getElementById('fileInput');
const dropArea = document.getElementById('dropArea');
const imagePreview = document.getElementById('imagePreview');
const progressBar = document.getElementById('progressBar');
const progressBarInner = document.getElementById('progressBarInner');
const statusText = document.getElementById('statusText');
const resultSection = document.getElementById('resultSection');
const headerResults = document.getElementById('headerResults');
// Event wiring: file picker plus drag-and-drop on the upload area.
fileInput.addEventListener('change', handleFileSelect);
dropArea.addEventListener('dragover', handleDragOver);
// Without this, the highlight applied in handleDragOver would stay on when the
// user drags a file over the area and then leaves without dropping it.
// Assigning '' removes the inline colour so the stylesheet applies again.
dropArea.addEventListener('dragleave', () => {
  dropArea.style.borderColor = '';
});
dropArea.addEventListener('drop', handleDrop);
// Handle a file picked through the hidden file input.
// e: the 'change' event fired by #fileInput.
// Image files are forwarded to processImageFile; anything else is ignored.
function handleFileSelect(e) {
  const file = e.target.files[0];
  // Clear the input so choosing the same file again still fires 'change'.
  // The File object captured above stays usable after the reset.
  e.target.value = '';
  // Anchored MIME check: the old match('image.*') was an unanchored regex
  // that accepted any type merely containing the substring "image".
  if (file && file.type.startsWith('image/')) {
    processImageFile(file);
  }
}
// Drag-and-drop, part 1: a file is being dragged over the upload area.
// e: the 'dragover' event. Its default action is cancelled (the browser only
// treats an element as a drop target when dragover is cancelled) and the
// area's border is switched to the highlight colour.
function handleDragOver(e) {
  const highlightColor = '#4CAF50';
  e.stopPropagation();
  e.preventDefault();
  dropArea.style.borderColor = highlightColor;
}
// Drag-and-drop, part 2: a file was dropped on the upload area.
// e: the 'drop' event. The first dropped file is forwarded to
// processImageFile when it is an image; anything else is ignored.
function handleDrop(e) {
  // Stop the browser from navigating to / opening the dropped file.
  e.preventDefault();
  e.stopPropagation();
  // Remove the inline highlight instead of hard-coding '#ccc': an inline
  // border colour would keep overriding the stylesheet's
  // .upload-section:hover rule for the rest of the session.
  dropArea.style.borderColor = '';
  const file = e.dataTransfer.files[0];
  // Anchored MIME check (the old match('image.*') was an unanchored regex).
  if (file && file.type.startsWith('image/')) {
    processImageFile(file);
  }
}
// Take an accepted image file: remember it, show a preview, and start OCR.
// file: the image File chosen via the picker or dropped on the upload area.
function processImageFile(file) {
  imageFile = file;
  // Preview: read the file as a data URL and reveal the <img> once loaded.
  const previewReader = new FileReader();
  previewReader.onload = (loadEvent) => {
    imagePreview.src = loadEvent.target.result;
    imagePreview.style.display = 'block';
  };
  previewReader.readAsDataURL(file);
  // Recognition is started right away; it does not wait for the preview.
  performOCR(file);
}
// Run OCR on an image and hand the recognised text to extractHeaders.
// imageFile: the image File to recognise.
// Progress messages from Tesseract are routed to updateProgress; failures are
// logged to the console and reported in the status line.
function performOCR(imageFile) {
  // Start from a clean state: without the reset, a second image would begin
  // with the previous run's bar width, and the previous image's results would
  // stay on screen while (or after) the new recognition runs or fails.
  progressBarInner.style.width = '0%';
  progressBar.style.display = 'block';
  resultSection.style.display = 'none';
  statusText.textContent = '正在识别图片中的文本...';
  Tesseract.recognize(
    imageFile,
    'eng+chi_sim', // recognise English and Simplified Chinese
    {
      logger: (m) => updateProgress(m),
    }
  )
    .then(({ data: { text } }) => {
      statusText.textContent = '识别完成正在提取Header信息...';
      extractHeaders(text);
    })
    .catch((err) => {
      console.error(err);
      // A rejection is not guaranteed to be an Error instance; avoid
      // printing "undefined" when it has no message property.
      const reason = err && err.message ? err.message : String(err);
      statusText.textContent = '识别过程中出错: ' + reason;
      // Do not leave a half-filled bar on screen after a failure.
      progressBar.style.display = 'none';
    });
}
// Reflect a Tesseract logger message in the UI.
// message: logger payload; its `status` and `progress` fields are read here.
// Any truthy status is echoed in the status line; the bar width only follows
// `progress` while the status is 'recognizing text'.
function updateProgress(message) {
  const currentStatus = message.status;
  if (!currentStatus) {
    return;
  }
  if (currentStatus === 'recognizing text') {
    progressBarInner.style.width = `${message.progress * 100}%`;
  }
  statusText.textContent = `状态: ${currentStatus}`;
}
// Pull the known header values out of OCR text and render them.
// text: the recognised text returned by OCR.
// Builds an object with one entry per header key ('' when the key is not
// found) and passes it to displayResults.
function extractHeaders(text) {
  console.log('提取Header信息: ', text);
  // Header keys to look for, in display order.
  const headerKeys = ['bkatimestamp', 'bkatimestamptoke', 'brcpEaSessionTicket', 'brcpEaDeviceId'];
  // Guard against a missing/non-string OCR result so .match cannot throw.
  const source = typeof text === 'string' ? text : '';
  const results = {};
  headerKeys.forEach((key) => {
    // Case-insensitive: the key, then one or more whitespace / ':' / '='
    // separator characters, then the value captured as a run of
    // non-whitespace characters.
    const regex = new RegExp(`${key}[\\s:=]+([^\\s\\n]+)`, 'i');
    const match = source.match(regex);
    // Strip quote, comma and semicolon characters from the captured value.
    results[key] = match ? match[1].replace(/[",;]/g, '') : '';
  });
  displayResults(results);
}
// Render the extracted header values, one row per key, each with a copy button.
// results: object mapping header key -> extracted value ('' when not found).
// Replaces the contents of #headerResults, reveals the result section and
// sets the status line to the completion message.
function displayResults(results) {
  headerResults.innerHTML = '';
  for (const [key, value] of Object.entries(results)) {
    const itemDiv = document.createElement('div');
    itemDiv.className = 'header-item';

    const labelSpan = document.createElement('span');
    labelSpan.className = 'header-label';
    labelSpan.textContent = key;

    const valueSpan = document.createElement('span');
    valueSpan.className = 'header-value';
    // textContent (not innerHTML) keeps OCR output from being parsed as markup.
    valueSpan.textContent = value || '未找到';

    const copyBtn = document.createElement('button');
    copyBtn.type = 'button';
    copyBtn.className = 'copy-btn';
    copyBtn.textContent = '复制';
    if (!value) {
      // Nothing to copy: disable the button instead of leaving one that
      // silently does nothing when clicked.
      copyBtn.disabled = true;
      copyBtn.style.opacity = '0.5';
      copyBtn.style.cursor = 'not-allowed';
    }

    // Show a temporary label on the button, then restore the default one.
    const flashLabel = (label) => {
      copyBtn.textContent = label;
      setTimeout(() => {
        copyBtn.textContent = '复制';
      }, 2000);
    };

    copyBtn.onclick = () => {
      if (!value) {
        return;
      }
      // navigator.clipboard is only exposed in secure contexts.
      if (!navigator.clipboard) {
        flashLabel('复制失败');
        return;
      }
      navigator.clipboard.writeText(value).then(
        () => flashLabel('已复制!'),
        (err) => {
          // e.g. permission denied; surface it instead of an unhandled rejection.
          console.error(err);
          flashLabel('复制失败');
        }
      );
    };

    itemDiv.appendChild(labelSpan);
    itemDiv.appendChild(valueSpan);
    itemDiv.appendChild(copyBtn);
    headerResults.appendChild(itemDiv);
  }
  resultSection.style.display = 'block';
  statusText.textContent = '提取完成!';
}
};
</script>
</body>
</html>
Loading…
Cancel
Save