Files
ai-web/resources/views/admin/crawlers/form.blade.php
cjd 260460df03
Some checks failed
Tests / PHP 8.2 (push) Has been cancelled
Tests / PHP 8.3 (push) Has been cancelled
Tests / PHP 8.4 (push) Has been cancelled
爬虫开发
2026-02-18 12:56:36 +08:00

483 lines
26 KiB
PHP
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{{-- Admin form used both for creating and for editing a crawler rule; rendered inside the layouts.admin layout. --}}
@extends('layouts.admin')
{{-- The page title uses the "edit" wording when $item already exists and the "new" wording otherwise. --}}
@section('title', $item->exists ? '编辑采集规则' : '新建采集规则')
{{-- Fills the 'head' section with the shared admin.partials.modern-form-head partial. --}}
@section('head')
@include('admin.partials.modern-form-head')
@endsection
@section('scripts')
<script>
(function () {
// Endpoint URLs are rendered server-side from the named admin routes.
// NOTE(review): the echo tag HTML-escapes the URL before it lands inside a JS string literal;
// that is harmless for plain paths, but confirm these routes never carry a query string.
const previewEndpoint = '{{ route('admin.crawlers.preview') }}';
const aiSuggestEndpoint = '{{ route('admin.crawlers.ai-suggest-extractor') }}';
// CSRF token taken from the csrf-token meta tag; empty string when the tag is absent.
const csrfToken = document.querySelector('meta[name="csrf-token"]')?.getAttribute('content') || '';
// Cached references to the preview / picker widgets, looked up by element id.
const previewUrlInput = document.getElementById('preview-url');
const previewFrame = document.getElementById('preview-frame');
const previewStatus = document.getElementById('preview-status');
const selectedXPathView = document.getElementById('selected-xpath');
const extractorJsonInput = document.getElementById('extractor-json');
const pickerFieldInput = document.getElementById('picker-field');
// XPath most recently reported by the picker; stays empty until an element has been clicked.
let selectedXPath = '';
// Parses the text of the Extractor JSON textarea.
//
// Returns:
//   - the parsed value when it is a plain JSON object;
//   - an empty object for blank / whitespace-only text and for valid JSON that
//     is not a plain object (numbers, strings, null, arrays);
//   - null when the text is not valid JSON at all (callers treat null as "invalid").
//
// Arrays are rejected on purpose: the server renders an empty config as "[]",
// and named properties assigned to an array are dropped by JSON.stringify,
// which made selector writes vanish silently on new rules.
const parseJson = (text) => {
    const source = String(text ?? '').trim() || '{}';
    let data;
    try {
        data = JSON.parse(source);
    } catch (_) {
        return null;
    }
    const isPlainObject = data !== null && typeof data === 'object' && !Array.isArray(data);
    return isPlainObject ? data : {};
};
// Serialises the given config with two-space indentation and stores it in the
// Extractor JSON textarea.
const writeExtractor = (config) => {
    const serialized = JSON.stringify(config, null, 2);
    extractorJsonInput.value = serialized;
};
// Collects the AI-related form fields into an options object.
//
// Only fields that are present and non-blank are included. The two numeric
// fields (temperature, content_max_chars) are included only when they parse to
// a finite number, so a missing element or non-numeric text no longer produces
// NaN (which JSON.stringify would serialise as null).
//
// Returns an object with any of: model, system_prompt, user_prompt,
// temperature, content_max_chars. May be empty.
const collectAiOptions = () => {
    // Trimmed value of a form field, or '' when the element or its value is missing.
    const readText = (id) => document.getElementById(id)?.value?.trim() ?? '';
    // Finite number held by a form field, or undefined when blank / not numeric.
    const readNumber = (id) => {
        const raw = readText(id);
        if (raw === '') {
            return undefined;
        }
        const parsed = Number(raw);
        return Number.isFinite(parsed) ? parsed : undefined;
    };

    const options = {};
    const model = readText('ai-model');
    const systemPrompt = readText('ai-system-prompt');
    const userPrompt = readText('ai-user-prompt');
    const temperature = readNumber('ai-temperature');
    const maxChars = readNumber('ai-content-max-chars');

    if (model) options.model = model;
    if (systemPrompt) options.system_prompt = systemPrompt;
    if (userPrompt) options.user_prompt = userPrompt;
    // 0 is a valid temperature, so compare against undefined rather than truthiness.
    if (temperature !== undefined) options.temperature = temperature;
    if (maxChars !== undefined) options.content_max_chars = maxChars;
    return options;
};
// Brings an extractor config into the shape the form works with.
//
// - A config that is not a plain object (including an array) is replaced by a
//   fresh empty object; a plain object is updated in place and returned.
// - mode is taken from the extractor-mode select, defaulting to 'xpath'.
// - ai is overwritten with the current AI form options, or removed when there
//   are none.
// - fields is forced to be a plain object. Arrays are replaced too, because
//   named keys stored on an array are dropped by JSON.stringify.
const normalizeConfig = (config) => {
    const isPlainObject = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);

    const normalized = isPlainObject(config) ? config : {};
    normalized.mode = document.getElementById('extractor-mode')?.value || 'xpath';

    const aiOptions = collectAiOptions();
    if (Object.keys(aiOptions).length > 0) {
        normalized.ai = aiOptions;
    } else {
        delete normalized.ai;
    }

    if (!isPlainObject(normalized.fields)) {
        normalized.fields = {};
    }
    return normalized;
};
// POSTs a JSON payload to the given URL and returns the decoded JSON response.
//
// The CSRF token and the XMLHttpRequest marker header are sent with every call,
// and a JSON response is requested explicitly.
//
// Throws an Error when the HTTP status is not OK or when the response body has
// ok === false; the message is the body's message field when present,
// otherwise 'HTTP <status>'. A body that is not JSON, or that decodes to a
// non-object such as null, is treated as an empty object so the status check
// still runs instead of failing with a TypeError.
const postJson = async (url, payload) => {
    const response = await fetch(url, {
        method: 'POST',
        headers: {
            'Accept': 'application/json',
            'Content-Type': 'application/json',
            'X-CSRF-TOKEN': csrfToken,
            'X-Requested-With': 'XMLHttpRequest',
        },
        body: JSON.stringify(payload),
    });
    const parsed = await response.json().catch(() => null);
    const data = parsed !== null && typeof parsed === 'object' ? parsed : {};
    if (!response.ok || data.ok === false) {
        throw new Error(data.message || ('HTTP ' + response.status));
    }
    return data;
};
// Injects the element-picker script into the document currently shown in the
// preview frame.
//
// The injected script disables navigation (links, forms, buttons, window.open),
// shows a crosshair cursor, outlines the hovered element and, on click, posts
// the clicked element's XPath to the parent window as a message whose source
// field is 'crawler-picker'.
//
// Does nothing when the frame's window/document is not reachable or when the
// picker was already installed into that window. The installed flag is set
// only after the script element has actually been attached.
const installPicker = () => {
    const frameWindow = previewFrame?.contentWindow;
    const frameDocument = previewFrame?.contentDocument;
    if (!frameWindow || !frameDocument || frameWindow.__pickerInstalled) {
        return;
    }
    // Fall back to the root element when the document has no body to append to.
    const host = frameDocument.body || frameDocument.documentElement;
    if (!host) {
        return;
    }
    const script = frameDocument.createElement('script');
    script.text = `
        (function () {
            try {
                window.open = function () { return null; };
            } catch (e) {}
            const disableNavigation = () => {
                document.querySelectorAll('a[href], area[href]').forEach((node) => {
                    const href = node.getAttribute('href') || '';
                    node.setAttribute('data-original-href', href);
                    node.setAttribute('href', 'javascript:void(0)');
                    node.removeAttribute('target');
                });
                document.querySelectorAll('form').forEach((form) => {
                    form.setAttribute('data-original-action', form.getAttribute('action') || '');
                    form.setAttribute('action', 'javascript:void(0)');
                    form.addEventListener('submit', (e) => {
                        e.preventDefault();
                        e.stopPropagation();
                    }, true);
                });
                document.querySelectorAll('button, input[type="submit"], input[type="button"]').forEach((node) => {
                    node.addEventListener('click', (e) => {
                        e.preventDefault();
                        e.stopPropagation();
                    }, true);
                });
            };
            disableNavigation();
            document.documentElement.style.cursor = 'crosshair';
            if (document.body) {
                document.body.style.cursor = 'crosshair';
            }
            document.querySelectorAll('*').forEach((node) => {
                node.style.cursor = 'crosshair';
            });
            const xpath = (el) => {
                if (!el || el.nodeType !== 1) return '';
                // XPath 1.0 string literals cannot escape quotes, so the id shortcut is only
                // used when the id has no double quote; otherwise use the positional path.
                if (el.id && el.id.indexOf('"') === -1) return '//*[@id="' + el.id + '"]';
                const parts = [];
                let node = el;
                while (node && node.nodeType === 1) {
                    let i = 1;
                    let p = node.previousElementSibling;
                    while (p) {
                        if (p.tagName === node.tagName) i += 1;
                        p = p.previousElementSibling;
                    }
                    parts.unshift(node.tagName.toLowerCase() + '[' + i + ']');
                    node = node.parentElement;
                }
                return '/' + parts.join('/');
            };
            document.addEventListener('mouseover', (e) => {
                if (e.target instanceof Element) {
                    e.target.style.outline = '2px solid #2563eb';
                }
            }, true);
            document.addEventListener('mouseout', (e) => {
                if (e.target instanceof Element) {
                    e.target.style.outline = '';
                }
            }, true);
            document.addEventListener('click', (e) => {
                if (!(e.target instanceof Element)) return;
                e.preventDefault();
                e.stopPropagation();
                if (typeof e.stopImmediatePropagation === 'function') {
                    e.stopImmediatePropagation();
                }
                window.parent.postMessage({
                    source: 'crawler-picker',
                    xpath: xpath(e.target),
                }, '*');
            }, true);
            document.addEventListener('mousedown', (e) => {
                e.preventDefault();
                e.stopPropagation();
            }, true);
            document.addEventListener('mouseup', (e) => {
                e.preventDefault();
                e.stopPropagation();
            }, true);
            document.addEventListener('auxclick', (e) => {
                e.preventDefault();
                e.stopPropagation();
            }, true);
            document.addEventListener('keydown', (e) => {
                if (e.key === 'Enter') {
                    e.preventDefault();
                    e.stopPropagation();
                }
            }, true);
        }());
    `;
    host.appendChild(script);
    frameWindow.__pickerInstalled = true;
};
// "Load preview" button: fetches the target page's HTML through the preview
// endpoint and shows it in the preview frame.
//
// The "loaded" status is written only once the frame has fired its load event
// and the picker has been installed, so the message no longer claims elements
// are clickable while the document is still loading. On failure the status
// shows the error message after a ': ' separator.
document.getElementById('preview-load-btn')?.addEventListener('click', async () => {
    const url = previewUrlInput?.value?.trim();
    if (!url) {
        alert('请先输入 URL');
        return;
    }
    previewStatus.textContent = '正在加载预览...';
    try {
        const data = await postJson(previewEndpoint, {
            url,
            user_agent: document.getElementById('user-agent')?.value || '',
        });
        // Register the handler before assigning srcdoc so it is in place for the load event.
        previewFrame.onload = () => {
            installPicker();
            previewStatus.textContent = '预览已加载可点击页面元素。';
        };
        previewFrame.srcdoc = data.html || '';
    } catch (error) {
        previewStatus.textContent = '加载失败: ' + (error.message || 'unknown');
    }
});
// "Write to Extractor JSON" button: stores the currently picked XPath in the
// extractor config. The target is the field typed into the picker-field input,
// defaulting to list_link_xpath; list_link_xpath is written at the top level of
// the config, any other name goes under config.fields.
document.getElementById('apply-selector-btn')?.addEventListener('click', () => {
    if (!selectedXPath) {
        alert('请先在预览中点选元素');
        return;
    }

    const typedField = (pickerFieldInput?.value || '').trim();
    const targetField = typedField || 'list_link_xpath';

    const parsed = parseJson(extractorJsonInput.value);
    if (parsed === null) {
        alert('Extractor JSON 不是有效 JSON');
        return;
    }

    const config = normalizeConfig(parsed);
    // Pick the container once, then do a single assignment.
    const container = targetField === 'list_link_xpath' ? config : config.fields;
    container[targetField] = selectedXPath;
    writeExtractor(config);
});
// "AI suggest" button: asks the AI endpoint for an extractor config for the
// given URL and merges the answer into the Extractor JSON textarea.
//
// - Invalid Extractor JSON is reported with an alert, as the selector button
//   does, instead of being silently replaced by the AI result. The check runs
//   before the request and again after it, since the textarea may have been
//   edited while the request was in flight.
// - Top-level keys from the AI result override existing ones. The fields maps
//   are merged key by key, so fields picked by hand that the AI did not return
//   are kept; on a name clash the AI value wins, matching the top-level rule.
// - On failure the status shows the error message after a ': ' separator.
document.getElementById('ai-suggest-btn')?.addEventListener('click', async () => {
    const url = previewUrlInput?.value?.trim();
    if (!url) {
        alert('请先输入 URL');
        return;
    }

    const isPlainObject = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);
    // Current textarea content as a plain object, or null (after alerting) when it is not valid JSON.
    const readCurrentConfig = () => {
        const parsed = parseJson(extractorJsonInput.value);
        if (parsed === null) {
            alert('Extractor JSON 不是有效 JSON');
            return null;
        }
        return isPlainObject(parsed) ? parsed : {};
    };

    // Fail fast so no AI request is spent on a textarea that cannot be merged into.
    if (readCurrentConfig() === null) {
        return;
    }

    try {
        const data = await postJson(aiSuggestEndpoint, {
            url,
            target_module: document.getElementById('target-module')?.value || 'tool',
            user_agent: document.getElementById('user-agent')?.value || '',
            ai_model: document.getElementById('ai-model')?.value || '',
            ai_system_prompt: document.getElementById('ai-system-prompt')?.value || '',
            ai_user_prompt: document.getElementById('ai-user-prompt')?.value || '',
            ai_temperature: document.getElementById('ai-temperature')?.value || '',
            ai_content_max_chars: document.getElementById('ai-content-max-chars')?.value || '',
        });

        const base = readCurrentConfig();
        if (base === null) {
            return;
        }
        const suggested = isPlainObject(data.extractor_config) ? data.extractor_config : {};
        const merged = { ...base, ...suggested };
        if (isPlainObject(base.fields) && isPlainObject(suggested.fields)) {
            merged.fields = { ...base.fields, ...suggested.fields };
        }

        writeExtractor(normalizeConfig(merged));
        previewStatus.textContent = 'AI 规则已生成并合并。';
    } catch (error) {
        previewStatus.textContent = 'AI 生成失败: ' + (error.message || 'unknown');
    }
});
// Receives the XPath posted by the picker script running inside the preview
// frame and shows it in the "selected XPath" box.
//
// Messages are accepted only when they were sent by the preview frame's own
// window and carry the 'crawler-picker' marker; any other window (another
// frame, an opener, a browser extension page) is ignored so it cannot inject a
// selector into the form.
window.addEventListener('message', (event) => {
    if (event.source !== previewFrame?.contentWindow) {
        return;
    }
    if (!event.data || event.data.source !== 'crawler-picker') {
        return;
    }
    selectedXPath = String(event.data.xpath || '').trim();
    selectedXPathView.textContent = selectedXPath || '未选择';
});
// Extraction-mode select: when the mode changes, re-normalise the Extractor
// JSON so it reflects the new mode. Text that is not valid JSON is left as is.
document.getElementById('extractor-mode')?.addEventListener('change', () => {
    const parsed = parseJson(extractorJsonInput.value);
    if (parsed === null) {
        return;
    }
    writeExtractor(normalizeConfig(parsed));
});
}());
</script>
@endsection
@section('content')
<div class="card modern-form-card">
{{-- Card header: heading uses the same edit/new wording switch on $item->exists, plus a link back to the crawler rule list. --}}
<div class="card-header d-flex justify-content-between align-items-center">
<h3 class="card-title mb-0">{{ $item->exists ? '编辑采集规则' : '新建采集规则' }}</h3>
<a class="btn btn-sm btn-outline-secondary" href="{{ route('admin.crawlers.index') }}">返回列表</a>
</div>
<div class="card-body">
<form method="post" action="{{ $submitRoute }}" class="row g-3" id="crawler-form">
@csrf
@if($method !== 'POST') @method($method) @endif
@php
    // Pre-computes the textarea/select values for the form. Each value prefers the
    // previously submitted (old) input and falls back to the stored model value.

    // Entry URLs are stored as an array and edited one URL per line.
    $entryUrls = old('entry_urls', is_array($item->entry_urls) ? implode("\n", $item->entry_urls) : '');
    $headersJson = old('headers_json', json_encode($item->headers ?? [], JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT));
    $cookiesJson = old('cookies_json', json_encode($item->cookies ?? [], JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT));
    $extractorConfig = is_array($item->extractor_config) ? $item->extractor_config : [];
    // An empty PHP array encodes as "[]", but the picker script on this page works on a
    // JSON object and loses its writes on an array, so an empty config is emitted as "{}".
    $extractorJson = old('extractor_json', json_encode($extractorConfig === [] ? new \stdClass() : $extractorConfig, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT));
    $mappingJson = old('mapping_json', json_encode($item->mapping_config ?? [], JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT));
    $dedupeJson = old('dedupe_json', json_encode($item->dedupe_config ?? [], JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT));
    // AI options and extraction mode live inside the extractor config.
    $extractorAi = is_array($extractorConfig['ai'] ?? null) ? $extractorConfig['ai'] : [];
    $mode = old('extractor_mode', $extractorConfig['mode'] ?? 'xpath');
@endphp
<div class="col-12">
    {{-- Basic configuration: name, target module, publish policy, schedule, page limit, enabled flag and entry URLs. --}}
    <section class="form-section">
        <h4 class="form-section-title">基础配置</h4>
        <div class="row g-3">
            <div class="col-md-6">
                <label class="form-label">规则名称</label>
                <input class="form-control" name="name" value="{{ old('name', $item->name) }}" required>
            </div>
            <div class="col-md-3">
                <label class="form-label">目标模块</label>
                <select class="form-select" name="target_module" id="target-module" required>
                    <option value="tool" @selected(old('target_module', $item->target_module?->value ?? 'tool') === 'tool')>AI 工具</option>
                    <option value="model" @selected(old('target_module', $item->target_module?->value ?? 'tool') === 'model')>AI 模型</option>
                </select>
            </div>
            <div class="col-md-3">
                <label class="form-label">发布策略</label>
                <select class="form-select" name="publish_policy">
                    <option value="draft" @selected(old('publish_policy', $item->publish_policy ?? 'draft') === 'draft')>草稿待审核</option>
                </select>
            </div>
            <div class="col-md-4">
                <label class="form-label">Cron 表达式</label>
                <input class="form-control" name="cron_expression" value="{{ old('cron_expression', $item->cron_expression ?: '0 */6 * * *') }}" required>
            </div>
            <div class="col-md-4">
                <label class="form-label">时区</label>
                <input class="form-control" name="timezone" value="{{ old('timezone', $item->timezone ?: 'Asia/Shanghai') }}" required>
            </div>
            <div class="col-md-2">
                <label class="form-label">最大页面数</label>
                <input class="form-control" type="number" min="1" max="2000" name="max_pages" value="{{ old('max_pages', $item->max_pages ?: 50) }}" required>
            </div>
            <div class="col-md-2">
                <label class="form-label">启用</label>
                <div class="form-check mt-2">
                    {{-- An unchecked box submits nothing, so old('enabled', default) would fall back to the stored value after a failed submit. When old input exists, trust it alone. --}}
                    <input class="form-check-input" type="checkbox" name="enabled" id="enabled" value="1" @checked(session()->hasOldInput() ? (bool) old('enabled') : (bool) $item->enabled)>
                    <label class="form-check-label" for="enabled">启用规则</label>
                </div>
            </div>
            <div class="col-12">
                <label class="form-label">入口 URL(每行一个)</label>
                <textarea class="form-control" name="entry_urls" rows="4" required>{{ $entryUrls }}</textarea>
            </div>
        </div>
    </section>
</div>
<div class="col-12">
    {{-- Crawl and AI configuration: rate limit, retry policy, alert e-mail, extraction mode, AI provider/model/prompts, request identity (User-Agent, proxy, headers, cookies). --}}
    <section class="form-section">
        <h4 class="form-section-title">抓取与 AI 配置</h4>
        <div class="row g-3">
            <div class="col-md-3">
                <label class="form-label">每分钟限流</label>
                <input class="form-control" type="number" min="1" max="2000" name="rate_limit_per_minute" value="{{ old('rate_limit_per_minute', $item->rate_limit_per_minute ?: 30) }}" required>
            </div>
            <div class="col-md-3">
                <label class="form-label">最大重试次数</label>
                <input class="form-control" type="number" min="1" max="10" name="retry_max" value="{{ old('retry_max', $item->retry_max ?: 3) }}" required>
            </div>
            <div class="col-md-3">
                <label class="form-label">退避秒数</label>
                <input class="form-control" type="number" min="1" max="3600" name="retry_backoff_seconds" value="{{ old('retry_backoff_seconds', $item->retry_backoff_seconds ?: 60) }}" required>
            </div>
            <div class="col-md-3">
                <label class="form-label">告警邮箱</label>
                <input class="form-control" type="email" name="alert_email" value="{{ old('alert_email', $item->alert_email) }}">
            </div>
            <div class="col-md-3">
                <label class="form-label">抽取模式</label>
                <select class="form-select" name="extractor_mode" id="extractor-mode" required>
                    <option value="xpath" @selected($mode === 'xpath')>XPath</option>
                    <option value="ai" @selected($mode === 'ai')>AI</option>
                    <option value="hybrid" @selected($mode === 'hybrid')>Hybrid</option>
                </select>
            </div>
            <div class="col-md-3">
                <label class="form-label">AI Provider</label>
                <input class="form-control" name="ai_provider" value="{{ old('ai_provider', $item->ai_provider ?: 'openai_compatible') }}">
            </div>
            <div class="col-md-3">
                <label class="form-label">AI Model</label>
                <input class="form-control" name="ai_model" id="ai-model" value="{{ old('ai_model', $item->ai_model ?: config('crawler.openai_default_model')) }}">
            </div>
            <div class="col-md-3">
                <label class="form-label">AI 温度</label>
                <input class="form-control" type="number" step="0.1" min="0" max="2" name="ai_temperature" id="ai-temperature" value="{{ old('ai_temperature', $extractorAi['temperature'] ?? 0) }}">
            </div>
            <div class="col-md-4">
                <label class="form-label">AI 截断长度</label>
                <input class="form-control" type="number" min="500" max="50000" name="ai_content_max_chars" id="ai-content-max-chars" value="{{ old('ai_content_max_chars', $extractorAi['content_max_chars'] ?? 12000) }}">
            </div>
            <div class="col-md-4">
                <label class="form-label">AI 系统提示词</label>
                <textarea class="form-control" name="ai_system_prompt" id="ai-system-prompt" rows="3">{{ old('ai_system_prompt', $extractorAi['system_prompt'] ?? '') }}</textarea>
            </div>
            <div class="col-md-4">
                <label class="form-label">AI 用户提示词</label>
                <textarea class="form-control" name="ai_user_prompt" id="ai-user-prompt" rows="3">{{ old('ai_user_prompt', $extractorAi['user_prompt'] ?? '') }}</textarea>
            </div>
            <div class="col-md-4">
                <label class="form-label">User-Agent</label>
                <input class="form-control" name="user_agent" id="user-agent" value="{{ old('user_agent', $item->user_agent) }}">
            </div>
            <div class="col-md-4">
                <label class="form-label">代理</label>
                <input class="form-control" name="proxy" value="{{ old('proxy', $item->proxy) }}">
            </div>
            <div class="col-md-4">
                <label class="form-label">AI 兜底</label>
                <div class="form-check mt-2">
                    {{-- An unchecked box submits nothing, so old('ai_fallback_enabled', default) would fall back to the stored value after a failed submit. When old input exists, trust it alone. --}}
                    <input class="form-check-input" type="checkbox" name="ai_fallback_enabled" id="ai-fallback-enabled" value="1" @checked(session()->hasOldInput() ? (bool) old('ai_fallback_enabled') : (bool) $item->ai_fallback_enabled)>
                    <label class="form-check-label" for="ai-fallback-enabled">缺字段启用兜底</label>
                </div>
            </div>
            <div class="col-md-6">
                <label class="form-label">Headers JSON</label>
                <textarea class="form-control" name="headers_json" rows="5">{{ $headersJson }}</textarea>
            </div>
            <div class="col-md-6">
                <label class="form-label">Cookies JSON</label>
                <textarea class="form-control" name="cookies_json" rows="5">{{ $cookiesJson }}</textarea>
            </div>
        </div>
    </section>
</div>
<div class="col-12">
    {{-- Extractor / mapping / dedupe JSON editors plus the page preview used to pick elements and to request AI-generated rules. The element ids here are the ones the page script looks up. --}}
    <section class="form-section">
        <h4 class="form-section-title">Extractor / Mapping / 预览选元素</h4>
        <div class="row g-3">
            <div class="col-md-6">
                <label class="form-label">Extractor JSON</label>
                <textarea class="form-control" name="extractor_json" id="extractor-json" rows="14" required>{{ $extractorJson }}</textarea>
            </div>
            <div class="col-md-6">
                <label class="form-label">Mapping JSON</label>
                <textarea class="form-control" name="mapping_json" rows="6">{{ $mappingJson }}</textarea>
                <label class="form-label mt-3">Dedupe JSON</label>
                <textarea class="form-control" name="dedupe_json" rows="6">{{ $dedupeJson }}</textarea>
            </div>
            <div class="col-md-9">
                <input class="form-control" type="url" id="preview-url" placeholder="输入目标页面 URL(用于预览和 AI 生成规则)">
            </div>
            <div class="col-md-3 d-grid">
                <button class="btn btn-outline-primary" type="button" id="preview-load-btn">加载预览</button>
            </div>
            <div class="col-12">
                {{-- NOTE(review): allow-scripts together with allow-same-origin lets scripts inside the previewed third-party HTML run with this admin page's origin and reach the parent document. The picker currently needs same-origin access to inject itself, so this is left unchanged; confirm the preview endpoint strips scripts from the HTML, or move the picker injection into the srcdoc markup and drop allow-same-origin. --}}
                <iframe id="preview-frame" style="width:100%;height:480px;border:1px solid #d7e0ef;border-radius:.6rem;" sandbox="allow-same-origin allow-scripts"></iframe>
                <div class="small text-muted mt-2" id="preview-status">未加载预览</div>
            </div>
            <div class="col-md-5">
                <label class="form-label">当前 XPath</label>
                <div id="selected-xpath" class="form-control" style="height:auto;min-height:42px;">未选择</div>
            </div>
            <div class="col-md-4">
                <label class="form-label">写入字段(支持自定义)</label>
                <input class="form-control" id="picker-field" placeholder="list_link_xpath 或 name/summary/...">
            </div>
            <div class="col-md-3 d-grid">
                <label class="form-label">&nbsp;</label>
                <button class="btn btn-primary" type="button" id="apply-selector-btn">写入 Extractor JSON</button>
            </div>
            <div class="col-md-12 d-grid">
                <button class="btn btn-outline-success" type="button" id="ai-suggest-btn">AI 生成抽取规则并合并到 Extractor JSON</button>
            </div>
        </div>
    </section>
</div>
{{-- Footer row: a hint with the suggested workflow (load preview, pick elements to write XPath, let AI complete the rules, save) next to the submit button. --}}
<div class="col-12 d-flex justify-content-between align-items-center">
<small class="text-muted">建议流程加载预览 -> 点选元素写 XPath -> AI 补全规则 -> 保存。</small>
<button class="btn btn-primary" type="submit">保存规则</button>
</div>
</form>
</div>
</div>
@endsection