{{-- Admin form for creating or editing a crawler rule: rule settings, JSON editors, and an in-page preview with an XPath picker. --}}
@extends('layouts.admin')
|
||
|
||
@section('title', $item->exists ? '编辑采集规则' : '新建采集规则')
|
||
|
||
@section('head')
|
||
@include('admin.partials.modern-form-head')
|
||
@endsection
|
||
|
||
@section('scripts')
|
||
<script>
|
||
(function () {
|
||
const previewEndpoint = '{{ route('admin.crawlers.preview') }}';
|
||
const aiSuggestEndpoint = '{{ route('admin.crawlers.ai-suggest-extractor') }}';
|
||
const csrfToken = document.querySelector('meta[name="csrf-token"]')?.getAttribute('content') || '';
|
||
const previewUrlInput = document.getElementById('preview-url');
|
||
const previewFrame = document.getElementById('preview-frame');
|
||
const previewStatus = document.getElementById('preview-status');
|
||
const selectedXPathView = document.getElementById('selected-xpath');
|
||
const extractorJsonInput = document.getElementById('extractor-json');
|
||
const pickerFieldInput = document.getElementById('picker-field');
|
||
|
||
let selectedXPath = '';
|
||
|
||
// Parses editor text into a plain object.
//   - empty text is treated as "{}"
//   - syntactically invalid JSON yields null so callers can report it
//   - any non-object value (number, string, null) AND any array yields {}
// Arrays must be rejected: PHP renders an empty config as "[]", and named
// properties set on an array are dropped by JSON.stringify, which used to
// discard the picked XPath and the mode for brand-new rules.
const parseJson = (text) => {
    let data;
    try {
        data = JSON.parse(text || '{}');
    } catch (_) {
        return null;
    }

    const isPlainObject = data !== null && typeof data === 'object' && !Array.isArray(data);
    return isPlainObject ? data : {};
};
|
||
|
||
// Serialises the given config with two-space indentation and puts it into the
// Extractor JSON textarea.
const writeExtractor = (config) => {
    const pretty = JSON.stringify(config, null, 2);
    extractorJsonInput.value = pretty;
};
|
||
|
||
// Builds the "ai" options object from the AI-related form inputs.
// Only non-empty values are included. Numeric options are included only when the
// input exists and holds a finite number; previously a missing input produced
// Number(undefined) = NaN, which JSON.stringify turns into null.
const collectAiOptions = () => {
    const readText = (id) => document.getElementById(id)?.value?.trim() ?? '';
    const readNumber = (id) => {
        const raw = readText(id);
        if (raw === '') {
            return null;
        }
        const parsed = Number(raw);
        return Number.isFinite(parsed) ? parsed : null;
    };

    const options = {};

    const model = readText('ai-model');
    const systemPrompt = readText('ai-system-prompt');
    const userPrompt = readText('ai-user-prompt');
    const temperature = readNumber('ai-temperature');
    const maxChars = readNumber('ai-content-max-chars');

    if (model) options.model = model;
    if (systemPrompt) options.system_prompt = systemPrompt;
    if (userPrompt) options.user_prompt = userPrompt;
    if (temperature !== null) options.temperature = temperature;
    if (maxChars !== null) options.content_max_chars = maxChars;

    return options;
};
|
||
|
||
// Brings a config object in line with the current form state (mutating and
// returning the object it was given, or a fresh one for non-object input):
//   - "mode" mirrors the extractor-mode select, defaulting to 'xpath'
//   - "ai" mirrors the AI inputs and is removed when no option is set
//   - "fields" is guaranteed to be an object
const normalizeConfig = (config) => {
    const result = (config && typeof config === 'object') ? config : {};

    const modeSelect = document.getElementById('extractor-mode');
    result.mode = modeSelect?.value || 'xpath';

    const aiOptions = collectAiOptions();
    if (Object.keys(aiOptions).length > 0) {
        result.ai = aiOptions;
    } else {
        delete result.ai;
    }

    const hasFieldMap = Boolean(result.fields) && typeof result.fields === 'object';
    if (!hasFieldMap) {
        result.fields = {};
    }

    return result;
};
|
||
|
||
// POSTs a JSON payload to the given URL with the CSRF and XHR headers and
// resolves with the decoded JSON response. A body that is not valid JSON is
// treated as an empty object. Rejects with an Error carrying the server's
// "message" (or "HTTP <status>") when the status is not OK or the body has
// ok === false.
const postJson = async (url, payload) => {
    const requestHeaders = {
        'Content-Type': 'application/json',
        'X-CSRF-TOKEN': csrfToken,
        'X-Requested-With': 'XMLHttpRequest',
    };

    const response = await fetch(url, {
        method: 'POST',
        headers: requestHeaders,
        body: JSON.stringify(payload),
    });

    let data;
    try {
        data = await response.json();
    } catch (_) {
        data = {};
    }

    const failed = !response.ok || data.ok === false;
    if (failed) {
        throw new Error(data.message || ('HTTP ' + response.status));
    }

    return data;
};
|
||
|
||
// Injects the element picker into the document currently shown in the preview
// iframe. It runs at most once per frame window (guarded by __pickerInstalled).
// The injected script:
//   - neutralises navigation (links, forms, buttons, window.open)
//   - shows a crosshair cursor and outlines the hovered element
//   - on click, posts { source: 'crawler-picker', xpath } to the parent window
//
// NOTE(review): the preview iframe is declared with
// sandbox="allow-same-origin allow-scripts" and is filled via srcdoc with HTML
// from the preview endpoint. Unless that endpoint strips scripts (not visible
// here), scripts of the fetched page would run with this admin page's origin.
// The picker depends on both flags, so this is flagged rather than changed.
const installPicker = () => {
    const frameWindow = previewFrame?.contentWindow;
    const frameDocument = previewFrame?.contentDocument;
    if (!frameWindow || !frameDocument || frameWindow.__pickerInstalled) {
        return;
    }

    frameWindow.__pickerInstalled = true;
    const script = frameDocument.createElement('script');
    script.text = `
        (function () {
            try {
                window.open = function () { return null; };
            } catch (e) {}

            const disableNavigation = () => {
                document.querySelectorAll('a[href], area[href]').forEach((node) => {
                    const href = node.getAttribute('href') || '';
                    node.setAttribute('data-original-href', href);
                    node.setAttribute('href', 'javascript:void(0)');
                    node.removeAttribute('target');
                });
                document.querySelectorAll('form').forEach((form) => {
                    form.setAttribute('data-original-action', form.getAttribute('action') || '');
                    form.setAttribute('action', 'javascript:void(0)');
                    form.addEventListener('submit', (e) => {
                        e.preventDefault();
                        e.stopPropagation();
                    }, true);
                });
                document.querySelectorAll('button, input[type="submit"], input[type="button"]').forEach((node) => {
                    node.addEventListener('click', (e) => {
                        e.preventDefault();
                        e.stopPropagation();
                    }, true);
                });
            };
            disableNavigation();

            document.documentElement.style.cursor = 'crosshair';
            document.body.style.cursor = 'crosshair';
            document.querySelectorAll('*').forEach((node) => {
                node.style.cursor = 'crosshair';
            });

            const xpath = (el) => {
                if (!el || el.nodeType !== 1) return '';
                if (el.id) {
                    // Pick a quote character the id does not contain; if it contains
                    // both kinds, fall through to the positional path below.
                    if (el.id.indexOf('"') === -1) return '//*[@id="' + el.id + '"]';
                    if (el.id.indexOf("'") === -1) return "//*[@id='" + el.id + "']";
                }
                const parts = [];
                let node = el;
                while (node && node.nodeType === 1) {
                    let i = 1;
                    let p = node.previousElementSibling;
                    while (p) {
                        if (p.tagName === node.tagName) i += 1;
                        p = p.previousElementSibling;
                    }
                    parts.unshift(node.tagName.toLowerCase() + '[' + i + ']');
                    node = node.parentElement;
                }
                return '/' + parts.join('/');
            };

            document.addEventListener('mouseover', (e) => {
                if (e.target instanceof Element) {
                    e.target.style.outline = '2px solid #2563eb';
                }
            }, true);
            document.addEventListener('mouseout', (e) => {
                if (e.target instanceof Element) {
                    e.target.style.outline = '';
                }
            }, true);
            document.addEventListener('click', (e) => {
                if (!(e.target instanceof Element)) return;
                e.preventDefault();
                e.stopPropagation();
                if (typeof e.stopImmediatePropagation === 'function') {
                    e.stopImmediatePropagation();
                }
                window.parent.postMessage({
                    source: 'crawler-picker',
                    xpath: xpath(e.target),
                }, '*');
            }, true);
            document.addEventListener('mousedown', (e) => {
                e.preventDefault();
                e.stopPropagation();
            }, true);
            document.addEventListener('mouseup', (e) => {
                e.preventDefault();
                e.stopPropagation();
            }, true);
            document.addEventListener('auxclick', (e) => {
                e.preventDefault();
                e.stopPropagation();
            }, true);
            document.addEventListener('keydown', (e) => {
                if (e.key === 'Enter') {
                    e.preventDefault();
                    e.stopPropagation();
                }
            }, true);
        }());
    `;
    frameDocument.body?.appendChild(script);
};
|
||
|
||
// "Load preview" button: asks the preview endpoint for the page HTML, shows it
// in the iframe via srcdoc, and installs the picker once the frame has loaded.
// Progress and failures are reported in the status line.
document.getElementById('preview-load-btn')?.addEventListener('click', async () => {
    const targetUrl = previewUrlInput?.value?.trim();
    if (!targetUrl) {
        alert('请先输入 URL');
        return;
    }

    previewStatus.textContent = '正在加载预览...';

    try {
        const payload = {
            url: targetUrl,
            user_agent: document.getElementById('user-agent')?.value || '',
        };
        const result = await postJson(previewEndpoint, payload);

        previewFrame.srcdoc = result.html || '';
        previewFrame.onload = installPicker;
        previewStatus.textContent = '预览已加载,可点击页面元素。';
    } catch (error) {
        const reason = error.message || 'unknown';
        previewStatus.textContent = '加载失败:' + reason;
    }
});
|
||
|
||
// "Write to Extractor JSON" button: stores the currently picked XPath in the
// editor config, either as list_link_xpath (also the default when no field name
// is typed) or under fields[<name>] for any other field name.
document.getElementById('apply-selector-btn')?.addEventListener('click', () => {
    if (selectedXPath) {
        const targetField = (pickerFieldInput?.value || '').trim() || 'list_link_xpath';

        const parsed = parseJson(extractorJsonInput.value);
        if (parsed === null) {
            alert('Extractor JSON 不是有效 JSON');
            return;
        }

        const config = normalizeConfig(parsed);
        if (targetField !== 'list_link_xpath') {
            config.fields[targetField] = selectedXPath;
        } else {
            config.list_link_xpath = selectedXPath;
        }

        writeExtractor(config);
        return;
    }

    alert('请先在预览中点选元素');
});
|
||
|
||
// "AI suggest" button: sends the URL and the AI form settings to the suggestion
// endpoint and merges the returned extractor_config over the editor's config
// (returned keys win).
//
// Invalid editor JSON now stops the action with the same alert the apply button
// uses; previously it was silently replaced by the AI result. Array values are
// never spread into the merged object.
document.getElementById('ai-suggest-btn')?.addEventListener('click', async () => {
    const url = previewUrlInput?.value?.trim();
    if (!url) {
        alert('请先输入 URL');
        return;
    }

    // Check before spending an AI request on content that cannot be merged.
    if (parseJson(extractorJsonInput.value) === null) {
        alert('Extractor JSON 不是有效 JSON');
        return;
    }

    const fieldValue = (id, fallback = '') => document.getElementById(id)?.value || fallback;
    const asPlainObject = (value) => (
        value && typeof value === 'object' && !Array.isArray(value) ? value : {}
    );

    try {
        const data = await postJson(aiSuggestEndpoint, {
            url,
            target_module: fieldValue('target-module', 'tool'),
            user_agent: fieldValue('user-agent'),
            ai_model: fieldValue('ai-model'),
            ai_system_prompt: fieldValue('ai-system-prompt'),
            ai_user_prompt: fieldValue('ai-user-prompt'),
            ai_temperature: fieldValue('ai-temperature'),
            ai_content_max_chars: fieldValue('ai-content-max-chars'),
        });

        // Re-read the editor: it may have been edited while the request was running.
        const current = parseJson(extractorJsonInput.value);
        if (current === null) {
            throw new Error('Extractor JSON 不是有效 JSON');
        }

        writeExtractor(normalizeConfig({
            ...asPlainObject(current),
            ...asPlainObject(data.extractor_config),
        }));
        previewStatus.textContent = 'AI 规则已生成并合并。';
    } catch (error) {
        previewStatus.textContent = 'AI 生成失败:' + (error.message || 'unknown');
    }
});
|
||
|
||
// Receives the XPath chosen inside the preview iframe and shows it in the form.
// Messages are accepted only when they were posted by the preview iframe's own
// window; without this check any other window or frame could spoof a
// 'crawler-picker' message.
window.addEventListener('message', (event) => {
    if (!previewFrame || event.source !== previewFrame.contentWindow) {
        return;
    }
    if (!event.data || event.data.source !== 'crawler-picker') {
        return;
    }

    selectedXPath = String(event.data.xpath || '').trim();
    if (selectedXPathView) {
        selectedXPathView.textContent = selectedXPath || '未选择';
    }
});
|
||
|
||
// Keeps the Extractor JSON in sync when the extraction mode select changes.
// Nothing is written while the editor holds invalid JSON.
document.getElementById('extractor-mode')?.addEventListener('change', () => {
    const parsed = parseJson(extractorJsonInput.value);
    if (parsed === null) {
        return;
    }
    writeExtractor(normalizeConfig(parsed));
});
|
||
}());
|
||
</script>
|
||
@endsection
|
||
|
||
@section('content')
|
||
<div class="card modern-form-card">
|
||
<div class="card-header d-flex justify-content-between align-items-center">
|
||
<h3 class="card-title mb-0">{{ $item->exists ? '编辑采集规则' : '新建采集规则' }}</h3>
|
||
<a class="btn btn-sm btn-outline-secondary" href="{{ route('admin.crawlers.index') }}">返回列表</a>
|
||
</div>
|
||
<div class="card-body">
|
||
<form method="post" action="{{ $submitRoute }}" class="row g-3" id="crawler-form">
|
||
@csrf
|
||
@if($method !== 'POST') @method($method) @endif
|
||
|
||
@php
    // Shared encoding flags for the JSON editors. Slashes stay unescaped so XPath
    // values read as "//div" instead of "\/\/div"; the textareas echo the result
    // through Blade's escaping echo, so this is safe in HTML.
    $jsonFlags = JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_PRETTY_PRINT;

    // Previously submitted input (after a validation redirect) wins over model values.
    $entryUrls = old('entry_urls', is_array($item->entry_urls) ? implode("\n", $item->entry_urls) : '');
    $headersJson = old('headers_json', json_encode($item->headers ?? [], $jsonFlags));
    $cookiesJson = old('cookies_json', json_encode($item->cookies ?? [], $jsonFlags));

    $extractorConfig = is_array($item->extractor_config) ? $item->extractor_config : [];
    // An empty PHP array encodes as "[]". The page script sets named keys on the
    // parsed value, and those are lost on a JSON array, so render "{}" instead.
    $extractorJson = old(
        'extractor_json',
        json_encode($extractorConfig === [] ? new \stdClass() : $extractorConfig, $jsonFlags),
    );

    $mappingJson = old('mapping_json', json_encode($item->mapping_config ?? [], $jsonFlags));
    $dedupeJson = old('dedupe_json', json_encode($item->dedupe_config ?? [], $jsonFlags));

    // AI options and extraction mode stored inside the extractor config.
    $extractorAi = is_array($extractorConfig['ai'] ?? null) ? $extractorConfig['ai'] : [];
    $mode = old('extractor_mode', $extractorConfig['mode'] ?? 'xpath');
@endphp
|
||
|
||
{{-- Basic settings: name, target module, publish policy, schedule, page limit, enabled flag, entry URLs. --}}
<div class="col-12">
    <section class="form-section">
        <h4 class="form-section-title">基础配置</h4>
        <div class="row g-3">
            <div class="col-md-6">
                <label class="form-label">规则名称</label>
                <input class="form-control" name="name" value="{{ old('name', $item->name) }}" required>
            </div>
            <div class="col-md-3">
                <label class="form-label" for="target-module">目标模块</label>
                <select class="form-select" name="target_module" id="target-module" required>
                    <option value="tool" @selected(old('target_module', $item->target_module?->value ?? 'tool') === 'tool')>AI 工具</option>
                    <option value="model" @selected(old('target_module', $item->target_module?->value ?? 'tool') === 'model')>AI 模型</option>
                </select>
            </div>
            <div class="col-md-3">
                <label class="form-label">发布策略</label>
                <select class="form-select" name="publish_policy">
                    <option value="draft" @selected(old('publish_policy', $item->publish_policy ?? 'draft') === 'draft')>草稿待审核</option>
                </select>
            </div>
            <div class="col-md-4">
                <label class="form-label">Cron 表达式</label>
                <input class="form-control" name="cron_expression" value="{{ old('cron_expression', $item->cron_expression ?: '0 */6 * * *') }}" required>
            </div>
            <div class="col-md-4">
                <label class="form-label">时区</label>
                <input class="form-control" name="timezone" value="{{ old('timezone', $item->timezone ?: 'Asia/Shanghai') }}" required>
            </div>
            <div class="col-md-2">
                <label class="form-label">最大页面数</label>
                <input class="form-control" type="number" min="1" max="2000" name="max_pages" value="{{ old('max_pages', $item->max_pages ?: 50) }}" required>
            </div>
            <div class="col-md-2">
                <label class="form-label">启用</label>
                <div class="form-check mt-2">
                    {{-- An unchecked box is absent from the request, so old() alone would fall back to the model value after a validation error. Use the submitted state whenever old input exists. --}}
                    <input class="form-check-input" type="checkbox" name="enabled" id="rule-enabled" value="1" @checked(session()->hasOldInput() ? (bool) old('enabled') : (bool) $item->enabled)>
                    <label class="form-check-label" for="rule-enabled">启用规则</label>
                </div>
            </div>
            <div class="col-12">
                <label class="form-label">入口 URL(每行一个)</label>
                <textarea class="form-control" name="entry_urls" rows="4" required>{{ $entryUrls }}</textarea>
            </div>
        </div>
    </section>
</div>
|
||
|
||
{{-- Crawl and AI settings: rate limiting, retries, alerting, extraction mode, AI options, request headers and cookies. --}}
<div class="col-12">
    <section class="form-section">
        <h4 class="form-section-title">抓取与 AI 配置</h4>
        <div class="row g-3">
            <div class="col-md-3">
                <label class="form-label">每分钟限流</label>
                <input class="form-control" type="number" min="1" max="2000" name="rate_limit_per_minute" value="{{ old('rate_limit_per_minute', $item->rate_limit_per_minute ?: 30) }}" required>
            </div>
            <div class="col-md-3">
                <label class="form-label">最大重试次数</label>
                <input class="form-control" type="number" min="1" max="10" name="retry_max" value="{{ old('retry_max', $item->retry_max ?: 3) }}" required>
            </div>
            <div class="col-md-3">
                <label class="form-label">退避秒数</label>
                <input class="form-control" type="number" min="1" max="3600" name="retry_backoff_seconds" value="{{ old('retry_backoff_seconds', $item->retry_backoff_seconds ?: 60) }}" required>
            </div>
            <div class="col-md-3">
                <label class="form-label">告警邮箱</label>
                <input class="form-control" type="email" name="alert_email" value="{{ old('alert_email', $item->alert_email) }}">
            </div>
            <div class="col-md-3">
                <label class="form-label" for="extractor-mode">抽取模式</label>
                <select class="form-select" name="extractor_mode" id="extractor-mode" required>
                    <option value="xpath" @selected($mode === 'xpath')>XPath</option>
                    <option value="ai" @selected($mode === 'ai')>AI</option>
                    <option value="hybrid" @selected($mode === 'hybrid')>Hybrid</option>
                </select>
            </div>
            <div class="col-md-3">
                <label class="form-label">AI Provider</label>
                <input class="form-control" name="ai_provider" value="{{ old('ai_provider', $item->ai_provider ?: 'openai_compatible') }}">
            </div>
            <div class="col-md-3">
                <label class="form-label" for="ai-model">AI Model</label>
                <input class="form-control" name="ai_model" id="ai-model" value="{{ old('ai_model', $item->ai_model ?: config('crawler.openai_default_model')) }}">
            </div>
            <div class="col-md-3">
                <label class="form-label" for="ai-temperature">AI 温度</label>
                <input class="form-control" type="number" step="0.1" min="0" max="2" name="ai_temperature" id="ai-temperature" value="{{ old('ai_temperature', $extractorAi['temperature'] ?? 0) }}">
            </div>
            <div class="col-md-4">
                <label class="form-label" for="ai-content-max-chars">AI 截断长度</label>
                <input class="form-control" type="number" min="500" max="50000" name="ai_content_max_chars" id="ai-content-max-chars" value="{{ old('ai_content_max_chars', $extractorAi['content_max_chars'] ?? 12000) }}">
            </div>
            <div class="col-md-4">
                <label class="form-label" for="ai-system-prompt">AI 系统提示词</label>
                <textarea class="form-control" name="ai_system_prompt" id="ai-system-prompt" rows="3">{{ old('ai_system_prompt', $extractorAi['system_prompt'] ?? '') }}</textarea>
            </div>
            <div class="col-md-4">
                <label class="form-label" for="ai-user-prompt">AI 用户提示词</label>
                <textarea class="form-control" name="ai_user_prompt" id="ai-user-prompt" rows="3">{{ old('ai_user_prompt', $extractorAi['user_prompt'] ?? '') }}</textarea>
            </div>
            <div class="col-md-4">
                <label class="form-label" for="user-agent">User-Agent</label>
                <input class="form-control" name="user_agent" id="user-agent" value="{{ old('user_agent', $item->user_agent) }}">
            </div>
            <div class="col-md-4">
                <label class="form-label">代理</label>
                <input class="form-control" name="proxy" value="{{ old('proxy', $item->proxy) }}">
            </div>
            <div class="col-md-4">
                <label class="form-label">AI 兜底</label>
                <div class="form-check mt-2">
                    {{-- An unchecked box is absent from the request, so old() alone would fall back to the model value after a validation error. Use the submitted state whenever old input exists. --}}
                    <input class="form-check-input" type="checkbox" name="ai_fallback_enabled" id="ai-fallback-enabled" value="1" @checked(session()->hasOldInput() ? (bool) old('ai_fallback_enabled') : (bool) $item->ai_fallback_enabled)>
                    <label class="form-check-label" for="ai-fallback-enabled">缺字段启用兜底</label>
                </div>
            </div>
            <div class="col-md-6">
                <label class="form-label">Headers JSON</label>
                <textarea class="form-control" name="headers_json" rows="5">{{ $headersJson }}</textarea>
            </div>
            <div class="col-md-6">
                <label class="form-label">Cookies JSON</label>
                <textarea class="form-control" name="cookies_json" rows="5">{{ $cookiesJson }}</textarea>
            </div>
        </div>
    </section>
</div>
|
||
|
||
{{-- Extractor / mapping editors plus the page preview used to pick XPath expressions. --}}
<div class="col-12">
    <section class="form-section">
        <h4 class="form-section-title">Extractor / Mapping / 预览选元素</h4>
        <div class="row g-3">
            <div class="col-md-6">
                <label class="form-label" for="extractor-json">Extractor JSON</label>
                <textarea class="form-control" name="extractor_json" id="extractor-json" rows="14" required>{{ $extractorJson }}</textarea>
            </div>
            <div class="col-md-6">
                <label class="form-label">Mapping JSON</label>
                <textarea class="form-control" name="mapping_json" rows="6">{{ $mappingJson }}</textarea>
                <label class="form-label mt-3">Dedupe JSON</label>
                <textarea class="form-control" name="dedupe_json" rows="6">{{ $dedupeJson }}</textarea>
            </div>
            <div class="col-md-9">
                {{-- This input has no visible label, so give it an accessible name. --}}
                <input class="form-control" type="url" id="preview-url" aria-label="预览 URL" placeholder="输入目标页面 URL(用于预览和 AI 生成规则)">
            </div>
            <div class="col-md-3 d-grid">
                <button class="btn btn-outline-primary" type="button" id="preview-load-btn">加载预览</button>
            </div>
            <div class="col-12">
                {{-- NOTE(review): allow-same-origin together with allow-scripts lets framed content act with this page's origin. The picker script relies on both flags, so the attribute is unchanged here; confirm the preview endpoint strips scripts from the HTML it returns. --}}
                <iframe id="preview-frame" title="页面预览" style="width:100%;height:480px;border:1px solid #d7e0ef;border-radius:.6rem;" sandbox="allow-same-origin allow-scripts"></iframe>
                <div class="small text-muted mt-2" id="preview-status" role="status" aria-live="polite">未加载预览</div>
            </div>
            <div class="col-md-5">
                <label class="form-label">当前 XPath</label>
                <div id="selected-xpath" class="form-control" style="height:auto;min-height:42px;">未选择</div>
            </div>
            <div class="col-md-4">
                <label class="form-label" for="picker-field">写入字段(支持自定义)</label>
                <input class="form-control" id="picker-field" placeholder="list_link_xpath 或 name/summary/...">
            </div>
            <div class="col-md-3 d-grid">
                <label class="form-label" aria-hidden="true">&nbsp;</label>
                <button class="btn btn-primary" type="button" id="apply-selector-btn">写入 Extractor JSON</button>
            </div>
            <div class="col-md-12 d-grid">
                <button class="btn btn-outline-success" type="button" id="ai-suggest-btn">AI 生成抽取规则并合并到 Extractor JSON</button>
            </div>
        </div>
    </section>
</div>
|
||
|
||
<div class="col-12 d-flex justify-content-between align-items-center">
|
||
<small class="text-muted">建议流程:加载预览 -> 点选元素写 XPath -> AI 补全规则 -> 保存。</small>
|
||
<button class="btn btn-primary" type="submit">保存规则</button>
|
||
</div>
|
||
</form>
|
||
</div>
|
||
</div>
|
||
@endsection
|