decodeJsonToArray($this->input('extractor_json'));

        // Coerce the checkbox-style flags to real booleans and, when no usable
        // extractor_mode was submitted, fall back to the "mode" key of
        // $extractorConfig (or 'xpath' when that key is absent).
        // NOTE(review): $extractorConfig is assigned above this view, presumably
        // from the decoded extractor_json input - confirm.
        $this->merge([
            'enabled' => $this->boolean('enabled'),
            'render_js' => $this->boolean('render_js'),
            'ai_fallback_enabled' => $this->boolean('ai_fallback_enabled'),
            'extractor_mode' => $this->input('extractor_mode') ?: (string) ($extractorConfig['mode'] ?? 'xpath'),
        ]);
    }

    /**
     * Validation rules for the crawl rule form.
     *
     * `entry_urls` is a newline-separated string. Besides being required, it
     * must contain at least one line that parseEntryUrls() accepts; otherwise
     * normalizedPayload() would silently produce an empty URL list.
     *
     * @return array<string, array<int, mixed>>
     */
    public function rules(): array
    {
        return [
            'name' => ['required', 'string', 'max:150'],
            'target_module' => ['required', Rule::in(array_column(CrawlTargetModule::cases(), 'value'))],
            'enabled' => ['nullable', 'boolean'],
            'entry_urls' => [
                'required',
                'string',
                // Reject input that has content but no line surviving URL
                // filtering. Blank input is left to the `required` rule so the
                // user does not get two messages for the same problem.
                function (string $attribute, mixed $value, \Closure $fail): void {
                    if (! is_string($value) || trim($value) === '') {
                        return;
                    }

                    if ($this->parseEntryUrls($value) === []) {
                        $fail('请至少填写一个有效的入口 URL。');
                    }
                },
            ],
            'cron_expression' => ['required', 'string', 'max:64'],
            'timezone' => ['required', 'string', 'max:64'],
            'max_pages' => ['required', 'integer', 'between:1,2000'],
            'render_js' => ['nullable', 'boolean'],
            'user_agent' => ['nullable', 'string', 'max:255'],
            'headers_json' => ['nullable', 'json'],
            'cookies_json' => ['nullable', 'json'],
            'proxy' => ['nullable', 'string', 'max:255'],
            'rate_limit_per_minute' => ['required', 'integer', 'between:1,2000'],
            'retry_max' => ['required', 'integer', 'between:1,10'],
            'retry_backoff_seconds' => ['required', 'integer', 'between:1,3600'],
            'extractor_json' => ['required', 'json'],
            'extractor_mode' => ['required', Rule::in(['xpath', 'ai', 'hybrid'])],
            'mapping_json' => ['nullable', 'json'],
            'dedupe_json' => ['nullable', 'json'],
            'ai_fallback_enabled' => ['nullable', 'boolean'],
            'ai_provider' => ['nullable', 'string', 'max:64'],
            'ai_model' => ['nullable', 'string', 'max:128'],
            'ai_system_prompt' => ['nullable', 'string', 'max:4000'],
            'ai_user_prompt' => ['nullable', 'string', 'max:4000'],
            'ai_temperature' => ['nullable', 'numeric', 'between:0,2'],
            'ai_content_max_chars' => ['nullable', 'integer', 'between:500,50000'],
            'publish_policy' => ['required', Rule::in(['draft'])],
            'alert_email' => ['nullable', 'email'],
        ];
    }

    /**
     * Custom validation messages, keyed by "field.rule".
     *
     * @return array<string, string>
     */
    public function messages(): array
    {
        return [
            'name.required' => '请填写规则名称。',
            'target_module.required' => '请选择目标模块。',
            'entry_urls.required' => '请至少填写一个入口 URL。',
            'cron_expression.required' => '请填写 Cron 表达式。',
            'timezone.required' => '请填写时区。',
            'max_pages.required' => '请填写最大页面数。',
            'max_pages.integer' => '最大页面数必须是整数。',
            'max_pages.between' => '最大页面数需在 1 到 2000 之间。',
            'rate_limit_per_minute.required' => '请填写每分钟限流值。',
            'rate_limit_per_minute.integer' => '每分钟限流值必须是整数。',
            'rate_limit_per_minute.between' => '每分钟限流值需在 1 到 2000 之间。',
            'retry_max.required' => '请填写最大重试次数。',
            'retry_max.integer' => '最大重试次数必须是整数。',
            'retry_max.between' => '最大重试次数需在 1 到 10 之间。',
            'retry_backoff_seconds.required' => '请填写重试退避秒数。',
            'retry_backoff_seconds.integer' => '重试退避秒数必须是整数。',
            'retry_backoff_seconds.between' => '重试退避秒数需在 1 到 3600 之间。',
            'extractor_json.required' => '请填写 Extractor JSON。',
            'extractor_json.json' => 'Extractor JSON 格式不合法。',
            'extractor_mode.required' => '请选择抽取模式。',
            'extractor_mode.in' => '抽取模式仅支持 xpath、ai、hybrid。',
            'mapping_json.json' => 'Mapping JSON 格式不合法。',
            'dedupe_json.json' => 'Dedupe JSON 格式不合法。',
            'headers_json.json' => 'Headers JSON 格式不合法。',
            'cookies_json.json' => 'Cookies JSON 格式不合法。',
            'ai_temperature.between' => 'AI 温度需在 0 到 2 之间。',
            'ai_content_max_chars.between' => 'AI 内容截断长度需在 500 到 50000 之间。',
            'alert_email.email' => '告警邮箱格式不合法。',
        ];
    }

    /**
     * Human-readable field labels substituted into validation messages.
     *
     * @return array<string, string>
     */
    public function attributes(): array
    {
        return [
            'name' => '规则名称',
            'target_module' => '目标模块',
            'entry_urls' => '入口 URL',
            'cron_expression' => 'Cron 表达式',
            'timezone' => '时区',
            'max_pages' => '最大页面数',
            'rate_limit_per_minute' => '每分钟限流',
            'retry_max' => '最大重试次数',
            'retry_backoff_seconds' => '重试退避秒数',
            'extractor_json' => 'Extractor JSON',
            'extractor_mode' => '抽取模式',
            'mapping_json' => 'Mapping JSON',
            'dedupe_json' => 'Dedupe JSON',
            'headers_json' => 'Headers JSON',
            'cookies_json' => 'Cookies JSON',
            'ai_system_prompt' => 'AI 系统提示词',
            'ai_user_prompt' => 'AI 用户提示词',
            'ai_temperature' => 'AI 温度',
            'ai_content_max_chars' => 'AI 内容截断长度',
            'alert_email' => '告警邮箱',
        ];
    }

    /**
     * Converts the validated input into a normalized array.
     *
     * JSON string fields are decoded to arrays, `entry_urls` is split into a
     * de-duplicated list, numeric fields are cast to int, blank optional
     * strings become null, and the AI prompt/tuning fields are folded into
     * `extractor_config['ai']`.
     *
     * @return array<string, mixed>
     */
    public function normalizedPayload(): array
    {
        $payload = $this->validated();

        $extractorConfig = $this->decodeJsonToArray($payload['extractor_json'] ?? null);

        // The explicit form field wins over a "mode" embedded in the JSON;
        // anything outside the supported set degrades to 'xpath'.
        $extractorMode = (string) ($payload['extractor_mode'] ?? ($extractorConfig['mode'] ?? 'xpath'));
        if (! in_array($extractorMode, ['xpath', 'ai', 'hybrid'], true)) {
            $extractorMode = 'xpath';
        }
        $extractorConfig['mode'] = $extractorMode;

        // The form fields are authoritative for the "ai" section: an "ai" key
        // supplied inside extractor_json is replaced, or removed when no AI
        // option was filled in.
        $aiConfig = $this->buildAiConfig($payload);
        if ($aiConfig !== []) {
            $extractorConfig['ai'] = $aiConfig;
        } else {
            unset($extractorConfig['ai']);
        }

        return [
            'name' => $payload['name'],
            'target_module' => $payload['target_module'],
            'enabled' => (bool) ($payload['enabled'] ?? false),
            'entry_urls' => $this->parseEntryUrls((string) ($payload['entry_urls'] ?? '')),
            'cron_expression' => trim((string) $payload['cron_expression']),
            'timezone' => trim((string) $payload['timezone']),
            'max_pages' => (int) $payload['max_pages'],
            'render_js' => (bool) ($payload['render_js'] ?? false),
            'user_agent' => $this->nullableTrim($payload['user_agent'] ?? null),
            'headers' => $this->decodeJsonToArray($payload['headers_json'] ?? null),
            'cookies' => $this->decodeJsonToArray($payload['cookies_json'] ?? null),
            'proxy' => $this->nullableTrim($payload['proxy'] ?? null),
            'rate_limit_per_minute' => (int) $payload['rate_limit_per_minute'],
            'retry_max' => (int) $payload['retry_max'],
            'retry_backoff_seconds' => (int) $payload['retry_backoff_seconds'],
            'extractor_config' => $extractorConfig,
            'mapping_config' => $this->decodeJsonToArray($payload['mapping_json'] ?? null),
            'dedupe_config' => $this->decodeJsonToArray($payload['dedupe_json'] ?? null),
            'ai_fallback_enabled' => (bool) ($payload['ai_fallback_enabled'] ?? false),
            'ai_provider' => $this->nullableTrim($payload['ai_provider'] ?? null),
            'ai_model' => $this->nullableTrim($payload['ai_model'] ?? null),
            'publish_policy' => (string) $payload['publish_policy'],
            'alert_email' => $this->nullableTrim($payload['alert_email'] ?? null),
        ];
    }

    /**
     * Collects the AI-related fields that were actually filled in.
     *
     * Only non-blank values are included, so an empty result means no AI
     * option was supplied.
     *
     * @param array<string, mixed> $payload Validated request data.
     * @return array<string, mixed> Subset of the keys system_prompt, user_prompt,
     *                              temperature, content_max_chars and model.
     */
    private function buildAiConfig(array $payload): array
    {
        $aiConfig = [];

        $systemPrompt = $this->nullableTrim($payload['ai_system_prompt'] ?? null);
        if ($systemPrompt !== null) {
            $aiConfig['system_prompt'] = $systemPrompt;
        }

        $userPrompt = $this->nullableTrim($payload['ai_user_prompt'] ?? null);
        if ($userPrompt !== null) {
            $aiConfig['user_prompt'] = $userPrompt;
        }

        // isset() plus the '' check keeps a legitimate 0 / "0" value, which a
        // plain truthiness test would discard.
        if (isset($payload['ai_temperature']) && $payload['ai_temperature'] !== '') {
            $aiConfig['temperature'] = (float) $payload['ai_temperature'];
        }

        if (isset($payload['ai_content_max_chars']) && $payload['ai_content_max_chars'] !== '') {
            $aiConfig['content_max_chars'] = (int) $payload['ai_content_max_chars'];
        }

        $aiModel = $this->nullableTrim($payload['ai_model'] ?? null);
        if ($aiModel !== null) {
            $aiConfig['model'] = $aiModel;
        }

        return $aiConfig;
    }

    /**
     * Trims a string and maps non-strings and blank strings to null.
     */
    private function nullableTrim(mixed $value): ?string
    {
        if (! is_string($value)) {
            return null;
        }

        $trimmed = trim($value);

        return $trimmed === '' ? null : $trimmed;
    }

    /**
     * Splits newline-separated text into a de-duplicated list of URLs.
     *
     * Blank lines and lines that fail FILTER_VALIDATE_URL are skipped rather
     * than reported; first-occurrence order is preserved.
     *
     * NOTE(review): FILTER_VALIDATE_URL does not restrict the scheme. If the
     * crawler should only fetch http/https, consider enforcing that here.
     *
     * @return list<string>
     */
    private function parseEntryUrls(string $entryUrls): array
    {
        // preg_split() returns false on a PCRE error; treat that as no lines.
        $lines = preg_split('/\r\n|\r|\n/', $entryUrls) ?: [];
        $urls = [];

        foreach ($lines as $line) {
            $candidate = trim($line);
            if ($candidate === '') {
                continue;
            }

            if (filter_var($candidate, FILTER_VALIDATE_URL) !== false) {
                $urls[] = $candidate;
            }
        }

        // array_unique() preserves keys, so reindex to return a proper list.
        return array_values(array_unique($urls));
    }

    /**
     * Decodes a JSON string into an array.
     *
     * Non-string input, blank strings, invalid JSON and JSON that decodes to a
     * scalar all yield an empty array.
     *
     * @return array<array-key, mixed>
     */
    private function decodeJsonToArray(mixed $value): array
    {
        if (! is_string($value) || trim($value) === '') {
            return [];
        }

        $decoded = json_decode($value, true);

        return is_array($decoded) ? $decoded : [];
    }
}