爬虫开发
Some checks failed
Tests / PHP 8.2 (push) Has been cancelled
Tests / PHP 8.3 (push) Has been cancelled
Tests / PHP 8.4 (push) Has been cancelled

This commit is contained in:
cjd
2026-02-18 12:56:36 +08:00
parent a98bc6f13c
commit 260460df03
45 changed files with 4091 additions and 8 deletions

View File

@@ -0,0 +1,54 @@
<?php
declare(strict_types=1);
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
    /** Table created by up() and dropped by down(). */
    private const TABLE = 'crawl_rules';

    /**
     * Create the crawl_rules table together with its two composite indexes.
     */
    public function up(): void
    {
        Schema::create(self::TABLE, static function (Blueprint $table): void {
            $table->id();

            // Required columns without a default: name, target_module, entry_urls.
            $table->string('name', 150);
            $table->string('target_module', 32);
            $table->boolean('enabled')->default(true);
            $table->json('entry_urls');

            // Cron expression and timezone, both stored as plain strings with defaults.
            $table->string('cron_expression', 64)->default('0 */6 * * *');
            $table->string('timezone', 64)->default('Asia/Shanghai');

            // Request-related settings; every string/JSON column here is nullable.
            $table->unsignedSmallInteger('max_pages')->default(50);
            $table->boolean('render_js')->default(false);
            $table->string('user_agent', 255)->nullable();
            $table->json('headers')->nullable();
            $table->json('cookies')->nullable();
            $table->string('proxy', 255)->nullable();

            // Numeric limits, each with a default value.
            $table->unsignedSmallInteger('rate_limit_per_minute')->default(30);
            $table->unsignedTinyInteger('retry_max')->default(3);
            $table->unsignedSmallInteger('retry_backoff_seconds')->default(60);

            // Optional JSON configuration blobs.
            $table->json('extractor_config')->nullable();
            $table->json('mapping_config')->nullable();
            $table->json('dedupe_config')->nullable();

            // AI fallback flag (off by default) with optional provider/model names.
            $table->boolean('ai_fallback_enabled')->default(false);
            $table->string('ai_provider', 64)->nullable();
            $table->string('ai_model', 128)->nullable();

            $table->string('publish_policy', 32)->default('draft');
            $table->string('alert_email', 255)->nullable();

            // Nullable run timestamps.
            $table->timestamp('last_run_at')->nullable();
            $table->timestamp('next_run_at')->nullable();

            // Plain unsigned big integers: no foreign key constraint is declared here.
            $table->unsignedBigInteger('created_by')->nullable();
            $table->unsignedBigInteger('updated_by')->nullable();
            $table->timestamps();

            // Composite indexes.
            $table->index(['enabled', 'next_run_at']);
            $table->index(['target_module', 'enabled']);
        });
    }

    /**
     * Drop the crawl_rules table if it exists.
     */
    public function down(): void
    {
        Schema::dropIfExists(self::TABLE);
    }
};

View File

@@ -0,0 +1,39 @@
<?php
declare(strict_types=1);
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
    /** Table created by up() and dropped by down(). */
    private const TABLE = 'crawl_runs';

    /**
     * Create the crawl_runs table together with its two composite indexes.
     */
    public function up(): void
    {
        Schema::create(self::TABLE, static function (Blueprint $table): void {
            $table->id();

            // Foreign key to crawl_rules; deleting the rule cascades to its runs.
            $table->foreignId('rule_id')
                ->constrained('crawl_rules')
                ->cascadeOnDelete();

            // Short string columns with defaults 'manual' and 'pending'.
            $table->string('trigger_type', 32)->default('manual');
            $table->string('status', 32)->default('pending');

            // Nullable start/finish timestamps.
            $table->timestamp('started_at')->nullable();
            $table->timestamp('finished_at')->nullable();

            // Unsigned counters, all starting at zero.
            $table->unsignedInteger('total_urls')->default(0);
            $table->unsignedInteger('success_count')->default(0);
            $table->unsignedInteger('failed_count')->default(0);
            $table->unsignedInteger('skipped_count')->default(0);

            $table->text('error_summary')->nullable();
            $table->json('metrics')->nullable();

            // Plain unsigned big integer: no foreign key constraint is declared here.
            $table->unsignedBigInteger('created_by')->nullable();
            $table->timestamps();

            // Composite indexes.
            $table->index(['rule_id', 'created_at']);
            $table->index(['status', 'created_at']);
        });
    }

    /**
     * Drop the crawl_runs table if it exists.
     */
    public function down(): void
    {
        Schema::dropIfExists(self::TABLE);
    }
};

View File

@@ -0,0 +1,39 @@
<?php
declare(strict_types=1);
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
    /** Table created by up() and dropped by down(). */
    private const TABLE = 'crawl_run_items';

    /**
     * Create the crawl_run_items table together with its two composite indexes.
     */
    public function up(): void
    {
        Schema::create(self::TABLE, static function (Blueprint $table): void {
            $table->id();

            // Foreign key to crawl_runs; deleting the run cascades to its items.
            $table->foreignId('run_id')
                ->constrained('crawl_runs')
                ->cascadeOnDelete();

            // Required columns without a default: url and stage.
            $table->string('url', 2048);
            $table->string('stage', 32);
            $table->unsignedTinyInteger('attempt')->default(1);
            $table->string('status', 32)->default('success');

            // Optional per-item measurements and error details.
            $table->unsignedInteger('latency_ms')->nullable();
            $table->unsignedSmallInteger('http_code')->nullable();
            $table->string('error_code', 64)->nullable();
            $table->text('error_message')->nullable();

            // Optional JSON payload columns.
            $table->json('raw_payload')->nullable();
            $table->json('normalized_payload')->nullable();
            $table->json('upsert_result')->nullable();
            $table->timestamps();

            // Composite indexes, both led by run_id.
            $table->index(['run_id', 'status']);
            $table->index(['run_id', 'stage']);
        });
    }

    /**
     * Drop the crawl_run_items table if it exists.
     */
    public function down(): void
    {
        Schema::dropIfExists(self::TABLE);
    }
};

View File

@@ -0,0 +1,36 @@
<?php
declare(strict_types=1);
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
    /** Table created by up() and dropped by down(). */
    private const TABLE = 'crawl_alerts';

    /**
     * Create the crawl_alerts table together with its two composite indexes.
     */
    public function up(): void
    {
        Schema::create(self::TABLE, static function (Blueprint $table): void {
            $table->id();

            // Optional foreign keys: each is set to NULL when its parent row is deleted.
            $table->foreignId('run_id')
                ->nullable()
                ->constrained('crawl_runs')
                ->nullOnDelete();
            $table->foreignId('rule_id')
                ->nullable()
                ->constrained('crawl_rules')
                ->nullOnDelete();

            // severity defaults to 'warning'; type and message are required.
            $table->string('severity', 32)->default('warning');
            $table->string('type', 64);
            $table->string('message', 500);
            $table->json('context')->nullable();

            // Resolution columns: unresolved by default, resolver and time are nullable.
            $table->boolean('is_resolved')->default(false);
            $table->unsignedBigInteger('resolved_by')->nullable();
            $table->timestamp('resolved_at')->nullable();
            $table->timestamps();

            // Composite indexes.
            $table->index(['is_resolved', 'severity']);
            $table->index(['rule_id', 'created_at']);
        });
    }

    /**
     * Drop the crawl_alerts table if it exists.
     */
    public function down(): void
    {
        Schema::dropIfExists(self::TABLE);
    }
};