爬虫开发
This commit is contained in:
@@ -0,0 +1,54 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
{
    /**
     * Create the `crawl_rules` table.
     *
     * Columns are declared in the order they should appear in the table;
     * the two composite indexes are added last.
     */
    public function up(): void
    {
        Schema::create('crawl_rules', static function (Blueprint $blueprint): void {
            $blueprint->id();

            // Required columns without defaults: name, target_module, entry_urls.
            $blueprint->string('name', 150);
            $blueprint->string('target_module', 32);
            $blueprint->boolean('enabled')
                ->default(true);
            $blueprint->json('entry_urls');

            // Schedule expression and the timezone stored alongside it.
            $blueprint->string('cron_expression', 64)
                ->default('0 */6 * * *');
            $blueprint->string('timezone', 64)
                ->default('Asia/Shanghai');

            // Fetch settings; every nullable column here has no default value.
            $blueprint->unsignedSmallInteger('max_pages')
                ->default(50);
            $blueprint->boolean('render_js')
                ->default(false);
            $blueprint->string('user_agent', 255)
                ->nullable();
            $blueprint->json('headers')
                ->nullable();
            $blueprint->json('cookies')
                ->nullable();
            $blueprint->string('proxy', 255)
                ->nullable();

            // Rate limit and retry settings.
            $blueprint->unsignedSmallInteger('rate_limit_per_minute')
                ->default(30);
            $blueprint->unsignedTinyInteger('retry_max')
                ->default(3);
            $blueprint->unsignedSmallInteger('retry_backoff_seconds')
                ->default(60);

            // Optional JSON configuration blobs.
            $blueprint->json('extractor_config')
                ->nullable();
            $blueprint->json('mapping_config')
                ->nullable();
            $blueprint->json('dedupe_config')
                ->nullable();

            // AI fallback is off by default; provider and model are optional.
            $blueprint->boolean('ai_fallback_enabled')
                ->default(false);
            $blueprint->string('ai_provider', 64)
                ->nullable();
            $blueprint->string('ai_model', 128)
                ->nullable();

            $blueprint->string('publish_policy', 32)
                ->default('draft');
            $blueprint->string('alert_email', 255)
                ->nullable();

            // Run bookkeeping timestamps, both optional.
            $blueprint->timestamp('last_run_at')
                ->nullable();
            $blueprint->timestamp('next_run_at')
                ->nullable();

            // Plain unsigned big integers: no foreign key constraint is declared.
            // NOTE(review): presumably these reference a users table - confirm.
            $blueprint->unsignedBigInteger('created_by')
                ->nullable();
            $blueprint->unsignedBigInteger('updated_by')
                ->nullable();

            $blueprint->timestamps();

            // Composite indexes; names are left to the framework's default naming.
            $blueprint->index(['enabled', 'next_run_at']);
            $blueprint->index(['target_module', 'enabled']);
        });
    }

    /**
     * Drop the `crawl_rules` table if it exists.
     */
    public function down(): void
    {
        Schema::dropIfExists('crawl_rules');
    }
};
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
{
    /**
     * Create the `crawl_runs` table.
     *
     * Each row references a `crawl_rules` row through `rule_id`.
     */
    public function up(): void
    {
        Schema::create('crawl_runs', static function (Blueprint $blueprint): void {
            $blueprint->id();

            // Deleting the parent rule cascades to its runs.
            $blueprint->foreignId('rule_id')
                ->constrained('crawl_rules')
                ->cascadeOnDelete();

            $blueprint->string('trigger_type', 32)
                ->default('manual');
            $blueprint->string('status', 32)
                ->default('pending');

            // Both timestamps are optional and have no default.
            $blueprint->timestamp('started_at')
                ->nullable();
            $blueprint->timestamp('finished_at')
                ->nullable();

            // Counters all start at zero.
            $blueprint->unsignedInteger('total_urls')
                ->default(0);
            $blueprint->unsignedInteger('success_count')
                ->default(0);
            $blueprint->unsignedInteger('failed_count')
                ->default(0);
            $blueprint->unsignedInteger('skipped_count')
                ->default(0);

            $blueprint->text('error_summary')
                ->nullable();
            $blueprint->json('metrics')
                ->nullable();

            // Plain unsigned big integer: no foreign key constraint is declared.
            $blueprint->unsignedBigInteger('created_by')
                ->nullable();

            $blueprint->timestamps();

            // Composite indexes, each pairing a filter column with created_at.
            $blueprint->index(['rule_id', 'created_at']);
            $blueprint->index(['status', 'created_at']);
        });
    }

    /**
     * Drop the `crawl_runs` table if it exists.
     */
    public function down(): void
    {
        Schema::dropIfExists('crawl_runs');
    }
};
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
{
    /**
     * Create the `crawl_run_items` table.
     *
     * Each row references a `crawl_runs` row through `run_id`.
     */
    public function up(): void
    {
        Schema::create('crawl_run_items', static function (Blueprint $blueprint): void {
            $blueprint->id();

            // Deleting the parent run cascades to its items.
            $blueprint->foreignId('run_id')
                ->constrained('crawl_runs')
                ->cascadeOnDelete();

            // Required columns without defaults: url and stage.
            $blueprint->string('url', 2048);
            $blueprint->string('stage', 32);

            $blueprint->unsignedTinyInteger('attempt')
                ->default(1);
            $blueprint->string('status', 32)
                ->default('success');

            // Optional per-item details.
            $blueprint->unsignedInteger('latency_ms')
                ->nullable();
            $blueprint->unsignedSmallInteger('http_code')
                ->nullable();
            $blueprint->string('error_code', 64)
                ->nullable();
            $blueprint->text('error_message')
                ->nullable();

            // Optional JSON payload columns.
            $blueprint->json('raw_payload')
                ->nullable();
            $blueprint->json('normalized_payload')
                ->nullable();
            $blueprint->json('upsert_result')
                ->nullable();

            $blueprint->timestamps();

            // Composite indexes, both led by run_id.
            $blueprint->index(['run_id', 'status']);
            $blueprint->index(['run_id', 'stage']);
        });
    }

    /**
     * Drop the `crawl_run_items` table if it exists.
     */
    public function down(): void
    {
        Schema::dropIfExists('crawl_run_items');
    }
};
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
{
    /**
     * Create the `crawl_alerts` table.
     *
     * Both foreign keys are nullable and are set to NULL when the
     * referenced row is deleted, so the alert row itself is kept.
     */
    public function up(): void
    {
        Schema::create('crawl_alerts', static function (Blueprint $blueprint): void {
            $blueprint->id();

            $blueprint->foreignId('run_id')
                ->nullable()
                ->constrained('crawl_runs')
                ->nullOnDelete();
            $blueprint->foreignId('rule_id')
                ->nullable()
                ->constrained('crawl_rules')
                ->nullOnDelete();

            $blueprint->string('severity', 32)
                ->default('warning');

            // Required columns without defaults: type and message.
            $blueprint->string('type', 64);
            $blueprint->string('message', 500);

            $blueprint->json('context')
                ->nullable();

            // Resolution state; alerts start unresolved.
            $blueprint->boolean('is_resolved')
                ->default(false);
            // Plain unsigned big integer: no foreign key constraint is declared.
            $blueprint->unsignedBigInteger('resolved_by')
                ->nullable();
            $blueprint->timestamp('resolved_at')
                ->nullable();

            $blueprint->timestamps();

            // Composite indexes; names are left to the framework's default naming.
            $blueprint->index(['is_resolved', 'severity']);
            $blueprint->index(['rule_id', 'created_at']);
        });
    }

    /**
     * Drop the `crawl_alerts` table if it exists.
     */
    public function down(): void
    {
        Schema::dropIfExists('crawl_alerts');
    }
};
|
||||
|
||||
Reference in New Issue
Block a user