爬虫开发
Some checks failed
Tests / PHP 8.2 (push) Has been cancelled
Tests / PHP 8.3 (push) Has been cancelled
Tests / PHP 8.4 (push) Has been cancelled

This commit is contained in:
cjd
2026-02-18 12:56:36 +08:00
parent a98bc6f13c
commit 260460df03
45 changed files with 4091 additions and 8 deletions

48
app/Models/CrawlAlert.php Normal file
View File

@@ -0,0 +1,48 @@
<?php
declare(strict_types=1);
namespace App\Models;
use App\Enums\CrawlAlertSeverity;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Model;
use Illuminate\Database\Eloquent\Relations\BelongsTo;
/**
 * Eloquent model for a crawl alert record.
 *
 * Each alert points at one CrawlRun (via `run_id`) and one CrawlRule
 * (via `rule_id`).
 */
class CrawlAlert extends Model
{
    use HasFactory;

    /**
     * Attributes that may be set through mass assignment.
     *
     * @var list<string>
     */
    protected $fillable = [
        'run_id',
        'rule_id',
        'severity',
        'type',
        'message',
        'context',
        'is_resolved',
        'resolved_by',
        'resolved_at',
    ];

    /**
     * Attribute cast map applied by Eloquent when reading and writing.
     *
     * @return array<string, string>
     */
    protected function casts(): array
    {
        return [
            'severity'    => CrawlAlertSeverity::class, // enum cast
            'context'     => 'array',
            'is_resolved' => 'boolean',
            'resolved_at' => 'datetime',
        ];
    }

    /**
     * The crawl run this alert is attached to, resolved through `run_id`.
     */
    public function run(): BelongsTo
    {
        return $this->belongsTo(CrawlRun::class, 'run_id');
    }

    /**
     * The crawl rule this alert is attached to, resolved through `rule_id`.
     */
    public function rule(): BelongsTo
    {
        return $this->belongsTo(CrawlRule::class, 'rule_id');
    }
}

74
app/Models/CrawlRule.php Normal file
View File

@@ -0,0 +1,74 @@
<?php
declare(strict_types=1);
namespace App\Models;
use App\Enums\CrawlTargetModule;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Model;
use Illuminate\Database\Eloquent\Relations\HasMany;
/**
 * Eloquent model for a crawl rule record.
 *
 * A rule owns many CrawlRun rows and many CrawlAlert rows, both keyed by
 * `rule_id` on the child table.
 */
class CrawlRule extends Model
{
    use HasFactory;

    /**
     * Attributes that may be set through mass assignment.
     *
     * @var list<string>
     */
    protected $fillable = [
        'name',
        'target_module',
        'enabled',
        'entry_urls',
        'cron_expression',
        'timezone',
        'max_pages',
        'render_js',
        'user_agent',
        'headers',
        'cookies',
        'proxy',
        'rate_limit_per_minute',
        'retry_max',
        'retry_backoff_seconds',
        'extractor_config',
        'mapping_config',
        'dedupe_config',
        'ai_fallback_enabled',
        'ai_provider',
        'ai_model',
        'publish_policy',
        'alert_email',
        'last_run_at',
        'next_run_at',
        'created_by',
        'updated_by',
    ];

    /**
     * Attribute cast map applied by Eloquent when reading and writing.
     *
     * Attributes not listed here are left uncast.
     *
     * @return array<string, string>
     */
    protected function casts(): array
    {
        return [
            'target_module'       => CrawlTargetModule::class, // enum cast
            'enabled'             => 'boolean',
            'entry_urls'          => 'array',
            'headers'             => 'array',
            'cookies'             => 'array',
            'extractor_config'    => 'array',
            'mapping_config'      => 'array',
            'dedupe_config'       => 'array',
            'render_js'           => 'boolean',
            'ai_fallback_enabled' => 'boolean',
            'last_run_at'         => 'datetime',
            'next_run_at'         => 'datetime',
        ];
    }

    /**
     * Crawl runs whose `rule_id` references this rule.
     */
    public function runs(): HasMany
    {
        return $this->hasMany(CrawlRun::class, 'rule_id');
    }

    /**
     * Crawl alerts whose `rule_id` references this rule.
     */
    public function alerts(): HasMany
    {
        return $this->hasMany(CrawlAlert::class, 'rule_id');
    }
}

59
app/Models/CrawlRun.php Normal file
View File

@@ -0,0 +1,59 @@
<?php
declare(strict_types=1);
namespace App\Models;
use App\Enums\CrawlRunStatus;
use App\Enums\CrawlTriggerType;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Model;
use Illuminate\Database\Eloquent\Relations\BelongsTo;
use Illuminate\Database\Eloquent\Relations\HasMany;
/**
 * Eloquent model for a crawl run record.
 *
 * A run belongs to one CrawlRule (via `rule_id`) and owns many CrawlRunItem
 * and CrawlAlert rows (both keyed by `run_id` on the child table).
 */
class CrawlRun extends Model
{
    use HasFactory;

    /**
     * Attributes that may be set through mass assignment.
     *
     * @var list<string>
     */
    protected $fillable = [
        'rule_id',
        'trigger_type',
        'status',
        'started_at',
        'finished_at',
        'total_urls',
        'success_count',
        'failed_count',
        'skipped_count',
        'error_summary',
        'metrics',
        'created_by',
    ];

    /**
     * Attribute cast map applied by Eloquent when reading and writing.
     *
     * @return array<string, string>
     */
    protected function casts(): array
    {
        return [
            'trigger_type' => CrawlTriggerType::class, // enum cast
            'status'       => CrawlRunStatus::class,   // enum cast
            'started_at'   => 'datetime',
            'finished_at'  => 'datetime',
            'metrics'      => 'array',
        ];
    }

    /**
     * The crawl rule this run was created for, resolved through `rule_id`.
     */
    public function rule(): BelongsTo
    {
        return $this->belongsTo(CrawlRule::class, 'rule_id');
    }

    /**
     * Run items whose `run_id` references this run.
     */
    public function items(): HasMany
    {
        return $this->hasMany(CrawlRunItem::class, 'run_id');
    }

    /**
     * Crawl alerts whose `run_id` references this run.
     */
    public function alerts(): HasMany
    {
        return $this->hasMany(CrawlAlert::class, 'run_id');
    }
}

46
app/Models/CrawlRunItem.php Normal file
View File

@@ -0,0 +1,46 @@
<?php
declare(strict_types=1);
namespace App\Models;
use App\Enums\CrawlRunItemStatus;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Model;
use Illuminate\Database\Eloquent\Relations\BelongsTo;
/**
 * Eloquent model for a single item recorded within a crawl run.
 *
 * Each item belongs to one CrawlRun, referenced by `run_id`.
 */
class CrawlRunItem extends Model
{
    use HasFactory;

    /**
     * Attributes that may be set through mass assignment.
     *
     * @var list<string>
     */
    protected $fillable = [
        'run_id',
        'url',
        'stage',
        'attempt',
        'status',
        'latency_ms',
        'http_code',
        'error_code',
        'error_message',
        'raw_payload',
        'normalized_payload',
        'upsert_result',
    ];

    /**
     * Attribute cast map applied by Eloquent when reading and writing.
     *
     * @return array<string, string>
     */
    protected function casts(): array
    {
        return [
            'status'             => CrawlRunItemStatus::class, // enum cast
            'raw_payload'        => 'array',
            'normalized_payload' => 'array',
            'upsert_result'      => 'array',
        ];
    }

    /**
     * The crawl run this item was recorded under, resolved through `run_id`.
     */
    public function run(): BelongsTo
    {
        return $this->belongsTo(CrawlRun::class, 'run_id');
    }
}