爬虫开发
This commit is contained in:
48
app/Models/CrawlAlert.php
Normal file
48
app/Models/CrawlAlert.php
Normal file
@@ -0,0 +1,48 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Models;
|
||||
|
||||
use App\Enums\CrawlAlertSeverity;
|
||||
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
||||
use Illuminate\Database\Eloquent\Model;
|
||||
use Illuminate\Database\Eloquent\Relations\BelongsTo;
|
||||
|
||||
/**
 * Eloquent model representing a crawl alert.
 *
 * Each alert references a crawl run through `run_id` and a crawl rule
 * through `rule_id`.
 */
class CrawlAlert extends Model
{
    use HasFactory;

    /**
     * Attributes that may be set through mass assignment.
     *
     * NOTE(review): the resolution fields (`is_resolved`, `resolved_by`,
     * `resolved_at`) are mass assignable as well — confirm that callers only
     * pass validated input when filling this model.
     *
     * @var array<int, string>
     */
    protected $fillable = [
        'run_id',
        'rule_id',
        'severity',
        'type',
        'message',
        'context',
        'is_resolved',
        'resolved_by',
        'resolved_at',
    ];

    /**
     * Attribute cast definitions.
     *
     * @return array<string, string>
     */
    protected function casts(): array
    {
        return [
            // Class from App\Enums (presumably a backed enum — confirm).
            'severity' => CrawlAlertSeverity::class,

            // Structured data exposed as a PHP array.
            'context' => 'array',

            // Resolution state.
            'is_resolved' => 'boolean',
            'resolved_at' => 'datetime',
        ];
    }

    /**
     * The crawl run this alert is attached to (foreign key `run_id`).
     */
    public function run(): BelongsTo
    {
        $owningRun = $this->belongsTo(CrawlRun::class, 'run_id');

        return $owningRun;
    }

    /**
     * The crawl rule this alert is attached to (foreign key `rule_id`).
     */
    public function rule(): BelongsTo
    {
        $owningRule = $this->belongsTo(CrawlRule::class, 'rule_id');

        return $owningRule;
    }
}
|
||||
|
||||
74
app/Models/CrawlRule.php
Normal file
74
app/Models/CrawlRule.php
Normal file
@@ -0,0 +1,74 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Models;
|
||||
|
||||
use App\Enums\CrawlTargetModule;
|
||||
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
||||
use Illuminate\Database\Eloquent\Model;
|
||||
use Illuminate\Database\Eloquent\Relations\HasMany;
|
||||
|
||||
/**
 * Eloquent model representing a crawl rule.
 *
 * A rule owns many crawl runs and many crawl alerts, both linked through the
 * `rule_id` foreign key on the related tables.
 */
class CrawlRule extends Model
{
    use HasFactory;

    /**
     * Attributes that may be set through mass assignment.
     *
     * NOTE(review): `headers`, `cookies` and `proxy` may carry credentials —
     * confirm whether they should be hidden from serialization or encrypted.
     *
     * @var array<int, string>
     */
    protected $fillable = [
        'name',
        'target_module',
        'enabled',
        'entry_urls',
        'cron_expression',
        'timezone',
        'max_pages',
        'render_js',
        'user_agent',
        'headers',
        'cookies',
        'proxy',
        'rate_limit_per_minute',
        'retry_max',
        'retry_backoff_seconds',
        'extractor_config',
        'mapping_config',
        'dedupe_config',
        'ai_fallback_enabled',
        'ai_provider',
        'ai_model',
        'publish_policy',
        'alert_email',
        'last_run_at',
        'next_run_at',
        'created_by',
        'updated_by',
    ];

    /**
     * Attribute cast definitions.
     *
     * @return array<string, string>
     */
    protected function casts(): array
    {
        return [
            // Class from App\Enums (presumably a backed enum — confirm).
            'target_module' => CrawlTargetModule::class,

            'enabled' => 'boolean',

            // Structured data exposed as PHP arrays.
            'entry_urls' => 'array',
            'headers' => 'array',
            'cookies' => 'array',
            'extractor_config' => 'array',
            'mapping_config' => 'array',
            'dedupe_config' => 'array',

            // Feature toggles.
            'render_js' => 'boolean',
            'ai_fallback_enabled' => 'boolean',

            // Run timestamps.
            'last_run_at' => 'datetime',
            'next_run_at' => 'datetime',
        ];
    }

    /**
     * Crawl runs belonging to this rule (foreign key `rule_id`).
     */
    public function runs(): HasMany
    {
        $ruleRuns = $this->hasMany(CrawlRun::class, 'rule_id');

        return $ruleRuns;
    }

    /**
     * Crawl alerts belonging to this rule (foreign key `rule_id`).
     */
    public function alerts(): HasMany
    {
        $ruleAlerts = $this->hasMany(CrawlAlert::class, 'rule_id');

        return $ruleAlerts;
    }
}
|
||||
|
||||
59
app/Models/CrawlRun.php
Normal file
59
app/Models/CrawlRun.php
Normal file
@@ -0,0 +1,59 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Models;
|
||||
|
||||
use App\Enums\CrawlRunStatus;
|
||||
use App\Enums\CrawlTriggerType;
|
||||
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
||||
use Illuminate\Database\Eloquent\Model;
|
||||
use Illuminate\Database\Eloquent\Relations\BelongsTo;
|
||||
use Illuminate\Database\Eloquent\Relations\HasMany;
|
||||
|
||||
/**
 * Eloquent model representing a crawl run.
 *
 * A run belongs to one crawl rule (via `rule_id`) and owns many run items and
 * many alerts, both linked through the `run_id` foreign key.
 */
class CrawlRun extends Model
{
    use HasFactory;

    /**
     * Attributes that may be set through mass assignment.
     *
     * @var array<int, string>
     */
    protected $fillable = [
        'rule_id',
        'trigger_type',
        'status',
        'started_at',
        'finished_at',
        'total_urls',
        'success_count',
        'failed_count',
        'skipped_count',
        'error_summary',
        'metrics',
        'created_by',
    ];

    /**
     * Attribute cast definitions.
     *
     * @return array<string, string>
     */
    protected function casts(): array
    {
        return [
            // Classes from App\Enums (presumably backed enums — confirm).
            'trigger_type' => CrawlTriggerType::class,
            'status' => CrawlRunStatus::class,

            // Run boundaries.
            'started_at' => 'datetime',
            'finished_at' => 'datetime',

            // Structured data exposed as a PHP array.
            'metrics' => 'array',
        ];
    }

    /**
     * The crawl rule this run belongs to (foreign key `rule_id`).
     */
    public function rule(): BelongsTo
    {
        $owningRule = $this->belongsTo(CrawlRule::class, 'rule_id');

        return $owningRule;
    }

    /**
     * Run items belonging to this run (foreign key `run_id`).
     */
    public function items(): HasMany
    {
        $runItems = $this->hasMany(CrawlRunItem::class, 'run_id');

        return $runItems;
    }

    /**
     * Alerts belonging to this run (foreign key `run_id`).
     */
    public function alerts(): HasMany
    {
        $runAlerts = $this->hasMany(CrawlAlert::class, 'run_id');

        return $runAlerts;
    }
}
|
||||
|
||||
46
app/Models/CrawlRunItem.php
Normal file
46
app/Models/CrawlRunItem.php
Normal file
@@ -0,0 +1,46 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Models;
|
||||
|
||||
use App\Enums\CrawlRunItemStatus;
|
||||
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
||||
use Illuminate\Database\Eloquent\Model;
|
||||
use Illuminate\Database\Eloquent\Relations\BelongsTo;
|
||||
|
||||
/**
 * Eloquent model representing a single item of a crawl run.
 *
 * Each item belongs to one crawl run through the `run_id` foreign key.
 */
class CrawlRunItem extends Model
{
    use HasFactory;

    /**
     * Attributes that may be set through mass assignment.
     *
     * @var array<int, string>
     */
    protected $fillable = [
        'run_id',
        'url',
        'stage',
        'attempt',
        'status',
        'latency_ms',
        'http_code',
        'error_code',
        'error_message',
        'raw_payload',
        'normalized_payload',
        'upsert_result',
    ];

    /**
     * Attribute cast definitions.
     *
     * @return array<string, string>
     */
    protected function casts(): array
    {
        return [
            // Class from App\Enums (presumably a backed enum — confirm).
            'status' => CrawlRunItemStatus::class,

            // Structured data exposed as PHP arrays.
            'raw_payload' => 'array',
            'normalized_payload' => 'array',
            'upsert_result' => 'array',
        ];
    }

    /**
     * The crawl run this item belongs to (foreign key `run_id`).
     */
    public function run(): BelongsTo
    {
        $owningRun = $this->belongsTo(CrawlRun::class, 'run_id');

        return $owningRun;
    }
}
|
||||
|
||||
Reference in New Issue
Block a user