Files
ai-web/app/Services/Crawler/CrawlEntityUpsertService.php

278 lines
9.3 KiB
PHP
Raw Normal View History

2026-02-18 12:56:36 +08:00
<?php
declare(strict_types=1);
namespace App\Services\Crawler;
use App\Enums\EntityStatus;
use App\Enums\SourceLevel;
use App\Models\AiModel;
use App\Models\Category;
use App\Models\CrawlRule;
use App\Models\Source;
use App\Models\Tool;
use App\Services\ModelScoringService;
use Illuminate\Support\Str;
class CrawlEntityUpsertService
{
/**
 * Receives the scoring service that is applied to AI models before they are saved.
 */
public function __construct(
    private readonly ModelScoringService $modelScoringService,
) {
}
/**
 * Runs the payload through the rule's field mapping and persists the result.
 *
 * A rule whose target module value is 'model' is stored as an AI model; any other
 * value (including an absent target module) is stored as a tool.
 *
 * @param array<string, mixed> $payload
 * @return array<string, mixed>
 */
public function upsert(CrawlRule $rule, array $payload, string $detailUrl): array
{
    $mappingConfig = $rule->mapping_config;
    if (! is_array($mappingConfig)) {
        // A non-array mapping config is treated as "no mapping".
        $mappingConfig = [];
    }

    $normalized = $this->applyMapping($payload, $mappingConfig);

    if ($rule->target_module?->value === 'model') {
        return $this->upsertModel($normalized, $detailUrl);
    }

    return $this->upsertTool($normalized, $detailUrl);
}
/**
 * Copies payload values onto additional keys according to a target => source map.
 *
 * The original payload keys are kept. Entries whose target or source is not a string,
 * or whose source key is absent from the payload, are ignored. Values are always read
 * from the incoming payload, so mappings do not chain.
 *
 * @param array<string, mixed> $payload
 * @param array<string, mixed> $mapping
 * @return array<string, mixed>
 */
private function applyMapping(array $payload, array $mapping): array
{
    $merged = $payload;

    foreach ($mapping as $targetKey => $sourceKey) {
        $usable = is_string($targetKey)
            && is_string($sourceKey)
            && array_key_exists($sourceKey, $payload);

        if ($usable) {
            $merged[$targetKey] = $payload[$sourceKey];
        }
    }

    return $merged;
}
/**
 * Creates or updates a Tool from an already-mapped crawl payload.
 *
 * Lookup strategy: when an official URL is available (payload `official_url`, then
 * payload `url`, then the crawled detail URL) an existing tool is matched on
 * `official_url` or `canonical_url` equal to it; otherwise the match falls back to
 * slug or case-insensitive name.
 *
 * @param array<string, mixed> $payload
 * @return array<string, mixed> keys: action ('created'|'updated'), entity, entity_id, name
 * @throws \RuntimeException when the payload has no non-blank name
 */
private function upsertTool(array $payload, string $detailUrl): array
{
    $name = trim((string) ($payload['name'] ?? ''));
    if ($name === '') {
        throw new \RuntimeException('Tool payload missing name');
    }

    // Prefer an explicit slug, then one derived from the name, then a random fallback
    // for names that slugify to an empty string.
    $slug = trim((string) ($payload['slug'] ?? ''));
    $slug = $slug !== '' ? Str::slug($slug) : Str::slug($name);
    $slug = $slug !== '' ? $slug : 'tool-'.Str::lower(Str::random(8));

    $officialUrl = trim((string) ($payload['official_url'] ?? $payload['url'] ?? $detailUrl));
    $canonicalUrl = trim((string) ($payload['canonical_url'] ?? ''));

    $summary = trim((string) ($payload['summary'] ?? ''));
    if ($summary === '') {
        $summary = mb_substr(trim((string) ($payload['description'] ?? $name)), 0, 240);
    }

    $source = $this->resolveSource($officialUrl !== '' ? $officialUrl : $detailUrl);
    $categoryId = $this->resolveCategoryId('tool', $payload);

    // The OR pairs are wrapped in a nested where() so they stay parenthesised if the
    // model adds further AND constraints (for example through a global scope).
    $entity = Tool::query()
        ->when($officialUrl !== '', static function ($query) use ($officialUrl): void {
            $query->where(static function ($group) use ($officialUrl): void {
                $group->where('official_url', $officialUrl)->orWhere('canonical_url', $officialUrl);
            });
        }, static function ($query) use ($slug, $name): void {
            $query->where(static function ($group) use ($slug, $name): void {
                $group->where('slug', $slug)->orWhereRaw('LOWER(name) = ?', [mb_strtolower($name)]);
            });
        })
        ->first();

    $action = $entity === null ? 'created' : 'updated';
    $entity ??= new Tool();

    // NOTE(review): status is set to Draft on updates as well as on creation, so a
    // re-crawl resets an existing tool's status — confirm this is intended.
    $entity->fill([
        'category_id' => $categoryId,
        'source_id' => $source?->id,
        'name' => $name,
        'slug' => $this->resolveUniqueSlug(Tool::class, $slug, $entity->id),
        'summary' => mb_substr($summary, 0, 260),
        'description' => (string) ($payload['description'] ?? ''),
        'official_url' => $officialUrl !== '' ? $officialUrl : null,
        'logo_url' => (string) ($payload['logo_url'] ?? ''),
        'pricing_type' => (string) ($payload['pricing_type'] ?? 'unknown'),
        'platform' => (string) ($payload['platform'] ?? ''),
        'language' => (string) ($payload['language'] ?? ''),
        'has_api' => (bool) ($payload['has_api'] ?? false),
        'source_level' => $source?->trust_level ?? SourceLevel::Unknown,
        'status' => EntityStatus::Draft,
        'canonical_url' => $canonicalUrl !== '' ? $canonicalUrl : null,
        'last_verified_at' => now(),
    ]);
    $entity->save();

    return [
        'action' => $action,
        'entity' => Tool::class,
        'entity_id' => $entity->id,
        'name' => $entity->name,
    ];
}
/**
 * Creates or updates an AiModel from an already-mapped crawl payload.
 *
 * Lookup strategy: when an official URL is available (payload `official_url`, then
 * payload `url`, then the crawled detail URL) an existing model is matched on
 * `canonical_url` equal to either the payload's canonical URL or the official URL;
 * otherwise the match falls back to slug or case-insensitive name.
 *
 * @param array<string, mixed> $payload
 * @return array<string, mixed> keys: action ('created'|'updated'), entity, entity_id, name
 * @throws \RuntimeException when the payload has no non-blank name
 */
private function upsertModel(array $payload, string $detailUrl): array
{
    $name = trim((string) ($payload['name'] ?? ''));
    if ($name === '') {
        throw new \RuntimeException('Model payload missing name');
    }

    // Prefer an explicit slug, then one derived from the name, then a random fallback
    // for names that slugify to an empty string.
    $slug = trim((string) ($payload['slug'] ?? ''));
    $slug = $slug !== '' ? Str::slug($slug) : Str::slug($name);
    $slug = $slug !== '' ? $slug : 'model-'.Str::lower(Str::random(8));

    $summary = trim((string) ($payload['summary'] ?? ''));
    if ($summary === '') {
        $summary = mb_substr(trim((string) ($payload['description'] ?? $name)), 0, 240);
    }

    $officialUrl = trim((string) ($payload['official_url'] ?? $payload['url'] ?? $detailUrl));
    $canonicalUrl = trim((string) ($payload['canonical_url'] ?? ''));

    $source = $this->resolveSource($officialUrl !== '' ? $officialUrl : $detailUrl);
    $categoryId = $this->resolveCategoryId('model', $payload);

    // The row is stored with canonical_url = payload canonical URL when one is given,
    // else the official URL (see fill() below). The lookup must therefore accept both,
    // otherwise a payload whose canonical URL differs from its official URL would never
    // match its own row and each crawl would insert a duplicate.
    $lookupUrls = array_values(array_unique(array_filter(
        [$canonicalUrl, $officialUrl],
        static fn (string $url): bool => $url !== '',
    )));

    $entity = AiModel::query()
        ->when($officialUrl !== '', static function ($query) use ($lookupUrls): void {
            $query->whereIn('canonical_url', $lookupUrls);
        }, static function ($query) use ($slug, $name): void {
            // Nested where() keeps the OR parenthesised if further AND constraints
            // (for example a global scope) are added to the query.
            $query->where(static function ($group) use ($slug, $name): void {
                $group->where('slug', $slug)->orWhereRaw('LOWER(name) = ?', [mb_strtolower($name)]);
            });
        })
        // Several rows can match; pick the lowest id so the choice is deterministic.
        ->orderBy('id')
        ->first();

    $action = $entity === null ? 'created' : 'updated';
    $entity ??= new AiModel();

    // NOTE(review): status is set to Draft on updates as well as on creation, so a
    // re-crawl resets an existing model's status — confirm this is intended.
    $entity->fill([
        'category_id' => $categoryId,
        'source_id' => $source?->id,
        'name' => $name,
        'slug' => $this->resolveUniqueSlug(AiModel::class, $slug, $entity->id),
        'provider' => (string) ($payload['provider'] ?? ''),
        'summary' => mb_substr($summary, 0, 260),
        'description' => (string) ($payload['description'] ?? ''),
        'modality' => (string) ($payload['modality'] ?? 'text'),
        'context_window' => $this->toNullableInt($payload['context_window'] ?? null),
        'price_input' => $this->toNullableFloat($payload['price_input'] ?? null),
        'price_output' => $this->toNullableFloat($payload['price_output'] ?? null),
        'deployment_mode' => (string) ($payload['deployment_mode'] ?? 'api'),
        'effectiveness_score' => $this->boundedScore($payload['effectiveness_score'] ?? 60),
        'price_score' => $this->boundedScore($payload['price_score'] ?? 60),
        'speed_score' => $this->boundedScore($payload['speed_score'] ?? 60),
        'source_level' => $source?->trust_level ?? SourceLevel::Unknown,
        'status' => EntityStatus::Draft,
        'canonical_url' => $canonicalUrl !== '' ? $canonicalUrl : ($officialUrl !== '' ? $officialUrl : null),
        'last_verified_at' => now(),
    ]);

    // The scoring service receives the filled model before it is persisted.
    $this->modelScoringService->apply($entity);
    $entity->save();

    return [
        'action' => $action,
        'entity' => AiModel::class,
        'entity_id' => $entity->id,
        'name' => $entity->name,
    ];
}
/**
 * Finds the Source whose `domain` equals the host part of the given URL.
 *
 * Host names are case-insensitive, so both the host as written and its lowercase
 * form are accepted. Returns null when the URL has no parseable host or no source
 * matches.
 */
private function resolveSource(string $url): ?Source
{
    $host = parse_url($url, PHP_URL_HOST);
    if (! is_string($host) || $host === '') {
        return null;
    }

    // Keep the verbatim host as a candidate so rows that matched before still match.
    $candidates = array_values(array_unique([$host, strtolower($host)]));

    return Source::query()->whereIn('domain', $candidates)->first();
}
/**
 * Looks up the id of the category named by the payload.
 *
 * The candidate is taken from `category_slug`, falling back to `category`, and is
 * compared against both the slug and the name of categories of the given type.
 * Returns null when the payload names no category or nothing matches.
 *
 * @param array<string, mixed> $payload
 */
private function resolveCategoryId(string $type, array $payload): ?int
{
    $needle = trim((string) ($payload['category_slug'] ?? $payload['category'] ?? ''));

    if ($needle !== '') {
        $slugOrName = static function ($query) use ($needle): void {
            $query->where('slug', $needle)->orWhere('name', $needle);
        };

        return Category::query()
            ->where('type', $type)
            ->where($slugOrName)
            ->first()
            ?->id;
    }

    return null;
}
/**
 * Returns the given slug, or the first `slug-1`, `slug-2`, ... variant that no other
 * row of the model already uses.
 *
 * When $exceptId is given, the row with that id is ignored, so a record being
 * updated does not collide with its own slug. One existence query is issued per
 * candidate tried.
 *
 * @param class-string<\Illuminate\Database\Eloquent\Model> $modelClass
 */
private function resolveUniqueSlug(string $modelClass, string $slug, ?int $exceptId = null): string
{
    $isTaken = static function (string $candidate) use ($modelClass, $exceptId): bool {
        $query = $modelClass::query();
        if ($exceptId !== null) {
            $query = $query->where('id', '!=', $exceptId);
        }

        return $query->where('slug', $candidate)->exists();
    };

    $candidate = $slug;
    for ($counter = 1; $isTaken($candidate); $counter++) {
        $candidate = sprintf('%s-%d', $slug, $counter);
    }

    return $candidate;
}
/**
 * Casts the value to int and clamps it to the inclusive range 0..100.
 */
private function boundedScore(mixed $value): int
{
    $raw = (int) $value;

    return match (true) {
        $raw < 0 => 0,
        $raw > 100 => 100,
        default => $raw,
    };
}
/**
 * Converts a crawled value to an int, or null when it carries no usable number.
 *
 * Null, blank strings, non-numeric values (e.g. "128k", "n/a", arrays, booleans) and
 * numbers that are not finite or do not fit in an int yield null. A plain cast would
 * instead turn them into misleading values such as 0 or the leading digits only.
 * Numeric strings with a fractional part are truncated toward zero.
 */
private function toNullableInt(mixed $value): ?int
{
    if (is_int($value)) {
        return $value;
    }

    if (is_string($value)) {
        $value = trim($value);
    }

    if ($value === null || $value === '' || ! is_numeric($value)) {
        return null;
    }

    // Reject INF/NAN and magnitudes an int cannot hold; casting those is not meaningful.
    $asFloat = (float) $value;
    if (! is_finite($asFloat) || $asFloat >= (float) PHP_INT_MAX || $asFloat <= (float) PHP_INT_MIN) {
        return null;
    }

    return (int) $value;
}
/**
 * Converts a crawled value to a float, or null when it carries no usable number.
 *
 * Null, blank strings, non-numeric values (e.g. "free", "$0.50", arrays, booleans)
 * and non-finite numbers yield null. A plain cast would instead turn unparseable
 * text into 0.0, which is indistinguishable from a genuine zero.
 */
private function toNullableFloat(mixed $value): ?float
{
    if (is_string($value)) {
        $value = trim($value);
    }

    if ($value === null || $value === '' || ! is_numeric($value)) {
        return null;
    }

    $number = (float) $value;

    return is_finite($number) ? $number : null;
}
}