278 lines
9.3 KiB
PHP
278 lines
9.3 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Services\Crawler;
|
|
|
|
use App\Enums\EntityStatus;
|
|
use App\Enums\SourceLevel;
|
|
use App\Models\AiModel;
|
|
use App\Models\Category;
|
|
use App\Models\CrawlRule;
|
|
use App\Models\Source;
|
|
use App\Models\Tool;
|
|
use App\Services\ModelScoringService;
|
|
use Illuminate\Support\Str;
|
|
|
|
/**
 * Creates or updates Tool / AiModel records from payloads extracted by the crawler.
 *
 * The crawl rule decides which entity type is written; its optional mapping
 * config copies payload fields onto the field names this service reads.
 */
class CrawlEntityUpsertService
{
    public function __construct(private readonly ModelScoringService $modelScoringService)
    {
    }

    /**
     * Apply the rule's field mapping to the payload and upsert the target entity.
     *
     * A rule whose target module value is 'model' writes an AiModel; every other
     * value (including a missing target module) writes a Tool.
     *
     * @param array<string, mixed> $payload   raw fields extracted from the crawled page
     * @param string               $detailUrl URL of the crawled page, used as the fallback official URL
     *
     * @return array<string, mixed> keys: action ('created'|'updated'), entity (class name), entity_id, name
     *
     * @throws \RuntimeException when the mapped payload has no usable name
     */
    public function upsert(CrawlRule $rule, array $payload, string $detailUrl): array
    {
        $mapped = $this->applyMapping($payload, is_array($rule->mapping_config) ? $rule->mapping_config : []);

        return match ($rule->target_module?->value) {
            'model' => $this->upsertModel($mapped, $detailUrl),
            default => $this->upsertTool($mapped, $detailUrl),
        };
    }

    /**
     * Copy payload values onto their mapped target keys.
     *
     * The original payload keys are kept; each `target => source` pair adds or
     * overwrites `target` with the value found under `source`. Pairs whose key
     * or value is not a string, or whose source key is absent, are ignored.
     *
     * @param array<string, mixed> $payload
     * @param array<string, mixed> $mapping target field name => source payload key
     *
     * @return array<string, mixed>
     */
    private function applyMapping(array $payload, array $mapping): array
    {
        if ($mapping === []) {
            return $payload;
        }

        $result = $payload;

        foreach ($mapping as $target => $source) {
            if (! is_string($target) || ! is_string($source)) {
                continue;
            }

            // array_key_exists (not isset) so an explicit null in the payload is still mapped.
            if (array_key_exists($source, $payload)) {
                $result[$target] = $payload[$source];
            }
        }

        return $result;
    }

    /**
     * Create or update a Tool from a mapped payload.
     *
     * An existing row is matched by URL (official_url or canonical_url equal to
     * the resolved official URL); only when no official URL is available does the
     * lookup fall back to slug or case-insensitive name.
     *
     * @param array<string, mixed> $payload
     *
     * @return array<string, mixed>
     *
     * @throws \RuntimeException when the payload has no non-blank name
     */
    private function upsertTool(array $payload, string $detailUrl): array
    {
        $name = trim((string) ($payload['name'] ?? ''));
        if ($name === '') {
            throw new \RuntimeException('Tool payload missing name');
        }

        // Slug preference: explicit payload slug, then slugified name, then a random fallback
        // for names that slugify to an empty string.
        $slug = trim((string) ($payload['slug'] ?? ''));
        $slug = $slug !== '' ? Str::slug($slug) : Str::slug($name);
        $slug = $slug !== '' ? $slug : 'tool-'.Str::lower(Str::random(8));

        $officialUrl = trim((string) ($payload['official_url'] ?? $payload['url'] ?? $detailUrl));
        $canonicalUrl = trim((string) ($payload['canonical_url'] ?? ''));
        $summary = trim((string) ($payload['summary'] ?? ''));

        if ($summary === '') {
            $summary = mb_substr(trim((string) ($payload['description'] ?? $name)), 0, 240);
        }

        $source = $this->resolveSource($officialUrl !== '' ? $officialUrl : $detailUrl);
        $categoryId = $this->resolveCategoryId('tool', $payload);

        $entity = Tool::query()
            ->when($officialUrl !== '', static function ($query) use ($officialUrl): void {
                $query->where('official_url', $officialUrl)->orWhere('canonical_url', $officialUrl);
            }, static function ($query) use ($slug, $name): void {
                $query->where('slug', $slug)->orWhereRaw('LOWER(name) = ?', [mb_strtolower($name)]);
            })
            ->first();

        $action = $entity === null ? 'created' : 'updated';

        $entity ??= new Tool();

        // NOTE(review): status is set to Draft on updates as well as creates — confirm this is intended.
        $entity->fill([
            'category_id' => $categoryId,
            'source_id' => $source?->id,
            'name' => $name,
            'slug' => $this->resolveUniqueSlug(Tool::class, $slug, $entity->id),
            'summary' => mb_substr($summary, 0, 260),
            'description' => (string) ($payload['description'] ?? ''),
            'official_url' => $officialUrl !== '' ? $officialUrl : null,
            'logo_url' => (string) ($payload['logo_url'] ?? ''),
            'pricing_type' => (string) ($payload['pricing_type'] ?? 'unknown'),
            'platform' => (string) ($payload['platform'] ?? ''),
            'language' => (string) ($payload['language'] ?? ''),
            'has_api' => (bool) ($payload['has_api'] ?? false),
            'source_level' => $source?->trust_level ?? SourceLevel::Unknown,
            'status' => EntityStatus::Draft,
            'canonical_url' => $canonicalUrl !== '' ? $canonicalUrl : null,
            'last_verified_at' => now(),
        ]);

        $entity->save();

        return [
            'action' => $action,
            'entity' => Tool::class,
            'entity_id' => $entity->id,
            'name' => $entity->name,
        ];
    }

    /**
     * Create or update an AiModel from a mapped payload.
     *
     * An existing row is matched by canonical_url against both the resolved
     * official URL and the payload's canonical URL; only when no official URL is
     * available does the lookup fall back to slug or case-insensitive name.
     * The scoring service is applied to the filled entity before it is saved.
     *
     * @param array<string, mixed> $payload
     *
     * @return array<string, mixed>
     *
     * @throws \RuntimeException when the payload has no non-blank name
     */
    private function upsertModel(array $payload, string $detailUrl): array
    {
        $name = trim((string) ($payload['name'] ?? ''));
        if ($name === '') {
            throw new \RuntimeException('Model payload missing name');
        }

        $slug = trim((string) ($payload['slug'] ?? ''));
        $slug = $slug !== '' ? Str::slug($slug) : Str::slug($name);
        $slug = $slug !== '' ? $slug : 'model-'.Str::lower(Str::random(8));

        $summary = trim((string) ($payload['summary'] ?? ''));
        if ($summary === '') {
            $summary = mb_substr(trim((string) ($payload['description'] ?? $name)), 0, 240);
        }

        $officialUrl = trim((string) ($payload['official_url'] ?? $payload['url'] ?? $detailUrl));
        $canonicalUrl = trim((string) ($payload['canonical_url'] ?? ''));

        $source = $this->resolveSource($officialUrl !== '' ? $officialUrl : $detailUrl);
        $categoryId = $this->resolveCategoryId('model', $payload);

        // The stored canonical_url prefers the payload's canonical URL over the official URL
        // (see the fill() below), so both must be tried here. Matching on the official URL
        // alone never finds a row saved with a distinct canonical URL and every re-crawl
        // would insert a duplicate.
        $lookupUrls = array_values(array_unique(array_filter(
            [$officialUrl, $canonicalUrl],
            static fn (string $url): bool => $url !== '',
        )));

        $entity = AiModel::query()
            ->when($officialUrl !== '', static function ($query) use ($lookupUrls): void {
                $query->whereIn('canonical_url', $lookupUrls);
            }, static function ($query) use ($slug, $name): void {
                $query->where('slug', $slug)->orWhereRaw('LOWER(name) = ?', [mb_strtolower($name)]);
            })
            ->first();

        $action = $entity === null ? 'created' : 'updated';

        $entity ??= new AiModel();

        // NOTE(review): status is set to Draft on updates as well as creates — confirm this is intended.
        $entity->fill([
            'category_id' => $categoryId,
            'source_id' => $source?->id,
            'name' => $name,
            'slug' => $this->resolveUniqueSlug(AiModel::class, $slug, $entity->id),
            'provider' => (string) ($payload['provider'] ?? ''),
            'summary' => mb_substr($summary, 0, 260),
            'description' => (string) ($payload['description'] ?? ''),
            'modality' => (string) ($payload['modality'] ?? 'text'),
            'context_window' => $this->toNullableInt($payload['context_window'] ?? null),
            'price_input' => $this->toNullableFloat($payload['price_input'] ?? null),
            'price_output' => $this->toNullableFloat($payload['price_output'] ?? null),
            'deployment_mode' => (string) ($payload['deployment_mode'] ?? 'api'),
            'effectiveness_score' => $this->boundedScore($payload['effectiveness_score'] ?? 60),
            'price_score' => $this->boundedScore($payload['price_score'] ?? 60),
            'speed_score' => $this->boundedScore($payload['speed_score'] ?? 60),
            'source_level' => $source?->trust_level ?? SourceLevel::Unknown,
            'status' => EntityStatus::Draft,
            'canonical_url' => $canonicalUrl !== '' ? $canonicalUrl : ($officialUrl !== '' ? $officialUrl : null),
            'last_verified_at' => now(),
        ]);

        $this->modelScoringService->apply($entity);
        $entity->save();

        return [
            'action' => $action,
            'entity' => AiModel::class,
            'entity_id' => $entity->id,
            'name' => $entity->name,
        ];
    }

    /**
     * Find the Source whose domain equals the host of the given URL.
     *
     * Returns null when the URL has no parseable host or no Source matches.
     */
    private function resolveSource(string $url): ?Source
    {
        $host = parse_url($url, PHP_URL_HOST);

        // parse_url yields null/false for a missing host or a malformed URL.
        if (! is_string($host) || $host === '') {
            return null;
        }

        return Source::query()->where('domain', $host)->first();
    }

    /**
     * Resolve a category id from the payload's `category_slug` (preferred) or `category` value.
     *
     * The candidate is compared against both the slug and the name of categories
     * of the given type. Returns null when no candidate is given or nothing matches.
     *
     * @param array<string, mixed> $payload
     */
    private function resolveCategoryId(string $type, array $payload): ?int
    {
        $candidate = trim((string) ($payload['category_slug'] ?? $payload['category'] ?? ''));

        if ($candidate === '') {
            return null;
        }

        $category = Category::query()
            ->where('type', $type)
            ->where(static function ($query) use ($candidate): void {
                $query->where('slug', $candidate)->orWhere('name', $candidate);
            })
            ->first();

        return $category?->id;
    }

    /**
     * Return the slug, suffixed with `-1`, `-2`, ... until no other row of the model uses it.
     *
     * @param class-string<\Illuminate\Database\Eloquent\Model> $modelClass
     * @param int|null $exceptId id of the row being updated, excluded from the collision check
     */
    private function resolveUniqueSlug(string $modelClass, string $slug, ?int $exceptId = null): string
    {
        $finalSlug = $slug;
        $suffix = 1;

        while ($modelClass::query()
            ->when($exceptId !== null, static fn ($query) => $query->where('id', '!=', $exceptId))
            ->where('slug', $finalSlug)
            ->exists()) {
            $finalSlug = sprintf('%s-%d', $slug, $suffix);
            $suffix++;
        }

        return $finalSlug;
    }

    /**
     * Cast a score to int and clamp it to the 0..100 range.
     */
    private function boundedScore(mixed $value): int
    {
        $score = (int) $value;

        return max(0, min(100, $score));
    }

    /**
     * Convert a crawled value to int, or null when it is missing or not numeric.
     *
     * Non-numeric text (e.g. "N/A") yields null rather than being cast to 0,
     * so an unknown value is not stored as a real measurement.
     */
    private function toNullableInt(mixed $value): ?int
    {
        if (is_string($value)) {
            $value = trim($value);
        }

        if ($value === null || $value === '' || ! is_numeric($value)) {
            return null;
        }

        return (int) $value;
    }

    /**
     * Convert a crawled value to float, or null when it is missing, not numeric, or not finite.
     */
    private function toNullableFloat(mixed $value): ?float
    {
        if (is_string($value)) {
            $value = trim($value);
        }

        if ($value === null || $value === '' || ! is_numeric($value)) {
            return null;
        }

        $number = (float) $value;

        // Numeric strings such as "1e999" overflow to INF.
        return is_finite($number) ? $number : null;
    }
}
|
|
|