Files
ai-web/tests/Feature/CrawlerCommandTest.php

62 lines
1.9 KiB
PHP
Raw Permalink Normal View History

2026-02-18 12:56:36 +08:00
<?php
declare(strict_types=1);
namespace Tests\Feature;
use App\Enums\CrawlTargetModule;
use App\Models\CrawlRule;
use App\Services\Crawler\CrawlRuleScheduleService;
use App\Services\Crawler\XPathExtractor;
use Carbon\CarbonImmutable;
use Tests\TestCase;
/**
 * Feature tests for the crawler building blocks: XPath-based extraction
 * and cron-driven schedule evaluation of crawl rules.
 */
class CrawlerCommandTest extends TestCase
{
    /**
     * Feeds a small HTML document to the extractor and checks that:
     *  - list links selected via XPath come back as absolute URLs (the
     *    relative "/tools/a" is expected to be resolved against the list URL);
     *  - named field XPaths yield the heading text and the meta description.
     */
    public function test_xpath_extractor_can_extract_list_urls_and_fields(): void
    {
        $markup = '<html><body><a href="/tools/a">A</a><a href="https://example.com/tools/b">B</a><h1>Alpha</h1><meta name="description" content="Alpha summary"></body></html>';
        $xpath = new XPathExtractor();

        // List extraction: one relative and one absolute link in the fixture.
        $listConfig = ['list_link_xpath' => '//a/@href'];
        $links = $xpath->extractListUrls($markup, 'https://example.com/list', $listConfig);

        $expectedLinks = [
            'https://example.com/tools/a',
            'https://example.com/tools/b',
        ];
        self::assertSame($expectedLinks, $links);

        // Field extraction: a text node and an attribute value.
        $fieldConfig = [
            'fields' => [
                'name' => '//h1/text()',
                'summary' => '//meta[@name="description"]/@content',
            ],
        ];
        $extracted = $xpath->extractFields($markup, $fieldConfig);

        self::assertSame('Alpha', $extracted['name']);
        self::assertSame('Alpha summary', $extracted['summary']);
    }

    /**
     * Builds an enabled rule that fires every five minutes in Asia/Shanghai
     * and evaluates it at 10:10 local time, which lies on a five-minute
     * boundary, so the rule must be reported as due.
     *
     * The expected next run is asserted as "2026-02-18 02:15:00".
     * NOTE(review): that matches 10:15 Asia/Shanghai expressed in UTC, so the
     * service presumably returns UTC timestamps — confirm against the service.
     */
    public function test_schedule_service_can_compute_due_and_next_time(): void
    {
        $referenceTime = CarbonImmutable::parse('2026-02-18 10:10:00', 'Asia/Shanghai');

        $attributes = [
            'name' => 'test',
            'target_module' => CrawlTargetModule::Tool,
            'enabled' => true,
            'cron_expression' => '*/5 * * * *',
            'timezone' => 'Asia/Shanghai',
        ];
        $crawlRule = new CrawlRule($attributes);
        $scheduler = new CrawlRuleScheduleService();

        self::assertTrue($scheduler->isDue($crawlRule, $referenceTime));

        $upcoming = $scheduler->nextRunAt($crawlRule, $referenceTime);
        self::assertNotNull($upcoming);

        $formattedUpcoming = $upcoming?->format('Y-m-d H:i:s');
        self::assertSame('2026-02-18 02:15:00', $formattedUpcoming);
    }
}