爬虫开发
This commit is contained in:
62
tests/Feature/CrawlerCommandTest.php
Normal file
62
tests/Feature/CrawlerCommandTest.php
Normal file
@@ -0,0 +1,62 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Tests\Feature;
|
||||
|
||||
use App\Enums\CrawlTargetModule;
|
||||
use App\Models\CrawlRule;
|
||||
use App\Services\Crawler\CrawlRuleScheduleService;
|
||||
use App\Services\Crawler\XPathExtractor;
|
||||
use Carbon\CarbonImmutable;
|
||||
use Tests\TestCase;
|
||||
|
||||
/**
 * Tests for the crawler's XPath extraction and cron-schedule helpers.
 *
 * Both tests instantiate the services directly with `new`; nothing is
 * resolved from the container and no model is persisted here.
 */
class CrawlerCommandTest extends TestCase
{
    /**
     * The extractor collects every `href` matched by the list XPath and
     * returns absolute URLs: the relative `/tools/a` is expected to come back
     * prefixed with the host of the list page URL, while the already-absolute
     * link is expected unchanged. Field extraction is then checked for both a
     * text node and an attribute value.
     */
    public function test_xpath_extractor_can_extract_list_urls_and_fields(): void
    {
        $markup = '<html><body><a href="/tools/a">A</a><a href="https://example.com/tools/b">B</a><h1>Alpha</h1><meta name="description" content="Alpha summary"></body></html>';
        $listPageUrl = 'https://example.com/list';
        $xpath = new XPathExtractor();

        // List page: one relative and one absolute link, in document order.
        $listRules = [
            'list_link_xpath' => '//a/@href',
        ];
        $expectedUrls = [
            'https://example.com/tools/a',
            'https://example.com/tools/b',
        ];

        $foundUrls = $xpath->extractListUrls($markup, $listPageUrl, $listRules);

        self::assertSame($expectedUrls, $foundUrls);

        // Detail fields: a text() node and an attribute node.
        $fieldRules = [
            'fields' => [
                'name' => '//h1/text()',
                'summary' => '//meta[@name="description"]/@content',
            ],
        ];

        $extracted = $xpath->extractFields($markup, $fieldRules);

        self::assertSame('Alpha', $extracted['name']);
        self::assertSame('Alpha summary', $extracted['summary']);
    }

    /**
     * An enabled rule with a five-minute cron is due at 10:10 Asia/Shanghai,
     * and its next run is the following five-minute mark.
     */
    public function test_schedule_service_can_compute_due_and_next_time(): void
    {
        $zone = 'Asia/Shanghai';

        $scheduler = new CrawlRuleScheduleService();

        $crawlRule = new CrawlRule([
            'name' => 'test',
            'target_module' => CrawlTargetModule::Tool,
            'enabled' => true,
            'cron_expression' => '*/5 * * * *',
            'timezone' => $zone,
        ]);

        // 10:10 falls exactly on a */5 boundary in the rule's own timezone.
        $currentTime = CarbonImmutable::parse('2026-02-18 10:10:00', $zone);

        self::assertTrue($scheduler->isDue($crawlRule, $currentTime));

        $upcoming = $scheduler->nextRunAt($crawlRule, $currentTime);

        self::assertNotNull($upcoming);

        // NOTE(review): 02:15:00 equals 10:15 Asia/Shanghai expressed in UTC,
        // so the service presumably returns its result in UTC — confirm
        // against CrawlRuleScheduleService.
        self::assertSame('2026-02-18 02:15:00', $upcoming?->format('Y-m-d H:i:s'));
    }
}
|
||||
Reference in New Issue
Block a user