<?php 
// Remove PHP's execution time limit (0 = unlimited) so the crawl started at the
// bottom of this file is not aborted by max_execution_time.
set_time_limit(0);

/**
 * Crawls one site breadth-first and writes the pages it finds into XML sitemaps.
 *
 * Pages are written in batches of $maxUrlsPerFile to "sitemap<N>.xml" inside
 * $sitemapDir. When the crawl finishes, either a sitemap index is generated or,
 * if only one sitemap file exists, that file is renamed to the index name.
 * Only URLs on the configured domain that end in ".htm" or "/" are followed.
 */
class SitemapGenerator {
    /** @var string Site root (scheme + host) without a trailing slash. */
    private $domain;
    /** @var string Output directory for sitemap files (with trailing slash). */
    private $sitemapDir = "xml-sitemap/";
    /** @var string File name of the sitemap index inside $sitemapDir. */
    private $sitemapIndex = "sitemap.xml";
    /** @var int Maximum number of <url> entries per sitemap file. */
    private $maxUrlsPerFile = 5000;
    /** @var string Append-only log of every URL that has been dequeued. */
    private $visitedFile = "visited_urlslist.txt";
    /** @var int Seconds allowed for establishing a connection. */
    private $connectTimeout = 10;
    /** @var int Seconds allowed for a whole HTTP transfer. */
    private $requestTimeout = 30;
    /** @var int Maximum number of redirects followed per request. */
    private $maxRedirects = 5;

    /**
     * @param string $domain Site root, e.g. "https://www.example.com"; trailing slashes are removed.
     */
    public function __construct($domain) {
        $this->domain = rtrim($domain, '/');
    }

    /**
     * Crawls the site starting at $startUrl and writes the sitemap files.
     *
     * URLs already listed in the visited file are skipped. A page is added to the
     * sitemap only after it has been fetched successfully, so unreachable pages
     * are logged as visited but never published.
     *
     * @param string $startUrl First URL to fetch.
     * @return void
     */
    public function fetchUrls($startUrl) {
        // "https://host" and "https://host/" are the same page; crawl it once,
        // under the slash form that links on the site resolve to.
        if (rtrim($startUrl, '/') === $this->domain) {
            $startUrl = $this->domain . '/';
        }

        $visited = $this->loadVisitedUrls();
        $queue = new SplQueue();
        $queue->enqueue($startUrl);
        // Tracks everything ever enqueued so a link repeated on many pages is
        // queued only once.
        $queued = [$startUrl => true];
        $batch = [];
        $batchCount = 1;

        while (!$queue->isEmpty()) {
            $currentUrl = $queue->dequeue();
            if (isset($visited[$currentUrl])) {
                continue;
            }
            $visited[$currentUrl] = true;

            // Save visited URLs
            file_put_contents($this->visitedFile, $currentUrl . PHP_EOL, FILE_APPEND | LOCK_EX);

            // Fetch page content with cURL
            $html = $this->curlGetContents($currentUrl);
            if ($html === false || $html === '') {
                echo "Failed to fetch: $currentUrl\n";
                continue;
            }

            $batch[] = $currentUrl;

            // Extract links and queue the ones that belong to this site
            foreach ($this->extractLinks($html) as $href) {
                $link = $this->formatUrl($href, $currentUrl);
                if ($link === false || isset($visited[$link]) || isset($queued[$link])) {
                    continue;
                }
                if ($this->isValidUrl($link)) {
                    $queued[$link] = true;
                    $queue->enqueue($link);
                }
            }

            // Save batch if limit reached
            if (count($batch) >= $this->maxUrlsPerFile) {
                $this->saveBatch($batch, $batchCount);
                $batch = [];
                $batchCount++;
            }
        }

        // Save remaining batch
        if (!empty($batch)) {
            $this->saveBatch($batch, $batchCount);
        }

        // Generate index file
        $this->generateSitemapIndexFromDirectory();
    }

    /**
     * Returns the distinct href values of all <a> tags in $html.
     *
     * The href may appear anywhere in the tag and may be double-quoted,
     * single-quoted or unquoted.
     *
     * @param string $html Raw page markup.
     * @return string[] Raw (still entity-encoded) href values, in document order.
     */
    private function extractLinks($html) {
        // (?| ... ) is a branch-reset group: whichever quoting style matches,
        // the value lands in capture group 1.
        $pattern = '/<a\b[^>]*?\shref\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/i';
        if (!preg_match_all($pattern, $html, $matches)) {
            return [];
        }
        return array_values(array_unique($matches[1]));
    }

    /**
     * Downloads $url and returns its body.
     *
     * @param string $url Absolute URL to fetch.
     * @return string|false Response body, or false on a transport error or a
     *                      non-2xx final HTTP status.
     */
    private function curlGetContents($url) {
        $ch = curl_init($url);
        if ($ch === false) {
            return false;
        }
        // TLS peer verification is left at cURL's secure default.
        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_MAXREDIRS => $this->maxRedirects,
            // Without timeouts one stalled server would hang the crawl forever.
            CURLOPT_CONNECTTIMEOUT => $this->connectTimeout,
            CURLOPT_TIMEOUT => $this->requestTimeout,
            CURLOPT_USERAGENT => 'Mozilla/5.0 (compatible; SitemapBot/1.0; +https://www.winntus.com/bot)',
        ]);
        $html = curl_exec($ch);
        $status = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        // Error pages (404, 500, ...) have a body too, but must not be indexed.
        if ($html === false || $status < 200 || $status >= 300) {
            return false;
        }
        return $html;
    }

    /**
     * Writes one sitemap file containing $urls.
     *
     * @param string[] $urls        Absolute page URLs.
     * @param int      $batchNumber Number used in the file name "sitemap<N>.xml".
     * @return string The file name (without directory) that was written to.
     */
    private function saveBatch($urls, $batchNumber) {
        if (!is_dir($this->sitemapDir) && !mkdir($this->sitemapDir, 0777, true) && !is_dir($this->sitemapDir)) {
            echo "Unable to create sitemap directory: {$this->sitemapDir}\n";
        }
        $filename = "sitemap{$batchNumber}.xml";
        // One timestamp for the whole file instead of one date() call per URL.
        $lastmod = date('Y-m-d\TH:i:sP');

        $content = "<?xml version='1.0' encoding='UTF-8'?>\n";
        $content .= "<urlset xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns:xhtml='http://www.w3.org/1999/xhtml' xmlns='http://www.sitemaps.org/schemas/sitemap/0.9' xsi:schemaLocation='http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd'>\n";
        foreach ($urls as $url) {
            // The homepage may be stored with or without its trailing slash.
            $priority = (rtrim($url, '/') === $this->domain) ? "1.0000" : "0.8000";
            // A raw "&" or "<" in a URL would make the XML document invalid.
            $loc = htmlspecialchars($url, ENT_QUOTES | ENT_XML1, 'UTF-8');
            $content .= "<url><loc>{$loc}</loc><lastmod>{$lastmod}</lastmod><changefreq>daily</changefreq><priority>{$priority}</priority></url>\n";
        }
        $content .= "</urlset>";

        if (file_put_contents($this->sitemapDir . $filename, $content, LOCK_EX) === false) {
            echo "Failed to write sitemap file: {$this->sitemapDir}{$filename}\n";
        }
        return $filename;
    }

    /**
     * Builds the sitemap index from the "sitemap*.xml" files in $sitemapDir.
     *
     * If exactly one sitemap file exists it is renamed to the index name instead.
     * Every matching file present in the directory is used, including files left
     * behind by earlier runs.
     *
     * @return void
     */
    private function generateSitemapIndexFromDirectory() {
        if (!file_exists($this->sitemapDir)) {
            echo "Sitemap directory does not exist.\n";
            return;
        }

        $dirPath = rtrim($this->sitemapDir, '/') . '/';

        if (!is_readable($dirPath)) {
            echo "Directory not readable: $dirPath\n";
            return;
        }

        $indexName = $this->sitemapIndex;
        $found = glob($dirPath . "sitemap*.xml");
        if ($found === false) {
            $found = [];
        }
        // array_filter() keeps the original keys; re-index so that [0] is really
        // the first remaining file even when the index file was filtered out.
        $sitemapFiles = array_values(array_filter($found, function ($file) use ($indexName) {
            return basename($file) !== $indexName;
        }));

        if (empty($sitemapFiles)) {
            echo "No sitemap files found in: $dirPath\n";
            return;
        }
        echo "Found " . count($sitemapFiles) . " sitemap files.\n";

        $sitemapIndexPath = $dirPath . $this->sitemapIndex;

        if (count($sitemapFiles) === 1) {
            if (rename($sitemapFiles[0], $sitemapIndexPath)) {
                echo "Single sitemap file found. Renamed to sitemap.xml\n";
            } else {
                echo "Failed to rename {$sitemapFiles[0]} to {$sitemapIndexPath}\n";
            }
            return;
        }

        $lastmod = date('Y-m-d\TH:i:sP');
        $content = "<?xml version='1.0' encoding='UTF-8'?>\n";
        $content .= "<sitemapindex xmlns='http://www.sitemaps.org/schemas/sitemap/0.9' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd'>\n";

        foreach ($sitemapFiles as $file) {
            $fileUrl = $this->domain . "/" . $this->sitemapDir . basename($file);
            $loc = htmlspecialchars($fileUrl, ENT_QUOTES | ENT_XML1, 'UTF-8');
            $content .= "<sitemap><loc>{$loc}</loc><lastmod>{$lastmod}</lastmod></sitemap>\n";
        }

        $content .= "</sitemapindex>";
        if (file_put_contents($sitemapIndexPath, $content, LOCK_EX) === false) {
            echo "Failed to write sitemap index: $sitemapIndexPath\n";
            return;
        }
        echo "Sitemap index created successfully.\n";
    }

    /**
     * Turns an href value into an absolute URL.
     *
     * HTML entities are decoded and any "#fragment" is dropped. Absolute http(s)
     * URLs are returned as they are; protocol-relative URLs get the scheme of the
     * configured domain; every other scheme (mailto:, tel:, javascript:, ...) is
     * rejected. Relative links are resolved against the path of $baseUrl and
     * always placed on the configured domain.
     *
     * @param string      $link    Raw href value.
     * @param string|null $baseUrl URL of the page the link was found on; when
     *                             omitted, relative links resolve against the site root.
     * @return string|false Absolute URL, or false when the link is not crawlable.
     */
    private function formatUrl($link, $baseUrl = null) {
        $link = trim(html_entity_decode((string) $link, ENT_QUOTES | ENT_HTML5, 'UTF-8'));

        // A fragment addresses a position inside a page, not a different page.
        $fragmentPos = strpos($link, '#');
        if ($fragmentPos !== false) {
            $link = substr($link, 0, $fragmentPos);
        }
        if ($link === '') {
            return false;
        }

        // Protocol-relative URL ("//host/path"): borrow the site's scheme.
        if (strncmp($link, '//', 2) === 0) {
            $scheme = parse_url($this->domain, PHP_URL_SCHEME);
            return ($scheme ? $scheme : 'https') . ':' . $link;
        }

        // Anything that starts with "scheme:" is absolute; only http(s) is crawlable.
        if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $link)) {
            return preg_match('/^https?:\/\//i', $link) === 1 ? $link : false;
        }

        // Keep the query string out of path normalisation.
        $query = '';
        $queryPos = strpos($link, '?');
        if ($queryPos !== false) {
            $query = substr($link, $queryPos);
            $link = (string) substr($link, 0, $queryPos);
        }

        if ($link !== '' && $link[0] === '/') {
            $path = $link;
        } else {
            $basePath = '/';
            if ($baseUrl !== null) {
                $parsedPath = parse_url($baseUrl, PHP_URL_PATH);
                if (is_string($parsedPath) && $parsedPath !== '' && $parsedPath[0] === '/') {
                    $basePath = $parsedPath;
                }
            }
            if ($link === '') {
                // Query-only link ("?page=2") refers to the current document.
                $path = $basePath;
            } else {
                // Relative path: replace everything after the last "/" of the base.
                $path = substr($basePath, 0, strrpos($basePath, '/') + 1) . $link;
            }
        }

        return $this->domain . $this->removeDotSegments($path) . $query;
    }

    /**
     * Collapses "." and ".." segments in an absolute path.
     *
     * @param string $path Path starting with "/".
     * @return string Normalised path; never climbs above the root.
     */
    private function removeDotSegments($path) {
        $segments = explode('/', $path);
        $lastIndex = count($segments) - 1;
        $resolved = [];

        foreach ($segments as $index => $segment) {
            if ($segment === '.' || $segment === '..') {
                // $resolved[0] is the empty segment before the leading "/";
                // keep it so ".." cannot escape the root.
                if ($segment === '..' && count($resolved) > 1) {
                    array_pop($resolved);
                }
                // A trailing dot segment denotes a directory: keep the final "/".
                if ($index === $lastIndex) {
                    $resolved[] = '';
                }
                continue;
            }
            $resolved[] = $segment;
        }

        return implode('/', $resolved);
    }

    /**
     * Tells whether $url is a page of this site that should be crawled.
     *
     * @param string $url Absolute URL.
     * @return bool True when the URL is on the configured domain and ends in ".htm" or "/".
     */
    private function isValidUrl($url) {
        $domainLength = strlen($this->domain);
        if (strncmp($url, $this->domain, $domainLength) !== 0) {
            return false;
        }
        // The domain must be followed by "/" (or nothing); otherwise
        // "https://site.com.evil.test/" would pass as "https://site.com".
        if (isset($url[$domainLength]) && $url[$domainLength] !== '/') {
            return false;
        }
        return substr($url, -4) === '.htm' || substr($url, -1) === '/';
    }

    /**
     * Loads the URLs recorded in the visited file.
     *
     * @return array<string, bool> Map of URL => true; empty when the file is missing or unreadable.
     */
    private function loadVisitedUrls() {
        if (!file_exists($this->visitedFile)) {
            return [];
        }
        $lines = file($this->visitedFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($lines === false) {
            return [];
        }
        $visited = [];
        foreach ($lines as $line) {
            $visited[$line] = true;
        }
        return $visited;
    }

    /**
     * Starts a fresh crawl from the site root, discarding the visited-URL log first.
     *
     * @return void
     */
    public function run() {
        if (file_exists($this->visitedFile)) {
            unlink($this->visitedFile);
        }
        $this->fetchUrls($this->domain);
    }
}

// Run the script: crawl the site from its root and write the sitemap files.
// The closing PHP tag is intentionally omitted: in a PHP-only file any
// whitespace after it would be sent as output.
$domain = "https://www.winntus.com";
$sitemap = new SitemapGenerator($domain);
$sitemap->run();

echo "Sitemap generation completed.\n";