HEX

File: //var/www/quadcode.com/node_modules/super-sitemap/dist/sampled.js
import dirTree from 'directory-tree';
import { XMLParser } from 'fast-xml-parser';
import { filterRoutes } from './sitemap.js';
/**
 * Given the URL to this project's sitemap, _which must have been generated by
 * Super Sitemap for this to work as designed_, returns an array containing:
 * 1. the URL of every static route, and
 * 2. one URL for every parameterized route.
 *
 * ```js
 * // Example result:
 * [ 'http://localhost:5173/', 'http://localhost:5173/about', 'http://localhost:5173/blog', 'http://localhost:5173/blog/hello-world', 'http://localhost:5173/blog/tag/red' ]
 * ```
 *
 * @public
 * @param sitemapUrl - E.g. http://localhost:5173/sitemap.xml
 * @returns Array of URLs, one for each route; grouped by static, then dynamic; sub-sorted alphabetically.
 * @throws {Error} If the sitemap request completes with a non-2xx HTTP status.
 *
 * @remarks
 * - This is intended as a utility to gather unique URLs for SEO analysis,
 *   functional tests for public routes, etc.
 * - As a utility, the design favors ease of use for the developer over runtime
 *   performance, and consequently consumes `/sitemap.xml` directly, to avoid
 *   the developer needing to recreate and maintain a duplicate sitemap config,
 *   param values, exclusion rules, etc.
 * - LIMITATIONS:
 *   1. The result does not include `additionalPaths` from the sitemap config
 *      b/c it's impossible to identify those by pattern using only the result.
 *   2. This does not distinguish between routes that differ only due to a
 *      pattern matcher–e.g. `/foo/[foo]` and `/foo/[foo=integer]` will be
 *      evaluated as `/foo/[foo]` and one sample URL will be returned.
 */
export async function sampledUrls(sitemapUrl) {
    const response = await fetch(sitemapUrl);
    // Fail with a clear message instead of trying to parse an error page as
    // sitemap XML, which would only surface later as an obscure TypeError.
    if (!response.ok) {
        throw new Error(`Failed to fetch sitemap at ${sitemapUrl}: HTTP ${response.status} ${response.statusText}`);
    }
    const sitemapXml = await response.text();
    return await _sampledUrls(sitemapXml);
}
/**
 * Given the URL to this project's sitemap, _which must have been generated by
 * Super Sitemap for this to work as designed_, returns an array containing:
 * 1. the path of every static route, and
 * 2. one path for every parameterized route.
 *
 * ```js
 * // Example result:
 * [ '/', '/about', '/blog', '/blog/hello-world', '/blog/tag/red' ]
 * ```
 *
 * @public
 * @param sitemapUrl - E.g. http://localhost:5173/sitemap.xml
 * @returns Array of paths, one for each route; grouped by static, then dynamic; sub-sorted alphabetically.
 * @throws {Error} If the sitemap request completes with a non-2xx HTTP status.
 *
 * @remarks
 * - This is intended as a utility to gather unique paths for SEO analysis,
 *   functional tests for public routes, etc.
 * - As a utility, the design favors ease of use for the developer over runtime
 *   performance, and consequently consumes `/sitemap.xml` directly, to avoid
 *   the developer needing to recreate and maintain a duplicate sitemap config,
 *   param values, exclusion rules, etc.
 * - LIMITATIONS:
 *   1. The result does not include `additionalPaths` from the sitemap config
 *      b/c it's impossible to identify those by pattern using only the result.
 *   2. This does not distinguish between routes that differ only due to a
 *      pattern matcher–e.g. `/foo/[foo]` and `/foo/[foo=integer]` will be
 *      evaluated as `/foo/[foo]` and one sample path will be returned.
 */
export async function sampledPaths(sitemapUrl) {
    const response = await fetch(sitemapUrl);
    // Fail with a clear message instead of trying to parse an error page as
    // sitemap XML, which would only surface later as an obscure TypeError.
    if (!response.ok) {
        throw new Error(`Failed to fetch sitemap at ${sitemapUrl}: HTTP ${response.status} ${response.statusText}`);
    }
    const sitemapXml = await response.text();
    return await _sampledPaths(sitemapXml);
}
/**
 * Given the body of this site's sitemap.xml, returns an array containing:
 * 1. the URL of every static (non-parameterized) route, and
 * 2. one URL for every parameterized route.
 *
 * Route patterns are discovered by reading `src/routes` from disk (located
 * relative to this module's own file path) and are then matched against the
 * URLs listed in the sitemap.
 *
 * @private
 * @param sitemapXml - The XML string of the sitemap to analyze. This must have
 *                     been created by Super Sitemap to work as designed. It
 *                     may be a `<urlset>` sitemap or a `<sitemapindex>`.
 * @returns Array of URLs: static route URLs first, then one sampled URL per
 *          dynamic route; each group sorted alphabetically. Empty when the
 *          sitemap lists no URLs.
 * @throws {TypeError} If a sitemap document has no `<urlset>` element.
 * @throws {Error} If a sub sitemap of a sitemap index responds with a non-2xx
 *                 HTTP status.
 */
export async function _sampledUrls(sitemapXml) {
    // fast-xml-parser represents a lone child element as a plain object rather
    // than a one-item array, and an absent child as undefined. Normalize both
    // so sitemaps with zero or one entry behave like any other.
    const toArray = (value) => {
        if (value == null) {
            return [];
        }
        return Array.isArray(value) ? value : [value];
    };
    // Pull every <loc> out of a parsed <urlset> document.
    const locsOf = (parsed) => {
        if (parsed.urlset == null) {
            throw new TypeError('Sitemap XML is missing the expected <urlset> element.');
        }
        return toArray(parsed.urlset.url).map((entry) => entry.loc);
    };
    // Escape regex metacharacters so literal text (e.g. the dots in a hostname
    // or the brackets of an IPv6 origin) is matched verbatim.
    const escapeRegex = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const parser = new XMLParser();
    const sitemap = parser.parse(sitemapXml);
    let urls;
    // If this is a sitemap index, fetch all sub sitemaps and combine their URLs.
    // Note: _sampledUrls() is intended to be used by devs within Playwright
    // tests. Because of this, we know what host to expect and can replace
    // whatever origin the dev set with localhost:4173, which is where Playwright
    // serves the app during testing. For unit tests, our mock.js mocks also
    // expect this host.
    if (sitemap.sitemapindex != null) {
        const subSitemapUrls = toArray(sitemap.sitemapindex.sitemap).map((obj) => obj.loc);
        // The sub sitemaps are independent, so fetch them concurrently.
        // Promise.all keeps the results in the index's original order.
        const urlLists = await Promise.all(subSitemapUrls.map(async (url) => {
            const path = new URL(url).pathname;
            const res = await fetch('http://localhost:4173' + path);
            if (!res.ok) {
                throw new Error(`Failed to fetch sub sitemap ${path}: HTTP ${res.status} ${res.statusText}`);
            }
            return locsOf(parser.parse(await res.text()));
        }));
        // flat() instead of push(...list): spreading a very large sitemap into
        // call arguments can exceed the engine's argument limit.
        urls = urlLists.flat();
    }
    else {
        urls = locsOf(sitemap);
    }
    // Nothing to sample; also avoids `new URL(undefined)` further down.
    if (urls.length === 0) {
        return [];
    }
    // Can't use this because Playwright doesn't use Vite.
    // let routes = Object.keys(import.meta.glob('/src/routes/**/+page.svelte'));
    // Read /src/routes to build 'routes'.
    let routes = [];
    try {
        // import.meta.url is a percent-encoded file URL. Take its pathname and
        // decode it so project directories containing spaces or non-ASCII
        // characters resolve to a real path on disk.
        const filePath = decodeURIComponent(new URL(import.meta.url).pathname);
        let projDir;
        if (filePath.includes('node_modules')) {
            // Currently running as an npm package.
            projDir = filePath.split('node_modules')[0];
        }
        else {
            // Currently running unit tests during dev.
            projDir = filePath.split('/src/')[0] + '/';
        }
        const dirTreeRes = dirTree(projDir + 'src/routes');
        // Match +page.svelte or +page@.svelte (used to break out of a layout).
        //https://kit.svelte.dev/docs/advanced-routing#advanced-layouts-breaking-out-of-layouts
        routes = extractPaths(dirTreeRes).filter((route) => /\+page.*\.svelte$/.test(route));
        if (routes.length > 0) {
            // 1. Trim everything to the left of '/src/routes/' so each entry
            //    begins at that segment, as passed on to `filterRoutes()`.
            // 2. Remove all grouping segments. i.e. those starting with '(' and
            //    ending with ')'
            const i = routes[0].indexOf('/src/routes/');
            const regex = /\/\([^)]+\)/g;
            routes = routes.map((route) => route.slice(i).replace(regex, ''));
        }
    }
    catch (err) {
        // Best effort: without routes from disk, no URL is classified as
        // static and no dynamic pattern exists, so the result will be empty.
        console.error('An error occurred:', err);
    }
    // Filter to reformat from file paths into site paths. The 2nd arg for
    // excludePatterns is empty because the exclusion patterns were already
    // applied during generation of the sitemap.
    routes = filterRoutes(routes, []);
    // Remove any optional `/[[lang]]` prefix. We can just use the default language that
    // will not have this stem, for the purposes of this sampling. But ensure root
    // becomes '/', not an empty string.
    routes = routes.map((route) => route.replace(/\/?\[\[lang(=[a-z]+)?\]\]/, '') || '/');
    // Separate static and dynamic routes. Remember these are _routes_ from disk
    // and consequently have not had any exclusion patterns applied against them,
    // they could contain `/about`, `/blog/[slug]`, routes that will need to be
    // excluded like `/dashboard`.
    const nonExcludedStaticRoutes = [];
    const nonExcludedDynamicRoutes = [];
    for (const route of routes) {
        if (/\[.*\]/.test(route)) {
            nonExcludedDynamicRoutes.push(route);
        }
        else {
            nonExcludedStaticRoutes.push(route);
        }
    }
    const ORIGIN = new URL(urls[0]).origin;
    const nonExcludedStaticRouteUrls = new Set(nonExcludedStaticRoutes.map((path) => ORIGIN + path));
    // Using URLs as the source, separate into static and dynamic routes. This:
    // 1. Gathers URLs that are static routes. We cannot use the static routes
    //    directly because they come from reading `/src/routes` and have not
    //    had the dev's `excludePatterns` applied, so an excluded route like
    //    `/dashboard` could exist within them, but _won't_ in the sitemap URLs.
    // 2. Removes static routes from the sitemap URLs before sampling for
    //    dynamic paths, which is necessary due to SvelteKit's route specificity
    //    rules. E.g. we remove paths like `/about` so they aren't sampled as a
    //    match for a dynamic route like `/[foo]`.
    const dynamicRouteUrls = [];
    const staticRouteUrls = [];
    for (const url of urls) {
        if (nonExcludedStaticRouteUrls.has(url)) {
            staticRouteUrls.push(url);
        }
        else {
            dynamicRouteUrls.push(url);
        }
    }
    // Convert dynamic route patterns into regex patterns.
    // - Use Set to make unique. Duplicates may occur given we haven't applied
    //   excludePatterns to the dynamic **routes** (e.g. `/blog/[page=integer]`
    //   and `/blog/[slug]` both become `/blog/[^/]+`). When we sample URLs for
    //   each of these patterns, however, the excluded patterns won't exist in
    //   the URLs from the sitemap, so it's not a problem.
    // - The pattern is anchored to the full URL (`^` + origin ... `$`),
    //   otherwise a false match can be found when one pattern is a subset of
    //   another. Merely terminating with "$" is not sufficient; an overlapping
    //   subset may still be found from the end.
    // - Literal text between params is escaped; each `[param]` segment becomes
    //   `[^/]+`.
    const paramSegment = /\[[^\]]+\]/g;
    const escapedOrigin = escapeRegex(ORIGIN);
    const regexPatterns = new Set(nonExcludedDynamicRoutes.map((path) => {
        const regexPattern = path
            .split(paramSegment)
            .map((literal) => escapeRegex(literal))
            .join('[^/]+');
        return '^' + escapedOrigin + regexPattern + '$';
    }));
    // Gather a max of one URL for each dynamic route's regex pattern.
    // - Remember, a regex pattern may exist in these routes that was excluded by
    //   the exclusionPatterns when the sitemap was generated. This is OK because
    //   no URLs will exist to be matched with them.
    const sampledDynamicUrls = findFirstMatches(regexPatterns, dynamicRouteUrls);
    return [...staticRouteUrls.sort(), ...Array.from(sampledDynamicUrls).sort()];
}
/**
 * Path-only variant of `_sampledUrls()`: takes the body of this site's
 * sitemap.xml and returns
 * 1. the path of every static (non-parameterized) route, and
 * 2. one path for every parameterized route.
 *
 * @private
 * @param sitemapXml - The XML string of the sitemap to analyze. This must have
 *                     been created by Super Sitemap to work as designed.
 * @returns Array of paths, in the same order as the URLs that
 *          `_sampledUrls()` resolves with.
 */
export async function _sampledPaths(sitemapXml) {
    const sampled = await _sampledUrls(sitemapXml);
    const pathnames = [];
    for (const href of sampled) {
        // Keep only the path component; origin, query and hash are dropped.
        const { pathname } = new URL(href);
        pathnames.push(pathname);
    }
    return pathnames;
}
/**
 * For each regex pattern, scans the haystack in order and records the first
 * string that the pattern matches. A pattern with no match contributes
 * nothing, which is allowed.
 *
 * @private
 * @param regexPatterns - Set of regex patterns to search for.
 * @param haystack - Array of strings to search within.
 * @returns Set of strings where each is the first match found for a pattern.
 *
 * @example
 * ```ts
 * const patterns = new Set(["a.*", "b.*"]);
 * const haystack = ["apple", "banana", "cherry"];
 * const result = findFirstMatches(patterns, haystack); // Set { 'apple', 'banana' }
 * ```
 */
export function findFirstMatches(regexPatterns, haystack) {
    // Unique sentinel so "no match" can never be confused with a haystack item.
    const NO_MATCH = Symbol('no-match');
    const firstHit = (matcher) => {
        for (const candidate of haystack) {
            if (matcher.test(candidate)) {
                return candidate;
            }
        }
        return NO_MATCH;
    };
    const results = new Set();
    for (const source of regexPatterns) {
        const hit = firstHit(new RegExp(source));
        if (hit !== NO_MATCH) {
            results.add(hit);
        }
    }
    return results;
}
/**
 * Flattens a dirTree response into a list of the `path` values of the node
 * and all of its descendants, parent before children and siblings in their
 * listed order.
 * - The result contains directories as well as files. Callers that want only
 *   routes must filter for entries ending in `+page.svelte` themselves.
 *
 * @param obj - The dirTree response object. https://www.npmjs.com/package/directory-tree
 * @param paths - Array of existing paths to append to (normally left
 * unspecified). It is mutated and returned.
 * @returns An array of strings representing disk paths to each route.
 */
export function extractPaths(obj, paths = []) {
    // Explicit stack instead of recursion; children are pushed in reverse so
    // they are popped, and therefore visited, in their original order.
    const pending = [obj];
    while (pending.length > 0) {
        const node = pending.pop();
        if (node.path) {
            paths.push(node.path);
        }
        const kids = node.children;
        if (!Array.isArray(kids)) {
            continue;
        }
        for (let k = kids.length - 1; k >= 0; k -= 1) {
            pending.push(kids[k]);
        }
    }
    return paths;
}