# OrcaSlicer_WIKI/.github/workflows/validate_internal_link.yml

# Validates links between Markdown documents: naming rules for link targets,
# existence of the linked document, and existence of the linked heading.
name: Validate Internal Links

# Runs for pull requests that change at least one file with a Markdown-family
# extension.
on:
  pull_request:
    paths:
      - '**/*.md'
      - '**/*.markdown'
      - '**/*.mdown'
      - '**/*.mkd'
      - '**/*.mkdn'
      - '**/*.mdx'

jobs:
  internal-link-validation:
    runs-on: ubuntu-latest
    # The job only reads files from the checkout, so the token is limited to
    # read access on repository contents.
    permissions:
      contents: read
    env:
      # Starts empty. The validation script exports the failure report into this
      # variable, and the last step fails the job when it is non-empty.
      ERROR_BLOCK: ''
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          # 0 requests the full history instead of a shallow clone.
          fetch-depth: 0

      # Inline Node.js script that scans the Markdown files in the workspace and
      # validates every non-image inline link it finds.
      - name: Validate internal documentation links
        id: validate_internal_links
        uses: actions/github-script@v9
        with:
          script: |
            const fs = require('fs');
            const path = require('path');

            // Every path handled below is relative to the process working directory
            // (expected to be the checked-out repository).
            const workspace = process.cwd();
            const workspaceRoot = path.resolve(workspace);
            // Lower-case extensions (with dot) of the files that are scanned for links.
            const allowedExt = new Set(['.md', '.markdown', '.mdown', '.mkd', '.mkdn', '.mdx']);
            // Maps the base name of each ".md" file to all workspace-relative paths that
            // carry that name; filled on first use by ensureMarkdownIndex().
            const markdownNameIndex = new Map();
            let markdownIndexReady = false;
            // The function declarations at the bottom of the script are hoisted, so they
            // are already callable here.
            const candidateFiles = collectMarkdownFiles('');
            if (!candidateFiles.length) {
              core.info('No Markdown files found; skipping internal link validation.');
              return;
            }

            // Raw text of every scanned file, keyed by workspace-relative path.
            const fileContents = new Map();
            // One entry per non-image inline link: { filePath, line, target }.
            const references = [];
            const markdownLinkPattern = /\[(?<text>[^\]]*)\]\(\s*(?<url><[^>]+>|[^)\s]+)(?:\s+"[^"]*")?\s*\)/g;

            for (const relativePath of candidateFiles) {
              const text = fs.readFileSync(path.join(workspaceRoot, relativePath), 'utf8');
              fileContents.set(relativePath, text);

              for (const match of text.matchAll(markdownLinkPattern)) {
                // A '!' directly in front of '[' marks an image; images are not validated.
                if (match.index > 0 && text[match.index - 1] === '!') {
                  continue;
                }

                const url = match.groups ? match.groups.url : match[2];
                let target = url ? url.trim() : '';
                // Unwrap the <...> form of a link destination.
                if (target.startsWith('<') && target.endsWith('>')) {
                  target = target.slice(1, -1).trim();
                }
                if (!target) {
                  continue;
                }

                references.push({
                  filePath: relativePath,
                  line: lineFromIndex(text, match.index),
                  target,
                });
              }
            }

            if (!references.length) {
              core.info('No markdown links found in updated files.');
              return;
            }

            // Heading anchors per document, keyed by workspace-relative path (see getAnchors).
            const headingCache = new Map();
            // Human-readable failure messages, one per offending link.
            const failures = [];

            // Each reference yields at most one failure: the first rule it violates.
            for (const reference of references) {
              const classification = classifyTarget(reference.target);
              // External URLs and blank targets are outside the scope of this check.
              if (classification.type === 'external' || classification.type === 'ignore') {
                continue;
              }

              if (classification.type === 'invalidHashCount') {
                failures.push(formatFailure(reference, 'invalidHashCount', classification.raw));
                continue;
              }

              // "#heading": the anchor must be non-empty, kebab-case and present in the
              // document that contains the link.
              if (classification.type === 'sameDocAnchor') {
                if (!classification.anchorSlug) {
                  failures.push(formatFailure(reference, 'sameDocEmptyAnchor', reference.target));
                  continue;
                }
                const decodedAnchor = decodeLinkComponent(classification.anchorRaw);
                if (!isKebabCase(decodedAnchor)) {
                  failures.push(formatFailure(reference, 'anchorNotKebabCase', `#${classification.anchorRaw}`));
                  continue;
                }
                const anchors = getAnchors(reference.filePath);
                if (!anchors.has(classification.anchorSlug)) {
                  failures.push(formatFailure(reference, 'missingSameDocAnchor', `#${classification.anchorRaw}`));
                }
                continue;
              }

              // "document" or "document#heading": resolve the document first, then (for
              // the second form) apply the same anchor rules against the linked document.
              if (classification.type === 'docOnly' || classification.type === 'docWithAnchor') {
                const docResult = resolveDocumentPath(reference.filePath, classification.docPathRaw);
                if (docResult.error) {
                  failures.push(formatFailure(reference, docResult.error, docResult.detail || classification.docPathRaw));
                  continue;
                }

                // Second existence check on the resolved path, independent of the name index.
                if (!fs.existsSync(docResult.absolutePath)) {
                  failures.push(formatFailure(reference, 'missingDocument', docResult.relativePath || `${docResult.linkPath}.md`));
                  continue;
                }

                if (classification.type === 'docOnly') {
                  continue;
                }

                if (!classification.anchorSlug) {
                  failures.push(formatFailure(reference, 'crossDocEmptyAnchor', reference.target));
                  continue;
                }

                const decodedAnchor = decodeLinkComponent(classification.anchorRaw);
                if (!isKebabCase(decodedAnchor)) {
                  failures.push(formatFailure(reference, 'anchorNotKebabCase', `${docResult.linkPath}#${classification.anchorRaw}`));
                  continue;
                }

                const anchors = getAnchors(docResult.relativePath);
                if (!anchors.has(classification.anchorSlug)) {
                  failures.push(formatFailure(reference, 'missingCrossDocAnchor', `${docResult.linkPath}#${classification.anchorRaw}`));
                }
              }
            }

            if (failures.length) {
              const block = failures.join('\n');
              core.exportVariable('ERROR_BLOCK', block);
              return;
            }

            core.exportVariable('ERROR_BLOCK', '');
            core.info(`Validated ${references.length} internal markdown link(s). All constraints satisfied.`);

            // Classifies a link target so the caller knows which checks apply.
            // The returned object's `type` is one of:
            //   'ignore'           - blank target
            //   'sameDocAnchor'    - "#heading" inside the current document
            //   'external'         - starts with a URI scheme or with "//"
            //   'invalidHashCount' - more than one '#'; `raw` holds the trimmed target
            //   'docOnly'          - document reference without '#'; `docPathRaw` holds it
            //   'docWithAnchor'    - "document#heading"
            // Anchor-bearing results carry `anchorRaw` (the text after '#') and
            // `anchorSlug` (that text passed through normalizeAnchor).
            function classifyTarget(target) {
              const link = target.trim();
              if (link === '') {
                return { type: 'ignore' };
              }

              const hashAt = link.indexOf('#');
              const hasSecondHash = hashAt !== -1 && link.indexOf('#', hashAt + 1) !== -1;

              // Targets that begin with '#' are judged before the external-URL test.
              if (hashAt === 0) {
                if (hasSecondHash) {
                  return { type: 'invalidHashCount', raw: link };
                }
                const fragment = link.slice(1);
                return {
                  type: 'sameDocAnchor',
                  anchorRaw: fragment,
                  anchorSlug: normalizeAnchor(fragment),
                };
              }

              const isExternal = /^[a-zA-Z][a-zA-Z0-9+.-]*:/.test(link) || link.startsWith('//');
              if (isExternal) {
                return { type: 'external' };
              }

              if (hasSecondHash) {
                return { type: 'invalidHashCount', raw: link };
              }

              if (hashAt === -1) {
                return { type: 'docOnly', docPathRaw: link };
              }

              // hashAt is greater than zero here, so the document part is never empty.
              const docPathRaw = link.slice(0, hashAt);
              const anchorRaw = link.slice(hashAt + 1);
              return {
                type: 'docWithAnchor',
                docPathRaw,
                anchorRaw,
                anchorSlug: normalizeAnchor(anchorRaw),
              };
            }

            // Resolves the document part of a link to a ".md" file in the workspace.
            //
            // rawPath must be a bare snake_case file name: no query string, no directory
            // separators and no ".md" suffix. The name is looked up in the base-name index
            // and has to match exactly one file.
            //
            // Returns { error, relativePath, absolutePath, linkPath, detail }. `error` is
            // null on success, otherwise a reason key understood by formatFailure; `detail`
            // is the text to show for that reason. (fromFile is accepted but not used.)
            function resolveDocumentPath(fromFile, rawPath) {
              const outcome = {
                error: null,
                relativePath: null,
                absolutePath: null,
                linkPath: null,
                detail: rawPath,
              };
              // Marks the outcome as failed; `fields` overrides the defaults above.
              const reject = (error, fields = {}) => Object.assign(outcome, { error }, fields);

              if (!rawPath) {
                return reject('emptyDocPath');
              }
              if (rawPath.includes('?')) {
                return reject('queryNotAllowed');
              }

              let decoded;
              try {
                decoded = decodeURIComponent(rawPath);
              } catch (_) {
                // Malformed percent-encoding: continue with the text as written.
                decoded = rawPath;
              }

              // Backslashes count as directory separators too.
              const docName = decoded.replace(/\\/g, '/');
              if (!docName || docName === '.' || docName === '..') {
                return reject('missingDocName');
              }
              if (docName.includes('/')) {
                return reject('pathNotAllowed');
              }
              if (docName.toLowerCase().endsWith('.md')) {
                return reject('extensionNotAllowed');
              }
              if (!isSnakeCase(docName)) {
                return reject('docNotSnakeCase');
              }

              ensureMarkdownIndex();
              const candidates = findMarkdownDocuments(docName);
              if (!candidates.length) {
                return reject('missingDocument', { detail: `${docName}.md`, linkPath: docName });
              }
              if (candidates.length > 1) {
                // List at most five of the clashing files.
                return reject('ambiguousDocument', {
                  detail: candidates.slice(0, 5).join(', '),
                  linkPath: docName,
                });
              }

              const [relativePath] = candidates;
              const absolutePath = path.normalize(path.join(workspaceRoot, relativePath));
              const fromRoot = path.relative(workspaceRoot, absolutePath);
              if (fromRoot.startsWith('..') || path.isAbsolute(fromRoot)) {
                return reject('outsideWorkspace');
              }

              return Object.assign(outcome, {
                relativePath,
                absolutePath,
                linkPath: docName,
                detail: relativePath,
              });
            }

            // Builds the base-name index on the first call; later calls do nothing.
            function ensureMarkdownIndex() {
              if (!markdownIndexReady) {
                indexMarkdownFiles('');
                markdownIndexReady = true;
              }
            }

            // Recursively records every ".md" file below relativeDir ('' means the
            // workspace root) in markdownNameIndex, keyed by file name without the suffix.
            // Entries named ".git" are skipped and unreadable directories are ignored.
            function indexMarkdownFiles(relativeDir) {
              const dirOnDisk = relativeDir ? path.join(workspaceRoot, relativeDir) : workspaceRoot;

              let children;
              try {
                children = fs.readdirSync(dirOnDisk, { withFileTypes: true });
              } catch (_) {
                return;
              }

              for (const child of children) {
                if (child.name === '.git') {
                  continue;
                }

                const childPath = relativeDir ? `${relativeDir}/${child.name}` : child.name;
                if (child.isDirectory()) {
                  indexMarkdownFiles(childPath);
                  continue;
                }
                if (!child.isFile() || !child.name.toLowerCase().endsWith('.md')) {
                  continue;
                }

                // Drop the three-character ".md" suffix; the rest of the name keeps its case.
                const baseName = child.name.slice(0, -3);
                const indexedPath = childPath.replace(/\\/g, '/');
                const bucket = markdownNameIndex.get(baseName);
                if (bucket) {
                  bucket.push(indexedPath);
                } else {
                  markdownNameIndex.set(baseName, [indexedPath]);
                }
              }
            }

            // Returns the indexed paths whose file name (without ".md") equals baseName,
            // or an empty array when there is no such file.
            function findMarkdownDocuments(baseName) {
              const hits = markdownNameIndex.get(baseName);
              return hits ? hits : [];
            }

            // True when value consists of lower-case alphanumeric words joined by single
            // underscores, e.g. "quick_start2".
            function isSnakeCase(value) {
              const snakeCasePattern = /^[a-z0-9]+(?:_[a-z0-9]+)*$/;
              return snakeCasePattern.test(value);
            }

            // True when value consists of lower-case alphanumeric words joined by one or
            // more hyphens, e.g. "print-settings" or "a--b".
            function isKebabCase(value) {
              const kebabCasePattern = /^[a-z0-9]+(?:-+[a-z0-9]+)*$/;
              return kebabCasePattern.test(value);
            }

            // Trims value and percent-decodes it. When the text is not valid
            // percent-encoding, the trimmed text is returned unchanged.
            function decodeLinkComponent(value) {
              const trimmedValue = value.trim();
              try {
                return decodeURIComponent(trimmedValue);
              } catch (_) {
                return trimmedValue;
              }
            }

            // Returns the set of heading anchors of the document at relativePath.
            // Results are memoised in headingCache; file text is taken from fileContents
            // or read from disk on demand. A file that does not exist yields an empty set.
            function getAnchors(relativePath) {
              const cached = headingCache.get(relativePath);
              if (cached !== undefined) {
                return cached;
              }

              let text = fileContents.get(relativePath);
              if (text === undefined) {
                const absolutePath = path.join(workspaceRoot, relativePath);
                if (fs.existsSync(absolutePath)) {
                  text = fs.readFileSync(absolutePath, 'utf8');
                  fileContents.set(relativePath, text);
                }
              }

              const anchors = text === undefined ? new Set() : collectHeadingAnchors(text);
              headingCache.set(relativePath, anchors);
              return anchors;
            }

            // Collects the anchors generated by the ATX headings ("# Title") in text.
            //
            // Each heading is reduced to a slug with slugify(). The first heading with a
            // given slug contributes the slug itself; later duplicates contribute
            // "slug-1", "slug-2", and so on.
            //
            // Lines inside fenced code blocks (``` or ~~~) are skipped, so a "# comment"
            // in a shell snippet neither creates an anchor nor shifts the duplicate
            // counters of real headings.
            //
            // Returns a Set of anchor strings.
            function collectHeadingAnchors(text) {
              const anchors = new Set();
              const slugCounts = new Map();
              const lines = text.split(/\r?\n/);
              // Describes the currently open fence ({ char, length }) or null outside one.
              let openFence = null;

              for (const line of lines) {
                const fenceMatch = line.match(/^\s{0,3}(`{3,}|~{3,})(.*)$/);
                if (fenceMatch) {
                  const marker = fenceMatch[1];
                  const rest = fenceMatch[2];
                  if (openFence === null) {
                    // The info string of a backtick fence may not contain backticks;
                    // such a line is inline code, not an opening fence.
                    if (marker[0] !== '`' || !rest.includes('`')) {
                      openFence = { char: marker[0], length: marker.length };
                      continue;
                    }
                  } else if (
                    marker[0] === openFence.char &&
                    marker.length >= openFence.length &&
                    rest.trim() === ''
                  ) {
                    // A closing fence uses the same character, is at least as long as
                    // the opening one, and carries no trailing text.
                    openFence = null;
                    continue;
                  }
                }
                if (openFence !== null) {
                  continue;
                }

                const match = line.match(/^\s{0,3}(#{1,6})\s+(.*)$/);
                if (!match) {
                  continue;
                }
                let headingText = match[2].trim();
                // Drop an optional closing run of '#' characters ("## Title ##").
                headingText = headingText.replace(/\s+#+\s*$/, '').trim();
                if (!headingText) {
                  continue;
                }
                const slug = slugify(headingText);
                if (!slug) {
                  continue;
                }
                const count = slugCounts.get(slug) || 0;
                if (count === 0) {
                  slugCounts.set(slug, 1);
                  anchors.add(slug);
                } else {
                  slugCounts.set(slug, count + 1);
                  anchors.add(`${slug}-${count}`);
                }
              }

              return anchors;
            }

            // Converts heading or anchor text into a slug: accents are stripped, every
            // character other than ASCII letters, digits, whitespace and '-' is removed,
            // whitespace runs become '-', and repeated '-' collapse into one.
            // Text is lower-cased unless options.preserveCase is exactly true.
            function slugify(value, options = {}) {
              const keepCase = options.preserveCase === true;
              const disallowed = keepCase ? /[^A-Za-z0-9\s-]/g : /[^a-z0-9\s-]/g;

              // Decompose accented characters, then drop the combining marks.
              let slug = value.normalize('NFKD');
              slug = slug.replace(/[\u0300-\u036f]/g, '');
              slug = slug.trim();
              if (!keepCase) {
                slug = slug.toLowerCase();
              }

              slug = slug.replace(disallowed, '');
              slug = slug.replace(/\s+/g, '-');
              return slug.replace(/-+/g, '-');
            }

            // Turns the raw text after '#' into the form compared against heading
            // anchors: trimmed, percent-decoded when possible, then slugified with the
            // original letter case kept. An empty or missing anchor yields ''.
            function normalizeAnchor(raw) {
              if (!raw) {
                return '';
              }
              // decodeLinkComponent performs the trim and the tolerant percent-decoding.
              return slugify(decodeLinkComponent(raw), { preserveCase: true });
            }

            // Returns the 1-based line number of the character at `index` in `text`,
            // i.e. one plus the number of line feeds located before that index.
            function lineFromIndex(text, index) {
              let lineNumber = 1;
              let newlineAt = text.indexOf('\n');
              while (newlineAt !== -1 && newlineAt < index) {
                lineNumber += 1;
                newlineAt = text.indexOf('\n', newlineAt + 1);
              }
              return lineNumber;
            }

            // Builds the message reported for one invalid link.
            //   reference - { filePath, line } of the offending link
            //   reason    - failure key; unknown keys produce a generic message
            //   details   - text interpolated into the message (target, path or anchor)
            // Every message starts with "<filePath> line <line>: ".
            function formatFailure(reference, reason, details) {
              const location = `${reference.filePath} line ${reference.line}: `;

              const explanations = new Map([
                ['sameDocEmptyAnchor', () => `anchor reference "${details}" must include a heading name.`],
                ['missingSameDocAnchor', () => `heading ${details} was not found in the same document.`],
                ['emptyDocPath', () => `link target must include a document name.`],
                ['queryNotAllowed', () => `document links cannot include query parameters (${details}).`],
                ['outsideWorkspace', () => `document path "${details}" resolves outside the repository.`],
                ['pathNotAllowed', () => `document links must not include directories; use just the filename (got "${details}").`],
                ['extensionNotAllowed', () => `link target "${details}" must omit the .md suffix.`],
                ['docNotSnakeCase', () => `document name "${details}" must be snake_case.`],
                ['missingDocName', () => `document link "${details}" must include a file name (without .md).`],
                ['missingDocument', () => `linked document ${details} does not exist.`],
                ['ambiguousDocument', () => `document link matches multiple files (${details}).`],
                ['crossDocEmptyAnchor', () => `link to ${details} must include a heading name after '#'.`],
                ['anchorNotKebabCase', () => `heading reference ${details} must be kebab-case.`],
                ['missingCrossDocAnchor', () => `heading ${details} was not found.`],
                ['invalidHashCount', () => `link target "${details}" cannot contain more than one '#'.`],
              ]);

              const explain = explanations.get(reason);
              const message = explain ? explain() : `invalid link target ${details}.`;
              return location + message;
            }

            // Recursively lists the files below relativeDir ('' means the workspace root)
            // whose extension is in allowedExt. Paths are workspace-relative and use '/'.
            // Entries named ".git" are skipped; an unreadable directory contributes nothing.
            function collectMarkdownFiles(relativeDir) {
              const dirOnDisk = relativeDir ? path.join(workspaceRoot, relativeDir) : workspaceRoot;

              let children;
              try {
                children = fs.readdirSync(dirOnDisk, { withFileTypes: true });
              } catch (_) {
                return [];
              }

              return children.flatMap((child) => {
                if (child.name === '.git') {
                  return [];
                }

                const childPath = relativeDir ? `${relativeDir}/${child.name}` : child.name;
                if (child.isDirectory()) {
                  return collectMarkdownFiles(childPath);
                }

                const isMarkdown = child.isFile() && allowedExt.has(path.extname(child.name).toLowerCase());
                return isMarkdown ? [childPath.replace(/\\/g, '/')] : [];
              });
            }

      # Prints every violation collected by the validation step and fails the job.
      # ERROR_BLOCK is built from pull-request content (file paths and link
      # targets), so it is read as a shell variable at run time. Expanding it
      # through a workflow expression would paste that untrusted text into the
      # script source before the shell parses it.
      - name: Show invalid internal links
        if: env.ERROR_BLOCK != ''
        run: |
          echo 'Invalid markdown links:'
          printf '```\n%s\n```\n' "$ERROR_BLOCK"
          exit 1