diff --git a/.cursor/rules/openapi-api-specification-implementation.mdc b/.cursor/rules/openapi-api-specification-implementation.mdc new file mode 100644 index 000000000..ee931eb6f --- /dev/null +++ b/.cursor/rules/openapi-api-specification-implementation.mdc @@ -0,0 +1,79 @@ +--- +title: OpenAPI Specification and Implementation +description: Guidelines for maintaining OpenAPI specifications in sync with API implementation +globs: + - "docs/openapi/**" + - "src/**" +alwaysApply: false +version: "1.0.0" +--- + +# OpenAPI Specification and Implementation + +## Context +When changes touch API specification files under `docs/openapi` **or** implementation files under `src`, this rule applies. + +## Core Principles + +### Specification-Implementation Sync +- Any new endpoint, modification of existing endpoint, or removal must include **both** the OpenAPI spec change **and** the matching implementation in `src` +- If the implementation is deferred, the spec's endpoint description **must start with** `Not implemented yet` +and the actual handler must return HTTP **501** with a clear JSON message: +```json +{ "error": "Not implemented yet: " } +``` +- Keep examples in sync with actual API behavior + +### Schema Management +- All request/response bodies and parameters **must reference** schemas in `schemas.yaml` +- Reuse existing schemas wherever possible to avoid duplication +- Avoid redefining data structures inline unless absolutely necessary +- Use **composition, inheritance, polymorphism** (as supported by OpenAPI 3.1.1) to reuse common parts +- When introducing new audit types, update: + - Audit type enum + - Audit result schema + - At least one complete example + +### Examples +- Add examples to `examples.yaml` and reference them (don't inline) +- Include at least one request and response example per endpoint +- Examples must validate against the schema and represent realistic, production-like data + +### Precision & Consistency +- Keep definitions as precise as 
possible (correct types, constraints, formats, etc.) +- Endpoints under the same tag must follow consistent naming, path structure, response format, etc. + +### Validation & Documentation Build +- After modifying OpenAPI specs, **always run** `npm run docs:lint` to validate the specification +- Before completing implementation, you **must run** `npm run docs:build` to generate documentation +- Fix any linting errors or build failures before considering the task complete + +## API Design Patterns + +### Pagination +- Set strict limits on collection resource items returned +- Support optional `limit` query parameter with documented defaults, min/max values +- Use cursor-based pagination with `cursor` query parameter +- Response format: `{ "cursor": "next-token", "items": [...] }` +- Omit `cursor` property when no more items exist +- Use consistent naming: `limit` and `cursor` across all paginated endpoints + +### Bulk Operations +- **Prefer bulk endpoints over separate single/multi-item endpoints** +- Accept arrays containing one or more items +- Document explicitly in description that single items can be passed in arrays +- Set strict limits on bulk POST/PUT/PATCH/DELETE request sizes +- Response format: `{ "metadata": { "total": N, "success": N, "failure": N }, "failures": [...], "items": [...] }` +- Specify atomicity behavior and per-item error reporting + +### Parameters +- For URLs passed as path parameters, use {base64Url} and encode as URL-safe base64 without padding (RFC 4648 §5) to avoid +/= issues in paths +- Maintain consistent naming across all endpoints + +### POST Requests +- Document idempotency behavior explicitly +- Specify duplicate detection mechanism if applicable +- Indicate whether upserts are supported + +### PATCH Requests +- Implementation must update only fields provided in request. 
If other fields are modified or reset, document this behavior clearly in the endpoint description diff --git a/.gitignore b/.gitignore index 3bca3c0c8..735e2a646 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,5 @@ admin-idp-p*.json *.code-workspace .prettierrc .vscode/settings.json -.cursor \ No newline at end of file +.cursor/* +!.cursor/rules/ diff --git a/docs/openapi/api.yaml b/docs/openapi/api.yaml index 39e8d7d96..0978e1763 100644 --- a/docs/openapi/api.yaml +++ b/docs/openapi/api.yaml @@ -67,6 +67,8 @@ tags: description: Report generation and management operations - name: project description: Project management operations + - name: url-store + description: URL Store operations for managing audit URLs paths: /audits/latest/{auditType}: @@ -200,6 +202,14 @@ paths: $ref: './site-enrollments-api.yaml#/site-enrollments-by-site' /sites/{siteId}/user-activities: $ref: './user-activities-api.yaml#/paths/~1sites~1{siteId}~1user-activities' + /sites/{siteId}/url-store: + $ref: './url-store-api.yaml#/list-all-urls' + /sites/{siteId}/url-store/by-source/{source}: + $ref: './url-store-api.yaml#/list-urls-by-source' + /sites/{siteId}/url-store/by-audit/{auditType}: + $ref: './url-store-api.yaml#/list-urls-by-audit' + /sites/{siteId}/url-store/{base64Url}: + $ref: './url-store-api.yaml#/get-url' /sites/{siteId}/key-events: $ref: './key-events-api.yaml#/key-events' /sites/{siteId}/key-events/{keyEventId}: diff --git a/docs/openapi/parameters.yaml b/docs/openapi/parameters.yaml index 7bfb94b30..3c97bea33 100644 --- a/docs/openapi/parameters.yaml +++ b/docs/openapi/parameters.yaml @@ -33,6 +33,15 @@ deliveryType: required: true schema: $ref: './schemas.yaml#/DeliveryType' +base64Url: + name: base64Url + description: Base64-encoded URL + in: path + required: true + schema: + type: string + format: base64url + example: "aHR0cHM6Ly9leGFtcGxlLmNvbS9wYWdl" base64BaseUrl: name: base64BaseUrl description: Base64-encoded base URL @@ -49,6 +58,14 @@ base64PageUrl: schema: type: 
string format: base64url +urlSource: + name: source + description: The source of the URLs (e.g., "manual") + in: path + required: true + schema: + type: string + example: "manual" ascending: name: ascending description: Whether to sort ascending or descending @@ -342,4 +359,4 @@ llmoConfigVersion: schema: type: string description: S3 object version ID - example: "abc123def456" + example: "abc123def456" \ No newline at end of file diff --git a/docs/openapi/schemas.yaml b/docs/openapi/schemas.yaml index 37199dc02..2a29b1355 100644 --- a/docs/openapi/schemas.yaml +++ b/docs/openapi/schemas.yaml @@ -4630,3 +4630,239 @@ PreflightResultItem: type: string enum: [identify, suggest] description: The step of the preflight job when this result was generated + +AuditUrl: + type: object + required: [siteId, url, source, audits, createdAt, updatedAt, createdBy, updatedBy] + properties: + siteId: + $ref: '#/Id' + url: + $ref: '#/URL' + source: + type: string + description: Origin of the URL (e.g., "manual", "sitemap") + default: "manual" + example: "manual" + audits: + type: array + items: + type: string + description: Enabled audit types for this URL (can be empty) + example: ["accessibility", "broken-backlinks"] + createdAt: + $ref: '#/DateTime' + updatedAt: + $ref: '#/DateTime' + createdBy: + type: string + description: User or service who created the URL entry + example: "user-alice" + updatedBy: + type: string + description: Last user or service to modify the URL entry + example: "user-bob" + example: + siteId: "550e8400-e29b-41d4-a716-446655440000" + url: "https://example.com/foo/bar" + source: "manual" + audits: ["accessibility", "broken-backlinks"] + createdAt: "2025-10-10T12:00:00Z" + updatedAt: "2025-10-10T15:30:00Z" + createdBy: "user-alice" + updatedBy: "user-bob" + +AuditUrlList: + type: object + properties: + items: + type: array + items: + $ref: '#/AuditUrl' + description: Array of audit URLs + cursor: + type: string + description: Pagination cursor for retrieving 
the next page of results + example: "eyJzaXRlSWQiOiIxMjMiLCJ1cmwiOiJodHRwczovL2V4YW1wbGUuY29tIn0" + example: + items: + - siteId: "550e8400-e29b-41d4-a716-446655440000" + url: "https://example.com/documents/report.pdf" + source: "manual" + audits: ["accessibility"] + createdAt: "2025-10-10T12:00:00Z" + updatedAt: "2025-10-10T12:00:00Z" + createdBy: "user-alice" + updatedBy: "user-alice" + - siteId: "550e8400-e29b-41d4-a716-446655440000" + url: "https://example.com/page2" + source: "sitemap" + audits: ["broken-backlinks"] + createdAt: "2025-10-10T12:00:00Z" + updatedAt: "2025-10-10T12:00:00Z" + createdBy: "system" + updatedBy: "system" + cursor: "eyJzaXRlSWQiOiIxMjMiLCJ1cmwiOiJodHRwczovL2V4YW1wbGUuY29tIn0" + +AuditUrlBulkResponse: + type: object + properties: + metadata: + type: object + properties: + total: + type: integer + description: Total number of URLs in the request + example: 2 + success: + type: integer + description: Number of URLs processed successfully + example: 1 + failure: + type: integer + description: Number of URLs that failed to process + example: 1 + failures: + type: array + items: + type: object + properties: + url: + type: string + description: The URL that failed to process + example: "https://example.com/invalid" + reason: + type: string + description: Reason for the failure + example: "URL does not belong to site domain" + description: Array of failed operations with reasons + items: + type: array + items: + $ref: '#/AuditUrl' + description: Array of successfully processed audit URLs + example: + metadata: + total: 2 + success: 1 + failure: 1 + failures: + - url: "https://example.com/invalid" + reason: "URL does not belong to site domain" + items: + - siteId: "550e8400-e29b-41d4-a716-446655440000" + url: "https://example.com/page1" + source: "manual" + audits: ["accessibility", "broken-backlinks"] + createdAt: "2025-10-10T12:00:00Z" + updatedAt: "2025-10-10T15:30:00Z" + createdBy: "user-alice" + updatedBy: "user-bob" + 
+AuditUrlDeleteResponse: + type: object + properties: + metadata: + type: object + properties: + total: + type: integer + description: Total number of URLs in the delete request + example: 2 + success: + type: integer + description: Number of URLs deleted successfully + example: 1 + failure: + type: integer + description: Number of URLs that failed to delete + example: 1 + failures: + type: array + items: + type: object + properties: + url: + type: string + description: The URL that failed to delete + example: "https://example.com/from-sitemap" + reason: + type: string + description: Reason for the deletion failure + example: "Can only delete URLs with source='manual'" + description: Array of failed delete operations with reasons + example: + metadata: + total: 2 + success: 1 + failure: 1 + failures: + - url: "https://example.com/from-sitemap" + reason: "Can only delete URLs with source='manual'" + +AuditUrlAddRequest: + type: array + minItems: 1 + maxItems: 100 + items: + type: object + required: [url, audits] + properties: + url: + type: string + format: uri + description: The URL to add (will be canonicalized) + source: + type: string + description: Source of the URL (defaults to "manual") + default: manual + audits: + type: array + items: + type: string + description: Array of audit types to enable for this URL + example: + - url: "https://example.com/page1.html" + source: "manual" + audits: ["accessibility", "broken-backlinks"] + - url: "https://example.com/page2.html" + audits: ["broken-backlinks"] + +AuditUrlUpdateRequest: + type: array + minItems: 1 + maxItems: 100 + items: + type: object + required: [url, audits] + properties: + url: + type: string + format: uri + description: The URL to update + audits: + type: array + items: + type: string + description: New audits array (replaces existing) + example: + - url: "https://example.com/page1.html" + audits: ["accessibility"] + - url: "https://example.com/page2.html" + audits: [] + +AuditUrlDeleteRequest: + 
type: object + required: [urls] + properties: + urls: + type: array + minItems: 1 + maxItems: 100 + items: + type: string + format: uri + example: + urls: + - "https://example.com/page1.html" + - "https://example.com/page2.html" \ No newline at end of file diff --git a/docs/openapi/url-store-api.yaml b/docs/openapi/url-store-api.yaml new file mode 100644 index 000000000..14fe03e59 --- /dev/null +++ b/docs/openapi/url-store-api.yaml @@ -0,0 +1,349 @@ +list-all-urls: + parameters: + - $ref: './parameters.yaml#/siteId' + - name: limit + in: query + required: false + description: Maximum number of items to return (default 100, max 500) + schema: + type: integer + default: 100 + minimum: 0 + maximum: 500 + - name: cursor + in: query + required: false + description: Pagination cursor from previous response + schema: + type: string + get: + tags: + - url-store + summary: List all URLs for a site + description: | + Retrieves a paginated list of all audit URLs configured for the specified site. + Supports cursor-based pagination for efficient traversal of large datasets. + operationId: listAllUrls + responses: + '200': + description: Successful operation with list of audit URLs + content: + application/json: + schema: + $ref: './schemas.yaml#/AuditUrlList' + '400': + $ref: './responses.yaml#/400-no-site-id' + '401': + $ref: './responses.yaml#/401' + '403': + $ref: './responses.yaml#/403-site-access-forbidden' + '404': + $ref: './responses.yaml#/404-site-not-found-with-id' + '500': + $ref: './responses.yaml#/500' + security: + - ims_key: [] + - scoped_api_key: [] + post: + tags: + - url-store + summary: Add URLs to the store (bulk operation) + description: | + Adds one or more URLs to the site's audit URL store. This operation is idempotent - + if a URL already exists (after canonicalization), the existing record is returned. + + Maximum 100 URLs per request. 
+ operationId: addUrls + requestBody: + required: true + content: + application/json: + schema: + $ref: './schemas.yaml#/AuditUrlAddRequest' + responses: + '200': + description: URLs processed successfully + content: + application/json: + schema: + $ref: './schemas.yaml#/AuditUrlBulkResponse' + '400': + description: | + Bad request. Possible reasons: + - Request body exceeds 100 items + - Invalid URL format + - Invalid request body structure + content: + application/json: + schema: + type: object + properties: + message: + type: string + example: "Request body must contain between 1 and 100 URLs" + '401': + $ref: './responses.yaml#/401' + '403': + $ref: './responses.yaml#/403-site-access-forbidden' + '404': + $ref: './responses.yaml#/404-site-not-found-with-id' + '500': + $ref: './responses.yaml#/500' + security: + - ims_key: [] + - scoped_api_key: [] + patch: + tags: + - url-store + summary: Update audit configurations for URLs (bulk operation) + description: | + Updates the audits array for one or more URLs. This operation overrides the existing + audits configuration and works for URLs from any source. + + Maximum 100 URLs per request. + operationId: updateAuditConfigs + requestBody: + required: true + content: + application/json: + schema: + $ref: './schemas.yaml#/AuditUrlUpdateRequest' + responses: + '200': + description: Audit configurations updated successfully + content: + application/json: + schema: + $ref: './schemas.yaml#/AuditUrlBulkResponse' + '400': + description: | + Bad request. 
Possible reasons: + - Request body exceeds 100 items + - Invalid URL format + - Invalid request body structure + content: + application/json: + schema: + type: object + properties: + message: + type: string + '401': + $ref: './responses.yaml#/401' + '403': + $ref: './responses.yaml#/403-site-access-forbidden' + '404': + $ref: './responses.yaml#/404-site-not-found-with-id' + '500': + $ref: './responses.yaml#/500' + security: + - ims_key: [] + - scoped_api_key: [] + delete: + tags: + - url-store + summary: Remove URLs from the store (bulk operation) + description: | + Removes one or more URLs from the site's audit URL store. + + Constraint: Only URLs with source="manual" can be deleted. Attempts to delete + URLs from other sources will fail and be reported in the failures array. + + Maximum 100 URLs per request. + operationId: deleteUrls + requestBody: + required: true + content: + application/json: + schema: + $ref: './schemas.yaml#/AuditUrlDeleteRequest' + responses: + '200': + description: Delete operation completed + content: + application/json: + schema: + $ref: './schemas.yaml#/AuditUrlDeleteResponse' + '400': + description: | + Bad request. 
Possible reasons: + - Request body exceeds 100 items + - Invalid URL format + - Invalid request body structure + content: + application/json: + schema: + type: object + properties: + message: + type: string + '401': + $ref: './responses.yaml#/401' + '403': + $ref: './responses.yaml#/403-site-access-forbidden' + '404': + $ref: './responses.yaml#/404-site-not-found-with-id' + '500': + $ref: './responses.yaml#/500' + security: + - ims_key: [] + - scoped_api_key: [] + +list-urls-by-source: + parameters: + - $ref: './parameters.yaml#/siteId' + - $ref: './parameters.yaml#/urlSource' + - name: limit + in: query + required: false + description: Maximum number of items to return (default 100, max 500) + schema: + type: integer + default: 100 + minimum: 0 + maximum: 500 + - name: cursor + in: query + required: false + description: Pagination cursor from previous response + schema: + type: string + get: + tags: + - url-store + summary: List URLs by source + description: | + Retrieves a paginated list of audit URLs filtered by their source + (e.g., "manual", "sitemap"). 
+ operationId: listUrlsBySource + responses: + '200': + description: Successful operation with filtered list of audit URLs + content: + application/json: + schema: + $ref: './schemas.yaml#/AuditUrlList' + '400': + $ref: './responses.yaml#/400-no-site-id' + '401': + $ref: './responses.yaml#/401' + '403': + $ref: './responses.yaml#/403-site-access-forbidden' + '404': + $ref: './responses.yaml#/404-site-not-found-with-id' + '500': + $ref: './responses.yaml#/500' + security: + - ims_key: [] + - scoped_api_key: [] + +list-urls-by-audit: + parameters: + - $ref: './parameters.yaml#/siteId' + - $ref: './parameters.yaml#/auditType' + - name: limit + in: query + required: false + description: Maximum number of items to return (default 100, max 500) + schema: + type: integer + default: 100 + minimum: 0 + maximum: 500 + - name: cursor + in: query + required: false + description: Pagination cursor from previous response + schema: + type: string + get: + tags: + - url-store + summary: List URLs by enabled audit type + description: | + Retrieves a paginated list of audit URLs that have a specific audit type enabled + (e.g., "accessibility", "broken-backlinks"). + operationId: listUrlsByAuditType + responses: + '200': + description: Successful operation with filtered list of audit URLs + content: + application/json: + schema: + $ref: './schemas.yaml#/AuditUrlList' + '400': + $ref: './responses.yaml#/400-no-site-id' + '401': + $ref: './responses.yaml#/401' + '403': + $ref: './responses.yaml#/403-site-access-forbidden' + '404': + $ref: './responses.yaml#/404-site-not-found-with-id' + '500': + $ref: './responses.yaml#/500' + security: + - ims_key: [] + - scoped_api_key: [] + +get-url: + parameters: + - $ref: './parameters.yaml#/siteId' + - $ref: './parameters.yaml#/base64Url' + get: + tags: + - url-store + summary: Get a specific URL from the store + description: | + Retrieves a specific audit URL by its base64-encoded URL. 
+ + Example: "https://example.com/page" becomes "aHR0cHM6Ly9leGFtcGxlLmNvbS9wYWdl" + operationId: getUrl + responses: + '200': + description: Successful operation with audit URL details + content: + application/json: + schema: + $ref: './schemas.yaml#/AuditUrl' + example: + siteId: "550e8400-e29b-41d4-a716-446655440000" + url: "https://example.com/documents/report.pdf" + source: "manual" + audits: ["accessibility", "broken-backlinks"] + createdAt: "2025-10-10T12:00:00Z" + updatedAt: "2025-10-10T15:30:00Z" + createdBy: "user-alice" + updatedBy: "user-bob" + '400': + description: | + Bad request. Possible reasons: + - Site ID is required or invalid + - Invalid base64 URL encoding + content: + application/json: + schema: + type: object + properties: + message: + type: string + '401': + $ref: './responses.yaml#/401' + '403': + $ref: './responses.yaml#/403-site-access-forbidden' + '404': + description: Site or URL not found + content: + application/json: + schema: + type: object + properties: + message: + type: string + example: "URL not found in store" + '500': + $ref: './responses.yaml#/500' + security: + - ims_key: [] + - scoped_api_key: [] + +