From 3884d669321a4af0e150bddbf37d19a9a2517009 Mon Sep 17 00:00:00 2001 From: Nicolas Humbert Date: Mon, 19 Jan 2026 16:16:01 +0100 Subject: [PATCH 1/2] S3UTILS-216 Tool to detects missing permissions on CRR roles --- replicationAudit/README.md | 467 +++++++++++++++++ .../check-replication-permissions.js | 480 ++++++++++++++++++ .../list-buckets-with-replication.sh | 325 ++++++++++++ tests/unit/replicationAudit/policyChecker.js | 302 +++++++++++ 4 files changed, 1574 insertions(+) create mode 100644 replicationAudit/README.md create mode 100644 replicationAudit/check-replication-permissions.js create mode 100755 replicationAudit/list-buckets-with-replication.sh create mode 100644 tests/unit/replicationAudit/policyChecker.js diff --git a/replicationAudit/README.md b/replicationAudit/README.md new file mode 100644 index 00000000..f1ad52fa --- /dev/null +++ b/replicationAudit/README.md @@ -0,0 +1,467 @@ +# Scripts Documentation + +## list-buckets-with-replication.sh + +Lists all buckets with replication enabled across all accounts using the metadata API (bucketd). + +### Prerequisites + +- Access to an S3 connector node (`runners_s3`) with bucketd running +- `curl` and `jq` installed +- Bucketd accessible on port 9000 (default) + +### Usage + +1. Copy the script to the supervisor: + ```bash + scp replicationAudit/list-buckets-with-replication.sh root@:/root/ + ``` + +2. Connect to the supervisor as root and go to the federation directory + (by default `/srv/scality/s3/s3-offline/federation`): + ```bash + ssh root@ + cd /srv/scality/s3/s3-offline/federation + ENV_DIR=s3config + ``` + +3. Copy the script to an S3 connector node: + ```bash + ansible -i env/$ENV_DIR/inventory runners_s3[0] -m copy \ + -a 'src=/root/list-buckets-with-replication.sh dest=/root/' + ``` + +4. Run the script: + ```bash + ansible -i env/$ENV_DIR/inventory runners_s3[0] -m shell \ + -a 'bash /root/list-buckets-with-replication.sh' + ``` + +5. 
Retrieve the output file: + ```bash + ansible -i env/$ENV_DIR/inventory runners_s3[0] -m shell \ + -a 'cat /root/buckets-with-replication.json' + ``` + +### Configuration + +Environment variables can be used to customize behavior: + +| Variable | Default | Description | +|----------|---------|-------------| +| `BUCKETD_HOST` | localhost | Bucketd hostname | +| `BUCKETD_PORT` | 9000 | Bucketd port | +| `BATCH_SIZE` | 10 | Number of parallel curl requests (increase for faster execution) | +| `OUTPUT_FILE` | buckets-with-replication.json | Output file path | + +### Output Format + +The script produces a JSON file with metadata and results: + +```json +{ + "metadata": { + "timestamp": "2026-01-19T10:07:25Z", + "durationSeconds": 2, + "bucketdUrl": "http://localhost:9000", + "counts": { + "totalBucketsScanned": 150, + "bucketsWithReplication": 3, + "bucketsWithoutReplication": 147, + "fetchErrors": 0 + } + }, + "results": [ + { + "bucket": "source-bucket", + "owner": "267390090509", + "ownerDisplayName": "testaccount", + "sourceRole": "arn:aws:iam::267390090509:role/crr-source-role" + } + ] +} +``` + +### How It Works + +1. **Discovers buckets**: Queries `/default/bucket/users..bucket` which contains all bucket names across all accounts +2. **Fetches attributes**: For each bucket, fetches attributes via `/default/attributes/{bucket}` in batches of 10 (with connection reuse) +3. **Filters replication**: Identifies buckets with `replicationConfiguration` containing at least one enabled rule +4. **Extracts metadata**: Outputs bucket name, owner info, and the source role (part before the comma in the role ARN) + +### Example Run + +``` +=== List Buckets with Replication Enabled === +Bucketd: http://localhost:9000 +Batch size: 10 +Output file: buckets-with-replication.json + +Checking bucketd connectivity... +Connected to bucketd + +Step 1: Fetching bucket list... +Found 150 buckets (excluding internal buckets) + +Step 2: Fetching bucket attributes in batches of 10... 
+ Processed 10/150 buckets (batch 1) + Processed 20/150 buckets (batch 2) + ... + +=== Summary === +Total buckets scanned: 150 + With replication: 3 + Without replication: 147 + Fetch errors: 0 +Duration: 2s +Output saved to: buckets-with-replication.json + +Done. +``` + +### Troubleshooting + +**"Cannot connect to bucketd"** +- Ensure you're running on an S3 connector node (`runners_s3`) where bucketd is running +- Check if bucketd is listening: `ss -tlnp | grep 9000` + +**"Found 0 buckets"** +- Verify users..bucket is accessible: `curl -s http://localhost:9000/default/bucket/users..bucket | jq '.Contents[].key'` + +**Script timeout** +- For large deployments, consider running directly on the S3 connector node via interactive SSH + +--- + +## check-replication-permissions.js + +Checks if replication roles have `s3:ReplicateObject` permission by directly +querying Vault metadata via the repd protocol (no vaultclient or credentials +needed). + +### Prerequisites + +- Output from `list-buckets-with-replication.sh` (buckets-with-replication.json) +- Access to an S3 connector node (`runners_s3`) with a container that has Node.js (e.g., vault container) + +### How It Works + +The script is self-contained with inlined functions from MetaData and Vault: + +- **Protocol constants** - from `MetaData/lib/protocol.json` +- **Protocol encoding** - from `MetaData/lib/server/ProtoBuilder.js` +- **Key generation** - from `vault/lib/Indexer.js` + +This ensures portability while using the exact same protocol and key formats as Vault. + +### Usage + +1. First, run `list-buckets-with-replication.sh` to generate the bucket list + (see above) + +2. Copy the script to the supervisor: + + ```bash + scp replicationAudit/check-replication-permissions.js root@:/root/ + ``` + +3. 
Connect to the supervisor as root (if not already connected) and go to the + federation directory (by default `/srv/scality/s3/s3-offline/federation`): + + ```bash + ssh root@ + cd /srv/scality/s3/s3-offline/federation + ENV_DIR=s3config + ``` + +4. Copy the script to an S3 connector node: + + ```bash + ansible -i env/$ENV_DIR/inventory runners_s3[0] -m copy \ + -a 'src=/root/check-replication-permissions.js dest=/root/' + ``` + +5. Find the vault-metadata repd leader IP: + + ```bash + ansible -i env/$ENV_DIR/inventory runners_s3[0] -m shell \ + -a 'curl -s http://localhost:5300/_/raft/leader' + ``` + + This returns JSON like `{"ip":"10.160.116.162","port":4300}` - use the `ip` value. + + **Note:** Vault metadata uses port 5300 for admin. + +6. Find the vault container ID: + + ```bash + ansible -i env/$ENV_DIR/inventory runners_s3[0] -m shell \ + -a 'crictl ps | awk "/scality-vault/ {print \$1}"' + ``` + +7. Copy files to `/var/tmp` (mounted in vault container) and run the script: + + ```bash + VAULT_CONTAINER= + LEADER_IP= + + ansible -i env/$ENV_DIR/inventory runners_s3[0] -m shell \ + -a "cp /root/check-replication-permissions.js /var/tmp/ && \ + cp /root/buckets-with-replication.json /var/tmp/ && \ + crictl exec $VAULT_CONTAINER node /var/tmp/check-replication-permissions.js \ + /var/tmp/buckets-with-replication.json $LEADER_IP /var/tmp/missing.json" + ``` + +8. 
Retrieve the output: + + ```bash + ansible -i env/$ENV_DIR/inventory runners_s3[0] -m shell \ + -a 'cat /var/tmp/missing.json' + ``` + +### Command Line Arguments + +``` +node check-replication-permissions.js [input-file] [leader-ip] [output-file] [--include-policies] +``` + +| Argument | Default | Description | +|----------|---------|-------------| +| `input-file` | /root/buckets-with-replication.json | Input JSON from list script | +| `leader-ip` | 127.0.0.1 | Vault-metadata repd leader IP | +| `output-file` | /root/missing-replication-permissions.json | Output file path | +| `--include-policies` | (not set) | Include full policy documents in output | + +### Output Format + +The script produces a JSON file with metadata and results. The `results` array +contains **only buckets missing the `s3:ReplicateObject` permission**. + +**Default output (compact):** + +```json +{ + "metadata": { + "timestamp": "2026-01-19T15:27:11.557Z", + "durationMs": 24, + "durationHuman": "0.0s", + "repdLeader": "10.160.112.179:4300", + "inputFile": "/tmp/buckets.json", + "counts": { + "totalBuckets": 6, + "bucketsOk": 3, + "bucketsMissingPermission": 3, + "bucketsSkipped": 0, + "bucketsWithErrors": 0, + "uniqueRolesChecked": 2 + } + }, + "results": [ + { + "bucket": "bucket-old-1", + "sourceRole": "arn:aws:iam::267390090509:role/crr-role-outdated", + "policies": [ + { + "name": "crr-policy-outdated", + "path": "/", + "allowsReplicateObject": false + } + ] + } + ] +} +``` + +**With `--include-policies` (full policy documents):** + +```json +{ + "metadata": { + "timestamp": "2026-01-19T15:27:11.557Z", + "durationMs": 24, + "durationHuman": "0.0s", + "repdLeader": "10.160.112.179:4300", + "inputFile": "/tmp/buckets.json", + "counts": { + "totalBuckets": 6, + "bucketsOk": 3, + "bucketsMissingPermission": 3, + "bucketsSkipped": 0, + "bucketsWithErrors": 0, + "uniqueRolesChecked": 2 + } + }, + "results": [ + { + "bucket": "bucket-old-1", + "sourceRole": 
"arn:aws:iam::267390090509:role/crr-role-outdated", + "policies": [ + { + "name": "crr-policy-outdated", + "path": "/", + "allowsReplicateObject": false, + "document": { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": ["s3:GetObjectVersion", "s3:GetObjectVersionAcl"], + "Resource": ["arn:aws:s3:::bucket-old-1/*"] + }, + { + "Effect": "Allow", + "Action": ["s3:ListBucket", "s3:GetReplicationConfiguration"], + "Resource": ["arn:aws:s3:::bucket-old-1"] + } + ] + } + } + ] + } + ] +} +``` + +In this example, the policy is missing `s3:ReplicateObject` - it only has +`s3:GetObjectVersion` and `s3:GetObjectVersionAcl`. + +**Fields:** + +- `bucket`: Source bucket name with replication enabled +- `sourceRole`: The IAM role ARN configured for replication +- `error`: (optional) Error message if role lookup failed +- `policies`: Array of all policies attached to the role, each containing: + - `name`: Policy name + - `path`: Policy path (e.g., `/` or `/custom/path/`) + - `allowsReplicateObject`: Whether this policy grants `s3:ReplicateObject` + (always false in output since we only output missing permissions) + - `document`: (only with `--include-policies`) Full IAM policy document for analysis + +### Script Logic + +1. **Connects to repd**: TCP connection to vault-metadata repd on port 4300 +2. **For each bucket's replication role**: + - Get role ID: `linkRoleArn(arn)` → role ID + - List attached policies: `policyByRoleId(accountId, roleId, '', '')` + - Get each policy: `policyByName(accountId, policyName)` +3. **Check permissions**: Evaluates if any policy allows `s3:ReplicateObject` +4. 
**Output**: Only buckets missing the required permission + +### Vault Metadata Key Schema + +The script queries the `vaultdb` database using these key patterns: + +| Purpose | Key Pattern | Example | +|---------|-------------|---------| +| Role by ARN | `roleArn:{arn}` | `roleArn:arn:aws:iam::123:role/MyRole` | +| Role data | `linkAccount:{id}:roleId:{roleId}` | `linkAccount:123:roleId:AROAXXX` | +| Policies | `linkAccount:{id}:roleId:{roleId}:policy:{path}` | `...policy:/my-policy` | +| Policy content | `accountId:{id}:policyName:{name}` | `accountId:123:policyName:my-policy` | + +### Example Run + +``` +=== Check Replication Role Permissions === +Input: /tmp/buckets.json +Output: /tmp/missing.json +Repd: 10.160.112.172:4300 + +Processing 6 buckets... + +[1/6] bucket-new-1 -> OK +[2/6] bucket-new-2 -> OK +[3/6] bucket-new-3 -> OK +[4/6] bucket-old-1 -> MISSING: s3:ReplicateObject +[5/6] bucket-old-2 -> MISSING: s3:ReplicateObject +[6/6] bucket-old-3 -> MISSING: s3:ReplicateObject + +=== Summary === +Total checked: 6 +Missing permission: 3 +Output saved to: /tmp/missing.json +``` + +### Complete Workflow Example + +Here's a complete example running both scripts end-to-end: + +```bash +# From your local machine: copy scripts to the supervisor +scp replicationAudit/list-buckets-with-replication.sh root@:/root/ +scp replicationAudit/check-replication-permissions.js root@:/root/ + +# Connect to the supervisor +ssh root@ +``` + +Then, from the supervisor, go to the federation directory +(by default `/srv/scality/s3/s3-offline/federation`): + +```bash +cd /srv/scality/s3/s3-offline/federation +ENV_DIR=s3config + +# Step 1: Copy scripts to S3 connector node +ansible -i env/$ENV_DIR/inventory runners_s3[0] -m copy \ + -a 'src=/root/list-buckets-with-replication.sh dest=/root/' +ansible -i env/$ENV_DIR/inventory runners_s3[0] -m copy \ + -a 'src=/root/check-replication-permissions.js dest=/root/' + +# Step 2: Run list-buckets-with-replication.sh +ansible -i 
env/$ENV_DIR/inventory runners_s3[0] -m shell \ + -a 'bash /root/list-buckets-with-replication.sh' + +# Step 3: Find the vault-metadata repd leader IP (port 5300) +ansible -i env/$ENV_DIR/inventory runners_s3[0] -m shell \ + -a 'curl -s http://localhost:5300/_/raft/leader' +# Note the "ip" value from the output, e.g., {"ip":"10.160.116.162","port":4300} + +# Step 4: Find the vault container ID +ansible -i env/$ENV_DIR/inventory runners_s3[0] -m shell \ + -a 'crictl ps | awk "/scality-vault/ {print \$1}"' +# Note the container ID from the output + +# Step 5: Set variables and run the permission check script +VAULT_CONTAINER= +LEADER_IP= + +ansible -i env/$ENV_DIR/inventory runners_s3[0] -m shell \ + -a "cp /root/check-replication-permissions.js /var/tmp/ && \ + cp /root/buckets-with-replication.json /var/tmp/ && \ + crictl exec $VAULT_CONTAINER node /var/tmp/check-replication-permissions.js \ + /var/tmp/buckets-with-replication.json $LEADER_IP /var/tmp/missing.json" + +# Step 6: Retrieve results +ansible -i env/$ENV_DIR/inventory runners_s3[0] -m shell \ + -a 'cat /var/tmp/missing.json' +``` + +### Troubleshooting + +**"Role not found in vault"** + +- The role ARN in the bucket's replication configuration doesn't exist in vault +- The role may have been deleted after replication was configured + +**"No policies attached to role"** + +- The role exists but has no managed policies attached +- Attach a policy with `s3:ReplicateObject` permission to the role + +**"Missing s3:ReplicateObject"** + +- The role has policies but none grant `s3:ReplicateObject` on the source bucket +- Review the policy documents in the output to see what permissions are granted +- Ensure the policy has: + - Action: `s3:ReplicateObject` (or `s3:*` or `s3:Replicate*`) + - Resource: matching the source bucket ARN (e.g., `arn:aws:s3:::bucket-name/*`) + +**Connection timeout or refused** + +- Ensure you're connecting to the correct repd leader IP +- The script must run inside a container that can 
reach repd on port 4300 +- Find the leader: `curl -s http://localhost:5300/_/raft/leader` + +**Script timeout** + +- For many buckets, run directly on the S3 connector node via interactive SSH diff --git a/replicationAudit/check-replication-permissions.js b/replicationAudit/check-replication-permissions.js new file mode 100644 index 00000000..55db3a69 --- /dev/null +++ b/replicationAudit/check-replication-permissions.js @@ -0,0 +1,480 @@ +#!/usr/bin/env node +/* eslint-disable no-console */ +/** + * check-replication-permissions.js + * + * Checks if replication roles have s3:ReplicateObject permission + * by querying Vault metadata directly via repd protocol. + * + * Usage: node check-replication-permissions.js [input-file] [leader-ip] [output-file] + * + * How it connects to vault metadata: + * + * Vault metadata has no HTTP frontend (no bucketd). This script connects + * directly to repd (the raft-based metadata store) on TCP port 4300 + * using a simple protocol: 4-byte length prefix + JSON payload. 
+ */ + +const net = require('net'); +const fs = require('fs'); + +// =========================================================================== +// Configuration +// =========================================================================== +const CONFIG = { + inputFile: process.argv[2] || '/root/buckets-with-replication.json', + leaderIp: process.argv[3] || '127.0.0.1', + outputFile: process.argv[4] || '/root/missing-replication-permissions.json', + repdPort: 4300, + dbName: 'vaultdb', + includePolicies: process.argv.includes('--include-policies'), + requestTimeoutMs: 10000, +}; + +// =========================================================================== +// Logging +// =========================================================================== +function log(message) { + console.error(message); +} + +function logProgress(current, total, bucket, status) { + process.stderr.write(`[${current}/${total}] ${bucket} -> ${status}\n`); +} + +// =========================================================================== +// Protocol constants and encoding +// [FROM MetaData/lib/protocol.json and MetaData/lib/server/ProtoBuilder.js] +// +// Messages are framed as: [4-byte length][JSON payload] +// =========================================================================== +const PROTOCOL = { + FROM_REPD: 1, + TYPE_REQUEST: 1, + METHOD_GET: 2, + METHOD_LIST: 4, + HEADER_SIZE: 4, // 32-bit big-endian integer for message length +}; + +function encodeMessage(payload) { + const json = JSON.stringify(payload); + const jsonLength = Buffer.byteLength(json); + const buffer = Buffer.allocUnsafe(PROTOCOL.HEADER_SIZE + jsonLength); + buffer.writeInt32BE(jsonLength); + buffer.write(json, PROTOCOL.HEADER_SIZE); + return buffer; +} + +function tryReadMessage(buffer) { + if (buffer.length < PROTOCOL.HEADER_SIZE) { + return null; + } + + const jsonLength = buffer.readUInt32BE(0); + const totalLength = PROTOCOL.HEADER_SIZE + jsonLength; + + if (buffer.length < totalLength) { + return 
null; + } + + return { + data: JSON.parse(buffer.slice(PROTOCOL.HEADER_SIZE, totalLength).toString()), + remaining: buffer.slice(totalLength), + }; +} + +// =========================================================================== +// Vault database key generation +// [FROM vault/lib/Indexer.js] +// =========================================================================== +const VaultKeys = { + /** Get key for looking up role by ARN */ + roleByArn(roleArn) { + return `roleArn:${roleArn}`; + }, + + /** Get key prefix for policies attached to a role */ + policiesByRole(accountId, roleId) { + return `linkAccount:${accountId}:roleId:${roleId}:policy:`; + }, + + /** Get key for looking up policy by name */ + policyByName(accountId, policyName) { + return `accountId:${accountId}:policyName:${policyName}`; + }, + + /** Get range limits for prefix-based listing */ + getRangeLimits(prefix) { + const end = prefix.slice(0, -1) + String.fromCharCode(prefix.charCodeAt(prefix.length - 1) + 1); + return [prefix, end]; + }, + + /** Extract policy path and name from a policiesByRole key */ + extractPolicyFromKey(key) { + const policyPart = key.split(':policy:')[1] || ''; + const lastSlash = policyPart.lastIndexOf('/'); + + if (lastSlash === -1) { + return { path: '/', name: policyPart }; + } + + return { + path: policyPart.substring(0, lastSlash + 1), + name: policyPart.substring(lastSlash + 1), + }; + }, +}; + +// =========================================================================== +// Repd Client +// =========================================================================== +class RepdClient { + constructor(host, port, dbName) { + this.host = host; + this.port = port; + this.dbName = dbName; + this.client = null; + this.requestId = 0; + this.receiveBuffer = Buffer.alloc(0); + this.pendingCallback = null; + } + + connect() { + return new Promise((resolve, reject) => { + this.client = net.connect(this.port, this.host); + this.client.on('connect', resolve); + 
this.client.on('error', reject); + this.client.on('data', chunk => this.handleData(chunk)); + }); + } + + handleData(chunk) { + this.receiveBuffer = Buffer.concat([this.receiveBuffer, chunk]); + + const message = tryReadMessage(this.receiveBuffer); + if (message && this.pendingCallback) { + this.receiveBuffer = message.remaining; + const callback = this.pendingCallback; + this.pendingCallback = null; + callback(null, message.data); + } + } + + sendRequest(method, params) { + return new Promise((resolve, reject) => { + this.pendingCallback = (err, response) => { + if (err) { + reject(err); + } else { + resolve(response); + } + }; + + const request = { + from: PROTOCOL.FROM_REPD, + type: PROTOCOL.TYPE_REQUEST, + logUids: `check-${Date.now()}`, + repd: { + method, + id: ++this.requestId, + db: this.dbName, + ...params, + }, + }; + + this.client.write(encodeMessage(request)); + + setTimeout(() => { + if (this.pendingCallback) { + this.pendingCallback = null; + reject(new Error('Request timeout')); + } + }, CONFIG.requestTimeoutMs); + }); + } + + async get(key) { + const response = await this.sendRequest(PROTOCOL.METHOD_GET, { key }); + return response.repd?.data; + } + + async list(keyStart, keyEnd) { + const response = await this.sendRequest(PROTOCOL.METHOD_LIST, { + parameters: { + gte: keyStart, + lte: keyEnd, + keys: true, + values: true, + maxKeys: 1000, + }, + }); + return response.repd?.data || []; + } + + close() { + if (this.client) { + this.client.end(); + } + } +} + +// =========================================================================== +// Permission checking helpers +// =========================================================================== + +/** Extract account ID from a role ARN */ +function getAccountIdFromArn(arn) { + return arn.split(':')[4]; +} + +/** Check if a policy action matches s3:ReplicateObject */ +function isReplicateAction(action) { + return action === '*' + || action === 's3:*' + || action === 's3:Replicate*' + || action === 
's3:ReplicateObject'; +} + +/** Check if a policy resource matches the bucket */ +function isMatchingResource(resource, bucketArn) { + return resource === '*' + || resource === 'arn:aws:s3:::*' + || resource === 'arn:aws:s3:::*/*' + || resource === bucketArn + || resource === `${bucketArn}/*`; +} + +/** Check if a policy document grants s3:ReplicateObject for a bucket */ +function policyAllowsReplication(policyDoc, bucketName) { + const bucketArn = `arn:aws:s3:::${bucketName}`; + const statements = [].concat(policyDoc.Statement || []); + + for (const statement of statements) { + if (statement.Effect !== 'Allow') { + continue; + } + + const actions = [].concat(statement.Action || []); + const hasReplicateAction = actions.some(isReplicateAction); + if (!hasReplicateAction) { + continue; + } + + const resources = [].concat(statement.Resource || []); + const hasMatchingResource = resources.some(r => isMatchingResource(r, bucketArn)); + if (hasMatchingResource) { + return true; + } + } + + return false; +} + +/** Extract the current policy document from stored policy data */ +function extractPolicyDocument(policyData) { + const policy = JSON.parse(policyData); + const defaultVersion = policy.defaultVersion || 1; + const version = (policy.versions || []).find(v => v.id === defaultVersion); + return version?.doc || {}; +} + +// =========================================================================== +// Core logic +// =========================================================================== + +/** + * Check if a bucket's replication role has s3:ReplicateObject permission + */ +async function checkBucketPermissions(repdClient, bucket, sourceRole) { + const accountId = getAccountIdFromArn(sourceRole); + const result = { + bucket, + sourceRole, + policies: [], + }; + + // Get role ID from ARN + const roleKey = VaultKeys.roleByArn(sourceRole); + const roleId = await repdClient.get(roleKey); + + if (!roleId) { + return { + hasPermission: false, + result: { ...result, 
error: 'Role not found in vault' }, + }; + } + + // List policies attached to the role + const policyKeyPrefix = VaultKeys.policiesByRole(accountId, roleId); + const [keyStart, keyEnd] = VaultKeys.getRangeLimits(policyKeyPrefix); + const policyLinks = await repdClient.list(keyStart, keyEnd); + + if (policyLinks.length === 0) { + return { + hasPermission: false, + result: { ...result, error: 'No policies attached to role' }, + }; + } + + // Check each attached policy + let hasPermission = false; + + for (const link of policyLinks) { + const { name: policyName, path: policyPath } = VaultKeys.extractPolicyFromKey(link.key); + const policyKey = VaultKeys.policyByName(accountId, policyName); + const policyData = await repdClient.get(policyKey); + + if (!policyData) { + continue; + } + + try { + const policyDoc = extractPolicyDocument(policyData); + const allowsReplication = policyAllowsReplication(policyDoc, bucket); + + const policyInfo = { + name: policyName, + path: policyPath, + allowsReplicateObject: allowsReplication, + }; + if (CONFIG.includePolicies) { + policyInfo.document = policyDoc; + } + result.policies.push(policyInfo); + + if (allowsReplication) { + hasPermission = true; + } + } catch (e) { + result.policies.push({ name: policyName, error: 'Failed to parse policy' }); + } + } + + return { hasPermission, result }; +} + +/** Build the output object with metadata and results */ +function buildOutput(buckets, results, stats, rolesChecked, startTime) { + const durationMs = Date.now() - startTime; + + return { + metadata: { + timestamp: new Date().toISOString(), + durationMs, + durationHuman: `${(durationMs / 1000).toFixed(1)}s`, + repdLeader: `${CONFIG.leaderIp}:${CONFIG.repdPort}`, + inputFile: CONFIG.inputFile, + counts: { + totalBuckets: buckets.length, + bucketsOk: stats.ok, + bucketsMissingPermission: stats.missing, + bucketsSkipped: stats.skipped, + bucketsWithErrors: stats.errors, + uniqueRolesChecked: rolesChecked.size, + }, + }, + results, + }; +} + 
+/** Print summary to stderr */ +function printSummary(bucketCount, stats, rolesChecked, durationHuman) { + log('\n=== Summary ==='); + log(`Total checked: ${bucketCount}`); + log(` OK: ${stats.ok}`); + log(` Missing permission: ${stats.missing}`); + log(` Skipped (no role): ${stats.skipped}`); + log(` Errors: ${stats.errors}`); + log(`Unique roles checked: ${rolesChecked.size}`); + log(`Duration: ${durationHuman}`); +} + +// =========================================================================== +// Main +// =========================================================================== +async function main() { + const startTime = Date.now(); + + log('=== Check Replication Role Permissions ==='); + log(`Input: ${CONFIG.inputFile}`); + log(`Output: ${CONFIG.outputFile}`); + log(`Repd: ${CONFIG.leaderIp}:${CONFIG.repdPort}`); + log(''); + + // Read input file + const inputData = JSON.parse(fs.readFileSync(CONFIG.inputFile, 'utf8')); + const buckets = inputData.results || inputData; // Support both old and new format + log(`Processing ${buckets.length} buckets...\n`); + + // Connect to repd + const repdClient = new RepdClient(CONFIG.leaderIp, CONFIG.repdPort, CONFIG.dbName); + await repdClient.connect(); + + const stats = { + ok: 0, + missing: 0, + skipped: 0, + errors: 0, + }; + const rolesChecked = new Set(); + const results = []; + + // Process each bucket + for (let i = 0; i < buckets.length; i++) { + const { bucket, sourceRole } = buckets[i]; + + if (!sourceRole) { + logProgress(i + 1, buckets.length, bucket, 'SKIP (no role)'); + stats.skipped++; + continue; + } + + rolesChecked.add(sourceRole); + + try { + const { hasPermission, result } = await checkBucketPermissions(repdClient, bucket, sourceRole); + + if (hasPermission) { + logProgress(i + 1, buckets.length, bucket, 'OK'); + stats.ok++; + } else { + const reason = result.error || 's3:ReplicateObject'; + logProgress(i + 1, buckets.length, bucket, `MISSING: ${reason}`); + results.push(result); + 
stats.missing++; + } + } catch (e) { + logProgress(i + 1, buckets.length, bucket, `ERROR: ${e.message}`); + results.push({ bucket, sourceRole, error: e.message, policies: [] }); + stats.errors++; + } + } + + repdClient.close(); + + // Build and save output + const output = buildOutput(buckets, results, stats, rolesChecked, startTime); + fs.writeFileSync(CONFIG.outputFile, JSON.stringify(output, null, 2)); + + // Print summary + printSummary(buckets.length, stats, rolesChecked, output.metadata.durationHuman); + + if (results.length > 0) { + log(`Output saved to: ${CONFIG.outputFile}`); + } + + console.log(JSON.stringify(output, null, 2)); +} + +// Run main only when executed directly (not when required as a module) +if (require.main === module) { + main().catch(e => { + console.error('Fatal error:', e.message); + process.exit(1); + }); +} + +// Export for testing +module.exports = { + policyAllowsReplication, +}; diff --git a/replicationAudit/list-buckets-with-replication.sh b/replicationAudit/list-buckets-with-replication.sh new file mode 100755 index 00000000..f4fcd6be --- /dev/null +++ b/replicationAudit/list-buckets-with-replication.sh @@ -0,0 +1,325 @@ +#!/bin/bash +# +# list-buckets-with-replication.sh +# +# Lists all buckets with replication enabled by querying the S3 metadata API. +# Output is a JSON file with metadata and results. 
+#
# Usage: ./list-buckets-with-replication.sh
#
# Environment variables:
#   BUCKETD_HOST - Bucketd hostname (default: localhost)
#   BUCKETD_PORT - Bucketd port (default: 9000)
#   BATCH_SIZE   - Number of parallel requests (default: 10)
#   OUTPUT_FILE  - Output file path (default: buckets-with-replication.json)
#

set -e

# ===========================================================================
# Configuration
# ===========================================================================
BUCKETD_HOST="${BUCKETD_HOST:-localhost}"
BUCKETD_PORT="${BUCKETD_PORT:-9000}"
BUCKETD_URL="http://${BUCKETD_HOST}:${BUCKETD_PORT}"
BATCH_SIZE="${BATCH_SIZE:-10}" # low to not overload bucketd
OUTPUT_FILE="${OUTPUT_FILE:-buckets-with-replication.json}"

# Runtime variables
TMP_DIR=$(mktemp -d)
START_TIME=$(date +%s)
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
FETCH_ERRORS=0

# Cleanup temporary directory on exit.
# Single-quoted so $TMP_DIR is expanded (quoted) when the trap fires,
# not interpolated unquoted into the trap string at definition time.
trap 'rm -rf "$TMP_DIR"' EXIT

# ===========================================================================
# Helper functions
# ===========================================================================

log() {
    echo "$@"
}

log_error() {
    # $* joins all arguments into a single message string
    echo "Error: $*" >&2
}

check_dependencies() {
    local missing=0
    for cmd in curl jq; do
        if ! command -v "$cmd" &> /dev/null; then
            log_error "$cmd is required but not installed"
            missing=1
        fi
    done
    return $missing
}

check_bucketd_connection() {
    log "Checking bucketd connectivity..."
    if ! curl -s --connect-timeout 5 "$BUCKETD_URL/_/healthcheck" > /dev/null 2>&1; then
        log_error "Cannot connect to bucketd at $BUCKETD_URL"
        return 1
    fi
    log "Connected to bucketd"
}

# Fetch all bucket names from the users..bucket database
# Handles pagination for large deployments
# Exits script on failure
fetch_bucket_list() {
    local marker=""
    local is_truncated="true"

    while [ "$is_truncated" = "true" ]; do
        local url="$BUCKETD_URL/default/bucket/users..bucket"
        [ -n "$marker" ] && url="${url}?marker=${marker}"

        local response
        response=$(curl -sf "$url") || {
            log_error "Failed to fetch bucket list from users..bucket"
            exit 1
        }

        # Extract bucket names from response
        # Keys are formatted as "{canonicalId}..|..{bucketName}"
        echo "$response" | jq -r '.Contents[].key // empty' | sed 's/.*\.\.|\.\.//'

        # Check if there are more pages
        is_truncated=$(echo "$response" | jq -r '.IsTruncated')
        if [ "$is_truncated" = "true" ]; then
            # @uri-encode the last key so it is safe in the query string
            marker=$(echo "$response" | jq -r '.Contents[-1].key | @uri')
        fi
    done
}

# Fetch bucket attributes for a batch of buckets
fetch_bucket_attributes_batch() {
    local buckets=("$@")
    local curl_args=()

    for bucket in "${buckets[@]}"; do
        curl_args+=("-o" "$TMP_DIR/attr_${bucket}.json" "$BUCKETD_URL/default/attributes/$bucket")
    done

    # Try parallel fetch first (curl >= 7.66), fall back to sequential
    # if --parallel is not supported by the installed curl
    curl -s --parallel --parallel-max "$BATCH_SIZE" "${curl_args[@]}" 2>/dev/null \
        || curl -s "${curl_args[@]}" 2>/dev/null \
        || true
}

# Check if a bucket has replication enabled
# Returns 0 if replication is enabled, 1 otherwise
has_replication_enabled() {
    local attr_file="$1"

    # Check if file exists and is not empty
    if [ ! -f "$attr_file" ] || [ ! -s "$attr_file" ]; then
        return 1
    fi

    # Check if replicationConfiguration exists
    local repl_config
    repl_config=$(jq -r '.replicationConfiguration // empty' "$attr_file" 2>/dev/null)
    if [ -z "$repl_config" ] || [ "$repl_config" = "null" ]; then
        return 1
    fi

    # Check if any rule is enabled
    # Handle different API formats: enabled:true, status:"Enabled", Status:"Enabled"
    local enabled_count
    enabled_count=$(jq -r '
        .replicationConfiguration.rules // .replicationConfiguration.Rules // []
        | [.[] | select(.enabled == true or .status == "Enabled" or .Status == "Enabled")]
        | length
    ' "$attr_file" 2>/dev/null)

    # Default to 0 when jq failed: an empty string would make the
    # integer comparison below error out instead of meaning "not enabled"
    if [ "${enabled_count:-0}" -gt 0 ]; then
        return 0
    fi

    return 1
}

# Extract replication info from bucket attributes
extract_replication_info() {
    local attr_file="$1"
    local bucket="$2"

    local owner
    local owner_display_name
    local full_role
    local source_role

    owner=$(jq -r '.owner // "unknown"' "$attr_file")
    owner_display_name=$(jq -r '.ownerDisplayName // "unknown"' "$attr_file")

    # Role format is "sourceRole,destRole" - extract source role (before comma)
    full_role=$(jq -r '.replicationConfiguration.role // .replicationConfiguration.Role // ""' "$attr_file")
    source_role=$(echo "$full_role" | cut -d',' -f1)

    # Build JSON result object
    jq -n \
        --arg bucket "$bucket" \
        --arg owner "$owner" \
        --arg ownerDisplayName "$owner_display_name" \
        --arg sourceRole "$source_role" \
        '{
            bucket: $bucket,
            owner: $owner,
            ownerDisplayName: $ownerDisplayName,
            sourceRole: $sourceRole
        }'
}


# Build the final output JSON with metadata
build_output() {
    local bucket_count="$1"
    local repl_count="$2"
    local duration="$3"
    local fetch_errors="$4"

    jq -n \
        --arg timestamp "$TIMESTAMP" \
        --argjson durationSeconds "$duration" \
        --arg bucketdUrl "$BUCKETD_URL" \
        --argjson totalBucketsScanned "$bucket_count" \
        --argjson bucketsWithReplication "$repl_count" \
        --argjson bucketsWithoutReplication "$((bucket_count - repl_count - fetch_errors))" \
        --argjson fetchErrors "$fetch_errors" \
        --slurpfile results "$TMP_DIR/results.ndjson" \
        '{
            metadata: {
                timestamp: $timestamp,
                durationSeconds: $durationSeconds,
                bucketdUrl: $bucketdUrl,
                counts: {
                    totalBucketsScanned: $totalBucketsScanned,
                    bucketsWithReplication: $bucketsWithReplication,
                    bucketsWithoutReplication: $bucketsWithoutReplication,
                    fetchErrors: $fetchErrors
                }
            },
            results: $results
        }'
}

print_summary() {
    local bucket_count="$1"
    local repl_count="$2"
    local duration="$3"
    local fetch_errors="$4"

    log ""
    log "=== Summary ==="
    log "Total buckets scanned: $bucket_count"
    log "  With replication: $repl_count"
    log "  Without replication: $((bucket_count - repl_count - fetch_errors))"
    log "  Fetch errors: $fetch_errors"
    log "Duration: ${duration}s"
    log "Output saved to: $OUTPUT_FILE"
    log ""
    log "Done."
}

# ===========================================================================
# Main
# ===========================================================================

main() {
    log "=== List Buckets with Replication Enabled ==="
    log "Bucketd: $BUCKETD_URL"
    log "Batch size: $BATCH_SIZE"
    log "Output file: $OUTPUT_FILE"
    log ""

    # Check prerequisites
    check_dependencies || exit 1
    check_bucketd_connection || exit 1

    # Fetch list of all buckets
    log ""
    log "Step 1: Fetching bucket list..."

    # Stream bucket list to file (memory-efficient for large bucket counts)
    fetch_bucket_list > "$TMP_DIR/buckets.txt"

    local bucket_count
    bucket_count=$(wc -l < "$TMP_DIR/buckets.txt" | tr -d ' ')

    log "Found $bucket_count buckets (excluding internal buckets)"

    if [ "$bucket_count" -eq 0 ]; then
        log "No buckets found"
        echo '{"metadata":{},"results":[]}' > "$OUTPUT_FILE"
        exit 0
    fi

    # Process buckets in batches
    log ""
    log "Step 2: Fetching bucket attributes in batches of $BATCH_SIZE..."

    local processed=0
    local batch_num=0
    local batch_start=1

    # Initialize empty results file (NDJSON format: one JSON object per line)
    : > "$TMP_DIR/results.ndjson"

    while [ "$batch_start" -le "$bucket_count" ]; do
        batch_num=$((batch_num + 1))

        # Read current batch from file (memory-efficient)
        local batch=()
        while IFS= read -r bucket; do
            batch+=("$bucket")
        done < <(sed -n "${batch_start},$((batch_start + BATCH_SIZE - 1))p" "$TMP_DIR/buckets.txt")

        local batch_count=${#batch[@]}

        # Fetch attributes for this batch
        fetch_bucket_attributes_batch "${batch[@]}"

        # Process each bucket in the batch
        for bucket in "${batch[@]}"; do
            local attr_file="$TMP_DIR/attr_${bucket}.json"

            # Check if fetch failed (file missing or empty)
            if [ ! -s "$attr_file" ]; then
                FETCH_ERRORS=$((FETCH_ERRORS + 1))
                continue
            fi

            if has_replication_enabled "$attr_file"; then
                extract_replication_info "$attr_file" "$bucket" >> "$TMP_DIR/results.ndjson"
            fi

            # Cleanup attribute file
            rm -f "$attr_file"
        done

        processed=$((processed + batch_count))
        batch_start=$((batch_start + BATCH_SIZE))
        log "  Processed $processed/$bucket_count buckets (batch $batch_num)"
    done

    # Calculate final stats
    local repl_count
    repl_count=$(jq -s 'length' "$TMP_DIR/results.ndjson" 2>/dev/null || echo 0)

    local end_time
    end_time=$(date +%s)

    local duration=$((end_time - START_TIME))

    # Build and save final output
    build_output "$bucket_count" "$repl_count" "$duration" "$FETCH_ERRORS" > "$OUTPUT_FILE"

    # Print summary
    print_summary "$bucket_count" "$repl_count" "$duration" "$FETCH_ERRORS"
}

main "$@"
diff --git a/tests/unit/replicationAudit/policyChecker.js b/tests/unit/replicationAudit/policyChecker.js
new file mode 100644
index 00000000..bb234868
--- /dev/null
+++ b/tests/unit/replicationAudit/policyChecker.js
@@ -0,0 +1,302 @@
+const {
+    policyAllowsReplication,
+} =
require('../../../replicationAudit/check-replication-permissions'); + +describe('policyAllowsReplication', () => { + const bucketName = 'source-bucket'; + + describe('valid policies (should allow)', () => { + test('policy with explicit s3:ReplicateObject and matching bucket', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Allow', + Action: 's3:ReplicateObject', + Resource: 'arn:aws:s3:::source-bucket/*', + }], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(true); + }); + + test('policy with s3:* wildcard action', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Allow', + Action: 's3:*', + Resource: '*', + }], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(true); + }); + + test('policy with full wildcard (*) action', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Allow', + Action: '*', + Resource: '*', + }], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(true); + }); + + test('policy with s3:Replicate* wildcard', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Allow', + Action: 's3:Replicate*', + Resource: 'arn:aws:s3:::source-bucket/*', + }], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(true); + }); + + test('policy with action array including s3:ReplicateObject', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Allow', + Action: [ + 's3:GetObjectVersion', + 's3:GetObjectVersionAcl', + 's3:ReplicateObject', + 's3:ReplicateDelete', + ], + Resource: 'arn:aws:s3:::source-bucket/*', + }], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(true); + }); + + test('policy with resource array including matching bucket', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Allow', + Action: 's3:ReplicateObject', + Resource: [ + 'arn:aws:s3:::other-bucket/*', + 'arn:aws:s3:::source-bucket/*', + ], + }], + }; + 
expect(policyAllowsReplication(policy, bucketName)).toBe(true); + }); + + test('policy with multiple statements, one matching', () => { + const policy = { + Version: '2012-10-17', + Statement: [ + { + Effect: 'Allow', + Action: 's3:ListBucket', + Resource: 'arn:aws:s3:::source-bucket', + }, + { + Effect: 'Allow', + Action: 's3:ReplicateObject', + Resource: 'arn:aws:s3:::source-bucket/*', + }, + ], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(true); + }); + + test('policy with arn:aws:s3:::* resource wildcard', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Allow', + Action: 's3:ReplicateObject', + Resource: 'arn:aws:s3:::*', + }], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(true); + }); + + test('policy with exact bucket ARN (no /* suffix)', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Allow', + Action: 's3:ReplicateObject', + Resource: 'arn:aws:s3:::source-bucket', + }], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(true); + }); + }); + + describe('invalid policies (should reject)', () => { + test('policy with Deny effect', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Deny', + Action: 's3:ReplicateObject', + Resource: 'arn:aws:s3:::source-bucket/*', + }], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(false); + }); + + test('policy missing s3:ReplicateObject action', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Allow', + Action: [ + 's3:GetObjectVersion', + 's3:GetObjectVersionAcl', + ], + Resource: 'arn:aws:s3:::source-bucket/*', + }], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(false); + }); + + test('policy with wrong bucket resource', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Allow', + Action: 's3:ReplicateObject', + Resource: 'arn:aws:s3:::other-bucket/*', + }], + }; + 
expect(policyAllowsReplication(policy, bucketName)).toBe(false); + }); + + test('policy with empty Statement array', () => { + const policy = { + Version: '2012-10-17', + Statement: [], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(false); + }); + + test('policy with no Statement property', () => { + const policy = { + Version: '2012-10-17', + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(false); + }); + + test('empty policy object', () => { + expect(policyAllowsReplication({}, bucketName)).toBe(false); + }); + + test('policy with only bucket-level actions (no object actions)', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Allow', + Action: [ + 's3:ListBucket', + 's3:GetReplicationConfiguration', + ], + Resource: 'arn:aws:s3:::source-bucket', + }], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(false); + }); + + test('policy with s3:ReplicateDelete but not s3:ReplicateObject', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Allow', + Action: 's3:ReplicateDelete', + Resource: 'arn:aws:s3:::source-bucket/*', + }], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(false); + }); + + test('policy with partial bucket name match', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Allow', + Action: 's3:ReplicateObject', + Resource: 'arn:aws:s3:::source-bucket-backup/*', + }], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(false); + }); + }); + + describe('edge cases', () => { + test('statement with missing Effect defaults to implicit deny', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Action: 's3:ReplicateObject', + Resource: 'arn:aws:s3:::source-bucket/*', + }], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(false); + }); + + test('statement with missing Action', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Allow', + 
Resource: 'arn:aws:s3:::source-bucket/*', + }], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(false); + }); + + test('statement with missing Resource', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Allow', + Action: 's3:ReplicateObject', + }], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(false); + }); + + test('statement with null Action', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Allow', + Action: null, + Resource: 'arn:aws:s3:::source-bucket/*', + }], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(false); + }); + + test('statement with empty Action array', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Allow', + Action: [], + Resource: 'arn:aws:s3:::source-bucket/*', + }], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(false); + }); + + test('statement with empty Resource array', () => { + const policy = { + Version: '2012-10-17', + Statement: [{ + Effect: 'Allow', + Action: 's3:ReplicateObject', + Resource: [], + }], + }; + expect(policyAllowsReplication(policy, bucketName)).toBe(false); + }); + }); +}); From d9387382765155c0b92e840ac73ecf18a5d590f6 Mon Sep 17 00:00:00 2001 From: Nicolas Humbert Date: Tue, 20 Jan 2026 11:26:23 +0100 Subject: [PATCH 2/2] bump version --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index cbd48c36..7623f063 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "s3utils", - "version": "1.17.0", + "version": "1.17.1", "engines": { "node": ">= 22" },