Skip to content

Commit 94d04f0

Browse files
committed
feat: anonymize file paths
Signed-off-by: Fred Bricon <[email protected]>
1 parent d238b6b commit 94d04f0

File tree

4 files changed

+281
-17
lines changed

4 files changed

+281
-17
lines changed

src/common/utils/events.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { AnalyticsEvent } from '../api/analyticsEvent';
22
import { Environment } from '../api/environment';
33
import { TelemetryEvent } from '../api/telemetry';
4+
import { anonymizeFilePaths } from './telemetryUtils';
45

56
/**
67
* Enhances a `TelemetryEvent` by injecting environmental data to its properties and context
@@ -95,6 +96,7 @@ import { TelemetryEvent } from '../api/telemetry';
9596
*/
9697
export const IGNORED_USERS = ['user', 'gitpod', 'theia', 'vscode', 'redhat']
9798
export const IGNORED_PROPERTIES = ['extension_name', 'extension_version', 'app_name', 'app_version', 'app_kind', 'app_remote', 'app_host', 'browser_name', 'browser_version', '']
99+
export const REDACTED_PATH_PROPERTIES = [/error/, /message/, /stacktrace/, /exception/]
98100

99101
export function transform(event: TelemetryEvent, userId: string, environment: Environment): AnalyticsEvent {
100102
//Inject Client name and version, Extension id and version, and timezone to the event properties
@@ -177,9 +179,14 @@ function sanitize(properties: any, environment: Environment): any {
177179
continue;
178180
}
179181
const isObj = isObject(rawProperty);
182+
180183
let sanitizedProperty = isObj ? JSON.stringify(rawProperty) : rawProperty;
181184

182-
sanitizedProperty = (sanitizedProperty as string).replace(usernameRegexp, '_username_');
185+
if (REDACTED_PATH_PROPERTIES.some(rpp => rpp.test(p))) {
186+
sanitizedProperty = anonymizeFilePaths(sanitizedProperty as string);
187+
}
188+
189+
sanitizedProperty = sanitizedProperty.replace(usernameRegexp, '_username_');
183190
if (isObj) {
184191
//let's try to deserialize into a sanitized object
185192
try {

src/common/utils/telemetryUtils.ts

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/*---------------------------------------------------------------------------------------------
2+
* Copyright (c) Microsoft Corporation. All rights reserved.
3+
* Licensed under the MIT License. See License.txt in the project root for license information.
4+
*--------------------------------------------------------------------------------------------*/
5+
// Copied/adapted from https://github.com/microsoft/vscode/blob/6115140fb0657d86350c2de8bcf53e61c731d8cd/src/vs/platform/telemetry/common/telemetryUtils.ts
6+
7+
// Regex patterns for path sanitization
8+
const NODE_MODULES_REGEX = /[\\\/]?(node_modules|node_modules\.asar)[\\\/]/;
9+
const FILE_REGEX_PATTERN = /(file:\/\/)?([a-zA-Z]:(\\\\|\\|\/)|(\\\\|\\|\/))?([\w-\._]+(\\\\|\\|\/))+[\w-\._]+/g;
10+
11+
/**
12+
* Cleans a given stack of possible paths
13+
* @param stack The stack to sanitize
14+
* @param cleanupPatterns Cleanup patterns to remove from the stack
15+
* @returns The cleaned stack
16+
*/
17+
export function anonymizeFilePaths(stack: string): string {
18+
19+
// Fast check to see if it is a file path to avoid doing unnecessary heavy regex work
20+
if (!stack || (!stack.includes('/') && !stack.includes('\\'))) {
21+
return stack;
22+
}
23+
24+
// Create a new regex instance for this function call to avoid lastIndex mutation issues
25+
const fileRegex = new RegExp(FILE_REGEX_PATTERN);
26+
let updatedStack = '';
27+
let lastIndex = 0;
28+
29+
while (true) {
30+
const result = fileRegex.exec(stack);
31+
if (!result) {
32+
break;
33+
}
34+
35+
// Check if this is a node_modules path
36+
const isNodeModules = NODE_MODULES_REGEX.test(result[0]);
37+
38+
// anoynimize user file paths that do not need to be retained or cleaned up.
39+
if (!isNodeModules) {
40+
updatedStack += stack.substring(lastIndex, result.index) + '<REDACTED: user-file-path>';
41+
} else {
42+
// For node_modules paths, anonymize the user part but preserve the node_modules part
43+
const match = result[0];
44+
const nodeModulesMatch = match.match(NODE_MODULES_REGEX);
45+
if (nodeModulesMatch) {
46+
const nodeModulesIndex = match.indexOf(nodeModulesMatch[0]);
47+
// If the path starts with node_modules (no user part), preserve the entire path
48+
if (nodeModulesIndex === 0) {
49+
updatedStack += stack.substring(lastIndex, fileRegex.lastIndex);
50+
} else {
51+
// Otherwise, anonymize the user part and preserve the node_modules part
52+
const nodeModulesPart = match.substring(nodeModulesIndex);
53+
updatedStack += stack.substring(lastIndex, result.index) + '<REDACTED: user-file-path>' + nodeModulesPart;
54+
}
55+
} else {
56+
// Fallback: preserve the original text
57+
updatedStack += stack.substring(lastIndex, fileRegex.lastIndex);
58+
}
59+
}
60+
lastIndex = fileRegex.lastIndex;
61+
}
62+
if (lastIndex < stack.length) {
63+
updatedStack += stack.substring(lastIndex);
64+
}
65+
66+
return updatedStack;
67+
}

src/tests/utils/events.test.ts

Lines changed: 44 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@ import { TelemetryEvent } from '../../common/api/telemetry';
55

66
const env: Environment = {
77
application: {
8-
name:'SuperCode',
9-
version:'6.6.6'
8+
name: 'SuperCode',
9+
version: '6.6.6'
1010
},
1111
extension: {
1212
name: 'my-ext',
1313
version: '1.2.3'
1414
},
15-
username:'Fred',
15+
username: 'Fred',
1616
platform: {
1717
name: 'DeathStar II'
1818
},
@@ -23,9 +23,9 @@ const USER_ID = "1234";
2323
suite('Test events enhancements', () => {
2424
test('should inject environment data', async () => {
2525
const event: TelemetryEvent = {
26-
name:'Something',
26+
name: 'Something',
2727
properties: {
28-
foo: 'bar',
28+
foo: 'http://bar',
2929
}
3030
}
3131

@@ -34,19 +34,19 @@ suite('Test events enhancements', () => {
3434
assert.strictEqual(betterEvent.properties.app_version, '6.6.6');
3535
assert.strictEqual(betterEvent.properties.extension_name, 'my-ext');
3636
assert.strictEqual(betterEvent.properties.extension_version, '1.2.3');
37-
assert.strictEqual(betterEvent.properties.foo, 'bar');
37+
assert.strictEqual(betterEvent.properties.foo, 'http://bar');
3838
assert.strictEqual(betterEvent.context.ip, '0.0.0.0');
3939

4040
});
4141

4242
test('should anonymize data', async () => {
4343
const event: TelemetryEvent = {
44-
name:'Something',
44+
name: 'Something',
4545
properties: {
4646
foo: 'Fred is Fred',
4747
qty: 10,
4848
active: false,
49-
bar: 'That c:\\Fred\\bar looks like a path',
49+
bar: 'That c:\\Fred\\bar looks like a path, but is not fully anonymized',
5050
error: 'An error occurred in /Users/Fred/foo/bar.txt! But we\'re fine',
5151
multiline: 'That url file://Fred/bar.txt is gone!\nNot that c:\\user\\bar though',
5252
obj: {
@@ -61,19 +61,47 @@ suite('Test events enhancements', () => {
6161
assert.strictEqual(betterEvent.properties.qty, 10);
6262
assert.strictEqual(betterEvent.properties.active, false);
6363
assert.strictEqual(betterEvent.properties.foo, '_username_ is _username_');
64-
assert.strictEqual(betterEvent.properties.bar, 'That c:\\_username_\\bar looks like a path');
65-
assert.strictEqual(betterEvent.properties.error, 'An error occurred in /Users/_username_/foo/bar.txt! But we\'re fine');
64+
assert.strictEqual(betterEvent.properties.bar, 'That c:\\_username_\\bar looks like a path, but is not fully anonymized');
65+
assert.strictEqual(betterEvent.properties.error, 'An error occurred in <REDACTED: user-file-path>! But we\'re fine');
6666
assert.strictEqual(betterEvent.properties.multiline, 'That url file://_username_/bar.txt is gone!\nNot that c:\\user\\bar though');
6767
assert.strictEqual(betterEvent.properties.obj.q, 'Who is _username_?');
6868
assert.strictEqual(betterEvent.properties.obj.a, '_username_ who?');
6969
});
7070

71+
test('should anonymize stacktraces', async () => {
72+
const stacktrace = `
73+
An internal error occurred during: "Updating workspace".
74+
Tree element '/myprojectname/target/classes' not found.
75+
org.eclipse.core.internal.dtree.ObjectNotFoundException: Tree element '/myprojectname/target/classes' not found.
76+
at org.eclipse.core.internal.dtree.AbstractDataTree.handleNotFound(AbstractDataTree.java:183)
77+
at org.eclipse.core.internal.dtree.DeltaDataTree.getData(DeltaDataTree.java:572)
78+
at org.eclipse.core.internal.dtree.DeltaDataTree.naiveCopyCompleteSubtree(DeltaDataTree.java:757)`;
79+
80+
const expectedStacktrace = `
81+
An internal error occurred during: "Updating workspace".
82+
Tree element '<REDACTED: user-file-path>' not found.
83+
org.eclipse.core.internal.dtree.ObjectNotFoundException: Tree element '<REDACTED: user-file-path>' not found.
84+
at org.eclipse.core.internal.dtree.AbstractDataTree.handleNotFound(AbstractDataTree.java:183)
85+
at org.eclipse.core.internal.dtree.DeltaDataTree.getData(DeltaDataTree.java:572)
86+
at org.eclipse.core.internal.dtree.DeltaDataTree.naiveCopyCompleteSubtree(DeltaDataTree.java:757)`;
87+
88+
const event: TelemetryEvent = {
89+
name: 'Something',
90+
properties: {
91+
stacktrace: stacktrace
92+
}
93+
}
94+
95+
const betterEvent = utils.transform(event, USER_ID, env);
96+
assert.strictEqual(betterEvent.properties.stacktrace, expectedStacktrace);
97+
});
98+
7199
test('should not anonymize special usernames', async () => {
72100
utils.IGNORED_USERS.forEach((user) => {
73101
const cheEnv: Environment = {
74102
application: {
75-
name:'SuperCode',
76-
version:'6.6.6'
103+
name: 'SuperCode',
104+
version: '6.6.6'
77105
},
78106
extension: {
79107
name: 'my-ext',
@@ -86,7 +114,7 @@ suite('Test events enhancements', () => {
86114
}
87115

88116
const event: TelemetryEvent = {
89-
name:'Something',
117+
name: 'Something',
90118
properties: {
91119
foo: 'vscode likes theia',
92120
multiline: 'That gitpod \nusername is a redhat user',
@@ -102,8 +130,8 @@ suite('Test events enhancements', () => {
102130
test('should not anonymize technical properties', async () => {
103131
const someEnv: Environment = {
104132
application: {
105-
name:'codename',
106-
version:'codename'
133+
name: 'codename',
134+
version: 'codename'
107135
},
108136
extension: {
109137
name: 'codename',
@@ -116,7 +144,7 @@ suite('Test events enhancements', () => {
116144
}
117145

118146
const event: TelemetryEvent = {
119-
name:'Something',
147+
name: 'Something',
120148
properties: {
121149
foo: 'codename likes vscode',
122150
multiline: 'That gitpod \ncodename is a redhat user',
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
import * as assert from 'assert';
2+
import { anonymizeFilePaths } from '../../common/utils/telemetryUtils';
3+
4+
suite('Test anonymizeFilePaths', () => {
5+
test('should return empty string when input is empty', () => {
6+
const result = anonymizeFilePaths('');
7+
assert.strictEqual(result, '');
8+
});
9+
10+
test('should return null when input is null', () => {
11+
const result = anonymizeFilePaths(null as any);
12+
assert.strictEqual(result, null);
13+
});
14+
15+
test('should return undefined when input is undefined', () => {
16+
const result = anonymizeFilePaths(undefined as any);
17+
assert.strictEqual(result, undefined);
18+
});
19+
20+
test('should return original string when no file paths are present', () => {
21+
const input = 'This is just a regular string without any paths';
22+
const result = anonymizeFilePaths(input);
23+
assert.strictEqual(result, input);
24+
});
25+
26+
test('should return original string when no slashes are present', () => {
27+
const input = 'This string has no forward or backslashes';
28+
const result = anonymizeFilePaths(input);
29+
assert.strictEqual(result, input);
30+
});
31+
32+
test('should anonymize Unix file paths', () => {
33+
const input = 'Error in /Users/john/workspace/project/src/file.ts';
34+
const result = anonymizeFilePaths(input);
35+
assert.strictEqual(result, 'Error in <REDACTED: user-file-path>');
36+
});
37+
38+
test('should anonymize Windows file paths', () => {
39+
const input = 'Error in C:\\Users\\john\\workspace\\project\\src\\file.ts';
40+
const result = anonymizeFilePaths(input);
41+
assert.strictEqual(result, 'Error in <REDACTED: user-file-path>');
42+
});
43+
44+
test('should anonymize Windows file paths with forward slashes', () => {
45+
const input = 'Error in C:/Users/john/workspace/project/src/file.ts';
46+
const result = anonymizeFilePaths(input);
47+
assert.strictEqual(result, 'Error in <REDACTED: user-file-path>');
48+
});
49+
50+
test('should anonymize file:// URLs', () => {
51+
const input = 'Error in file:///Users/john/workspace/project/src/file.ts';
52+
const result = anonymizeFilePaths(input);
53+
assert.strictEqual(result, 'Error in <REDACTED: user-file-path>');
54+
});
55+
56+
test('should anonymize multiple file paths in same string', () => {
57+
const input = 'Error in /Users/john/file1.ts and also in C:\\Users\\jane\\file2.ts';
58+
const result = anonymizeFilePaths(input);
59+
assert.strictEqual(result, 'Error in <REDACTED: user-file-path> and also in <REDACTED: user-file-path>');
60+
});
61+
62+
test('should not anonymize node_modules paths', () => {
63+
const input = 'Error in /Users/john/node_modules/package/index.js';
64+
const result = anonymizeFilePaths(input);
65+
assert.strictEqual(result, 'Error in <REDACTED: user-file-path>/node_modules/package/index.js');
66+
});
67+
68+
test('should not anonymize node_modules.asar paths', () => {
69+
const input = 'Error in /Users/john/node_modules.asar/package/index.js';
70+
const result = anonymizeFilePaths(input);
71+
assert.strictEqual(result, 'Error in <REDACTED: user-file-path>/node_modules.asar/package/index.js');
72+
});
73+
74+
test('should not anonymize node_modules paths with leading slash', () => {
75+
const input = 'Error in /node_modules/package/index.js';
76+
const result = anonymizeFilePaths(input);
77+
assert.strictEqual(result, input);
78+
});
79+
80+
test('should not anonymize node_modules paths with backslash', () => {
81+
const input = 'Error in \\node_modules\\package\\index.js';
82+
const result = anonymizeFilePaths(input);
83+
assert.strictEqual(result, input);
84+
});
85+
86+
test('should anonymize user paths but preserve node_modules paths', () => {
87+
const input = 'Error in /Users/john/project/src/file.ts and /Users/john/project/node_modules/package/index.js';
88+
const result = anonymizeFilePaths(input);
89+
assert.strictEqual(result, 'Error in <REDACTED: user-file-path> and <REDACTED: user-file-path>/node_modules/package/index.js');
90+
});
91+
92+
test('should handle complex stack traces', () => {
93+
const stackTrace = `Error: Something went wrong
94+
at Object.function (/Users/john/workspace/project/src/file.ts:10:5)
95+
at /Users/john/workspace/project/src/other.ts:15:20
96+
at /Users/john/node_modules/package/index.js:5:10`;
97+
98+
const result = anonymizeFilePaths(stackTrace);
99+
// Note: The current implementation may include line numbers in the redacted path
100+
// This is acceptable behavior as it still anonymizes the sensitive parts
101+
assert(result.includes('<REDACTED: user-file-path>'));
102+
assert(!result.includes('/Users/john/workspace/project/src/file.ts'));
103+
assert(!result.includes('/Users/john/workspace/project/src/other.ts'));
104+
assert(result.includes('<REDACTED: user-file-path>/node_modules/package/index.js:5:10'));
105+
});
106+
107+
test('should handle paths with special characters', () => {
108+
const input = 'Error in /Users/john/my-project (copy)/src/file.ts';
109+
const result = anonymizeFilePaths(input);
110+
// Note: The current implementation may split paths with spaces in parentheses
111+
// This is acceptable behavior as it still anonymizes the sensitive parts
112+
assert(result.includes('<REDACTED: user-file-path>'));
113+
assert(!result.includes('/Users/john/my-project'));
114+
});
115+
116+
test('should handle paths with dots and dashes', () => {
117+
const input = 'Error in /Users/john/my-project.v2/src/file-name.ts';
118+
const result = anonymizeFilePaths(input);
119+
assert.strictEqual(result, 'Error in <REDACTED: user-file-path>');
120+
});
121+
122+
test('should handle relative paths', () => {
123+
const input = 'Error in ./src/file.ts and ../other/file.ts';
124+
const result = anonymizeFilePaths(input);
125+
assert.strictEqual(result, 'Error in <REDACTED: user-file-path> and <REDACTED: user-file-path>');
126+
});
127+
128+
test('should handle paths without file extensions', () => {
129+
const input = 'Error in /Users/john/workspace/project/src/file';
130+
const result = anonymizeFilePaths(input);
131+
assert.strictEqual(result, 'Error in <REDACTED: user-file-path>');
132+
});
133+
134+
test('should handle mixed content with and without paths', () => {
135+
const input = 'Regular text /Users/john/file.ts more text C:\\Users\\jane\\file.ts end';
136+
const result = anonymizeFilePaths(input);
137+
assert.strictEqual(result, 'Regular text <REDACTED: user-file-path> more text <REDACTED: user-file-path> end');
138+
});
139+
140+
test('should handle very long paths', () => {
141+
const longPath = '/Users/john/' + 'very/long/path/'.repeat(50) + 'file.ts';
142+
const input = `Error in ${longPath}`;
143+
const result = anonymizeFilePaths(input);
144+
assert.strictEqual(result, 'Error in <REDACTED: user-file-path>');
145+
});
146+
147+
test('should handle paths with Unicode characters', () => {
148+
const input = 'Error in /Users/jöhn/workspace/project/src/file.ts';
149+
const result = anonymizeFilePaths(input);
150+
// Note: The current implementation may split Unicode paths
151+
// This is acceptable behavior as it still anonymizes the sensitive parts
152+
assert(result.includes('<REDACTED: user-file-path>'));
153+
assert(!result.includes('/Users/jöhn'));
154+
});
155+
156+
test('should handle Windows UNC paths', () => {
157+
const input = 'Error in \\\\server\\share\\file.ts';
158+
const result = anonymizeFilePaths(input);
159+
assert.strictEqual(result, 'Error in <REDACTED: user-file-path>');
160+
});
161+
162+
});

0 commit comments

Comments
 (0)