feat: Implement normalization for dynamic values in exception messages and add comprehensive tests

This commit is contained in:
Nawaz Dhandala 2025-12-28 11:30:39 +00:00
parent cd7dfc4efb
commit f11c8fd60b
No known key found for this signature in database
GPG key ID: 96C5DCA24769DBCA
3 changed files with 553 additions and 2 deletions

View file

@ -0,0 +1,371 @@
import ExceptionUtil from "../../Utils/Exception";
import ObjectID from "Common/Types/ObjectID";
describe("ExceptionUtil", () => {
describe("normalizeForFingerprint", () => {
  type ExpectSameNormalizationFunction = (
    firstMessage: string,
    secondMessage: string,
    expected?: string,
  ) => void;

  /*
   * Normalizes both messages and asserts that they collapse to the same text.
   * When `expected` is supplied, the shared text must also equal it exactly.
   */
  const expectSameNormalization: ExpectSameNormalizationFunction = (
    firstMessage: string,
    secondMessage: string,
    expected?: string,
  ): void => {
    const firstResult: string =
      ExceptionUtil.normalizeForFingerprint(firstMessage);
    const secondResult: string =
      ExceptionUtil.normalizeForFingerprint(secondMessage);

    expect(firstResult).toBe(secondResult);

    if (expected !== undefined) {
      expect(firstResult).toBe(expected);
    }
  };

  test("normalizes Stripe subscription IDs", () => {
    expectSameNormalization(
      "No such subscription: 'sub_1POgR8ANuQdJ93r7dySVHs4K'",
      "No such subscription: 'sub_1PRZvTANuQdJ93r7K1nhUFZ9'",
      "No such subscription: '<STRIPE_ID>'",
    );
  });

  test("normalizes Stripe customer IDs", () => {
    expectSameNormalization(
      "Customer cus_ABC123DEF456GHI not found",
      "Customer cus_XYZ789JKL012MNO not found",
      "Customer <STRIPE_ID> not found",
    );
  });

  test("normalizes UUIDs", () => {
    expectSameNormalization(
      "Failed to find resource 550e8400-e29b-41d4-a716-446655440000",
      "Failed to find resource a1b2c3d4-e5f6-7890-abcd-ef1234567890",
      "Failed to find resource <UUID>",
    );
  });

  test("normalizes MongoDB ObjectIDs", () => {
    expectSameNormalization(
      "Document not found: 507f1f77bcf86cd799439011",
      "Document not found: 60a1b2c3d4e5f6a7b8c9d0e1",
      "Document not found: <OBJECT_ID>",
    );
  });

  test("normalizes IP addresses", () => {
    expectSameNormalization(
      "Connection refused from 192.168.1.100",
      "Connection refused from 10.0.0.50",
      "Connection refused from <IP>",
    );
  });

  test("normalizes email addresses", () => {
    expectSameNormalization(
      "Invalid email: user@example.com",
      "Invalid email: admin@company.org",
      "Invalid email: <EMAIL>",
    );
  });

  test("normalizes timestamps", () => {
    expectSameNormalization(
      "Request failed at 2024-03-15T14:30:00.000Z",
      "Request failed at 2024-12-01T09:15:30.500Z",
      "Request failed at <TIMESTAMP>",
    );
  });

  test("normalizes Unix timestamps", () => {
    expectSameNormalization(
      "Event occurred at 1710511800000",
      "Event occurred at 1733059530500",
      "Event occurred at <TIMESTAMP>",
    );
  });

  test("normalizes memory addresses", () => {
    expectSameNormalization(
      "Segmentation fault at 0x7fff5fbff8c0",
      "Segmentation fault at 0x00007ffe12345678",
      "Segmentation fault at <MEMORY_ADDR>",
    );
  });

  test("normalizes session IDs", () => {
    expectSameNormalization(
      "Session expired: session_id=abc123def456",
      "Session expired: session_id=xyz789jkl012",
      "Session expired: session_id=<SESSION>",
    );
  });

  test("normalizes request IDs", () => {
    expectSameNormalization(
      "Request failed: request_id=req_abc123",
      "Request failed: request_id=req_xyz789",
      "Request failed: request_id=<REQUEST>",
    );
  });

  test("normalizes large numbers", () => {
    /*
     * Digit-only runs of 8+ characters are also valid hex, so the exact
     * placeholder is not pinned here; only that both inputs collapse to the
     * same text (and therefore the same fingerprint).
     */
    expectSameNormalization(
      "User 8234567890 not found",
      "User 9876543210 not found",
    );
  });

  test("normalizes 7-digit numbers as NUMBER", () => {
    // Seven digits is too short for the 8+ character hex rule, so the NUMBER rule applies.
    expectSameNormalization(
      "Error code 1234567",
      "Error code 9876543",
      "Error code <NUMBER>",
    );
  });

  test("handles empty string", () => {
    const result: string = ExceptionUtil.normalizeForFingerprint("");
    expect(result).toBe("");
  });

  test("handles null/undefined gracefully", () => {
    // @ts-expect-error - testing edge case
    const resultForNull: string = ExceptionUtil.normalizeForFingerprint(null);
    // @ts-expect-error - testing edge case
    const resultForUndefined: string = ExceptionUtil.normalizeForFingerprint(undefined);

    expect(resultForNull).toBe("");
    expect(resultForUndefined).toBe("");
  });

  test("preserves meaningful text while normalizing IDs", () => {
    const result: string = ExceptionUtil.normalizeForFingerprint(
      "Failed to process payment for customer cus_ABC123DEF456GHI: Card declined",
    );

    expect(result).toBe(
      "Failed to process payment for customer <STRIPE_ID>: Card declined",
    );
  });

  test("normalizes multiple dynamic values in same message", () => {
    expectSameNormalization(
      "User user@example.com (id=12345678) failed to access resource 550e8400-e29b-41d4-a716-446655440000",
      "User admin@company.org (id=87654321) failed to access resource a1b2c3d4-e5f6-7890-abcd-ef1234567890",
    );
  });

  test("normalizes JWT tokens", () => {
    expectSameNormalization(
      "Invalid token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c",
      "Invalid token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5ODc2NTQzMjEwIiwibmFtZSI6IkphbmUgRG9lIiwiaWF0IjoxNjE2MjM5MDIyfQ.DifferentSignatureHere123456789",
      "Invalid token: <JWT>",
    );
  });

  test("normalizes generic service IDs with prefix_alphanumeric pattern", () => {
    expectSameNormalization(
      "Failed to find resource aws_abc123def456",
      "Failed to find resource aws_xyz789jkl012",
      "Failed to find resource <SERVICE_ID>",
    );
  });
});
describe("getFingerprint", () => {
  /*
   * These tests treat the fingerprint as an opaque string and only compare
   * two fingerprints for equality or inequality.
   */

  test("generates same fingerprint for exceptions with different dynamic IDs", () => {
    const projectId: ObjectID = ObjectID.generate();
    const serviceId: ObjectID = ObjectID.generate();

    // Only the Stripe subscription ID inside the message differs.
    const fingerprint1: string = ExceptionUtil.getFingerprint({
      projectId,
      serviceId,
      message: "No such subscription: 'sub_1POgR8ANuQdJ93r7dySVHs4K'",
      exceptionType: "StripeError",
      stackTrace: "at processPayment (payment.js:100)",
    });
    const fingerprint2: string = ExceptionUtil.getFingerprint({
      projectId,
      serviceId,
      message: "No such subscription: 'sub_1PRZvTANuQdJ93r7K1nhUFZ9'",
      exceptionType: "StripeError",
      stackTrace: "at processPayment (payment.js:100)",
    });

    expect(fingerprint1).toBe(fingerprint2);
  });

  test("generates different fingerprints for different exception types", () => {
    const projectId: ObjectID = ObjectID.generate();
    const serviceId: ObjectID = ObjectID.generate();

    // The messages differ only by a dynamic ID; the exception type is the real difference.
    const fingerprint1: string = ExceptionUtil.getFingerprint({
      projectId,
      serviceId,
      message: "No such subscription: 'sub_1POgR8ANuQdJ93r7dySVHs4K'",
      exceptionType: "StripeError",
      stackTrace: "at processPayment (payment.js:100)",
    });
    const fingerprint2: string = ExceptionUtil.getFingerprint({
      projectId,
      serviceId,
      message: "No such subscription: 'sub_1PRZvTANuQdJ93r7K1nhUFZ9'",
      exceptionType: "PaymentError",
      stackTrace: "at processPayment (payment.js:100)",
    });

    expect(fingerprint1).not.toBe(fingerprint2);
  });

  test("generates different fingerprints for different services", () => {
    const projectId: ObjectID = ObjectID.generate();
    const serviceId1: ObjectID = ObjectID.generate();
    const serviceId2: ObjectID = ObjectID.generate();

    // Same project and exception type; the service ID is the real difference.
    const fingerprint1: string = ExceptionUtil.getFingerprint({
      projectId,
      serviceId: serviceId1,
      message: "No such subscription: 'sub_1POgR8ANuQdJ93r7dySVHs4K'",
      exceptionType: "StripeError",
    });
    const fingerprint2: string = ExceptionUtil.getFingerprint({
      projectId,
      serviceId: serviceId2,
      message: "No such subscription: 'sub_1PRZvTANuQdJ93r7K1nhUFZ9'",
      exceptionType: "StripeError",
    });

    expect(fingerprint1).not.toBe(fingerprint2);
  });

  test("generates different fingerprints for different projects", () => {
    const projectId1: ObjectID = ObjectID.generate();
    const projectId2: ObjectID = ObjectID.generate();
    const serviceId: ObjectID = ObjectID.generate();

    // Identical exception data; only the project ID differs.
    const fingerprint1: string = ExceptionUtil.getFingerprint({
      projectId: projectId1,
      serviceId,
      message: "Error occurred",
      exceptionType: "Error",
    });
    const fingerprint2: string = ExceptionUtil.getFingerprint({
      projectId: projectId2,
      serviceId,
      message: "Error occurred",
      exceptionType: "Error",
    });

    expect(fingerprint1).not.toBe(fingerprint2);
  });

  /*
   * Both traces share the same frame and line (MyClass.java:42); only the
   * trailing memory address differs, so the title names memory addresses
   * rather than line numbers.
   */
  test("generates same fingerprint for similar stack traces with different memory addresses", () => {
    const projectId: ObjectID = ObjectID.generate();
    const serviceId: ObjectID = ObjectID.generate();

    const fingerprint1: string = ExceptionUtil.getFingerprint({
      projectId,
      serviceId,
      message: "NullPointerException",
      exceptionType: "NullPointerException",
      stackTrace:
        "at com.example.MyClass.method(MyClass.java:42)\nat 0x7fff5fbff8c0",
    });
    const fingerprint2: string = ExceptionUtil.getFingerprint({
      projectId,
      serviceId,
      message: "NullPointerException",
      exceptionType: "NullPointerException",
      stackTrace:
        "at com.example.MyClass.method(MyClass.java:42)\nat 0x00007ffe12345678",
    });

    expect(fingerprint1).toBe(fingerprint2);
  });
});
});

View file

@ -23,6 +23,174 @@ export interface TelemetryExceptionPayload {
}
export default class ExceptionUtil {
/**
 * Normalizes a string by replacing dynamic values with placeholders.
 * This ensures that exceptions with the same root cause but different
 * dynamic values (like IDs, timestamps, etc.) get the same fingerprint.
 *
 * Rules are applied one after another, each on the output of the previous
 * rule, so a specific rule must run before any generic rule that could
 * rewrite part of the same text.
 *
 * @param text - The text to normalize (message or stack trace)
 * @returns The normalized text with dynamic values replaced, or an empty
 * string when `text` is empty, null or undefined
 */
public static normalizeForFingerprint(text: string): string {
  if (!text) {
    return "";
  }

  let normalized: string = text;

  // Order matters! More specific patterns should come before generic ones.

  /*
   * 1. JWT tokens (three base64url segments separated by dots).
   * Runs first: a segment may contain text such as "ab_12345678" that the
   * Stripe / service-ID rules would rewrite, after which the token as a
   * whole would no longer match and its payload would stay in the output.
   */
  normalized = normalized.replace(
    /\beyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]+\b/g,
    "<JWT>",
  );

  // 2. UUIDs (e.g., 550e8400-e29b-41d4-a716-446655440000)
  normalized = normalized.replace(
    /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi,
    "<UUID>",
  );

  // 3. MongoDB ObjectIDs (exactly 24 hex characters)
  normalized = normalized.replace(/\b[0-9a-f]{24}\b/gi, "<OBJECT_ID>");

  /*
   * 4. Stripe-style IDs (e.g., sub_xxx, cus_xxx, pi_xxx, ch_xxx, etc.)
   * A known prefix, an underscore, then 10-32 alphanumeric characters.
   */
  normalized = normalized.replace(
    /\b(sub|cus|pi|ch|pm|card|price|prod|inv|txn|evt|req|acct|payout|ba|btok|src|tok|seti|si|cs|link|file|dp|icr|ii|il|is|isci|mbur|or|po|qt|rcpt|re|refund|sku|tax|txi|tr|us|wh)_[A-Za-z0-9]{10,32}\b/g,
    "<STRIPE_ID>",
  );

  /*
   * 5. Generic API/Service IDs: a 2-10 letter lowercase prefix, an
   * underscore, then 8+ alphanumeric characters (e.g. aws_abc123def456).
   */
  normalized = normalized.replace(
    /\b[a-z]{2,10}_[A-Za-z0-9]{8,}\b/g,
    "<SERVICE_ID>",
  );

  /*
   * 6. Base64 encoded strings (40+ characters, likely tokens or encoded data).
   * Trailing "=" padding is consumed explicitly: "=" is not a word character,
   * so a "\b" after the padding would never match and values with different
   * padding lengths would normalize to different strings.
   */
  normalized = normalized.replace(
    /\b[A-Za-z0-9+/]{40,}(?:={1,2}|\b)/g,
    "<BASE64>",
  );

  // 7. IP addresses (IPv4)
  normalized = normalized.replace(
    /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g,
    "<IP>",
  );

  // 8. IP addresses (IPv6) - simplified pattern, full 8-group form only
  normalized = normalized.replace(
    /\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b/g,
    "<IPV6>",
  );

  /*
   * IPv6 localhost (::1). A leading "\b" cannot be used because there is no
   * word boundary between whitespace or "[" and ":". Instead the preceding
   * character (or start of string) is captured and written back, which also
   * avoids touching longer addresses such as "fe80::1".
   */
  normalized = normalized.replace(/(^|[^\w:])::1\b/g, "$1<IPV6>");

  // 9. Email addresses ("|" is not part of the TLD character class)
  normalized = normalized.replace(
    /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
    "<EMAIL>",
  );

  /*
   * 10. URL path segments that look like IDs (8+ hex characters after a
   * slash); the rest of the URL is left as-is.
   */
  normalized = normalized.replace(
    /\/[0-9a-f]{8,}(?=\/|$|\?|#|\s|'|")/gi,
    "/<ID>",
  );

  // 11. Timestamps: ISO 8601 with optional fraction and zone
  normalized = normalized.replace(
    /\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?/g,
    "<TIMESTAMP>",
  );

  // Unix timestamps: 10 to 13 digits starting with 1
  normalized = normalized.replace(/\b1[0-9]{9,12}\b/g, "<TIMESTAMP>");

  // 12. Date formats (YYYY-MM-DD, MM/DD/YYYY, etc.)
  normalized = normalized.replace(
    /\b\d{4}[-/]\d{2}[-/]\d{2}\b/g,
    "<DATE>",
  );
  normalized = normalized.replace(
    /\b\d{2}[-/]\d{2}[-/]\d{4}\b/g,
    "<DATE>",
  );

  // 13. Time formats (HH:MM:SS, HH:MM)
  normalized = normalized.replace(
    /\b\d{2}:\d{2}(?::\d{2})?\b/g,
    "<TIME>",
  );

  // 14. Memory addresses (0x followed by hex)
  normalized = normalized.replace(/\b0x[0-9a-fA-F]+\b/g, "<MEMORY_ADDR>");

  // 15. Session IDs (common patterns) - MUST come before hex ID pattern
  normalized = normalized.replace(
    /\bsession[_-]?id[=:\s]*['"]?[A-Za-z0-9_-]+['"]?/gi,
    "session_id=<SESSION>",
  );

  // 16. Request IDs (common patterns) - MUST come before hex ID pattern
  normalized = normalized.replace(
    /\brequest[_-]?id[=:\s]*['"]?[A-Za-z0-9_-]+['"]?/gi,
    "request_id=<REQUEST>",
  );

  // 17. Correlation IDs - MUST come before hex ID pattern
  normalized = normalized.replace(
    /\bcorrelation[_-]?id[=:\s]*['"]?[A-Za-z0-9_-]+['"]?/gi,
    "correlation_id=<CORRELATION>",
  );

  // 18. Transaction IDs - MUST come before hex ID pattern
  normalized = normalized.replace(
    /\btransaction[_-]?id[=:\s]*['"]?[A-Za-z0-9_-]+['"]?/gi,
    "transaction_id=<TRANSACTION>",
  );

  // 19. Hex strings that are likely IDs (8+ chars; digit-only runs also match)
  normalized = normalized.replace(/\b[0-9a-f]{8,}\b/gi, "<HEX_ID>");

  // 20. Quoted strings (single or double quotes) of 16+ ID-like characters
  normalized = normalized.replace(
    /'[A-Za-z0-9_-]{16,}'/g,
    "'<ID>'",
  );
  normalized = normalized.replace(
    /"[A-Za-z0-9_-]{16,}"/g,
    '"<ID>"',
  );

  // 21. Port numbers in URLs or connection strings
  normalized = normalized.replace(/:(\d{4,5})(?=\/|$|\s)/g, ":<PORT>");

  /*
   * 22. Line/column references at the end of a line in stack traces.
   * The replacement always ends with ")" whether or not the match had one.
   */
  normalized = normalized.replace(/:\d+:\d+\)?$/gm, ":<LINE>:<COL>)");

  // 23. Process/Thread IDs
  normalized = normalized.replace(/\bPID[:\s]*\d+\b/gi, "PID:<PID>");
  normalized = normalized.replace(/\bTID[:\s]*\d+\b/gi, "TID:<TID>");

  // 24. Numeric IDs in common patterns (id=123, id: 123, etc.)
  normalized = normalized.replace(
    /\bid[=:\s]*['"]?\d+['"]?/gi,
    "id=<ID>",
  );

  // 25. Large numbers that are likely IDs (7 or more digits)
  normalized = normalized.replace(/\b\d{7,}\b/g, "<NUMBER>");

  return normalized;
}
public static getFingerprint(data: ExceptionFingerprintInput): string {
const message: string = data.message || "";
const stackTrace: string = data.stackTrace || "";
@ -30,8 +198,15 @@ export default class ExceptionUtil {
const projectId: string = data.projectId?.toString() || "";
const serviceId: string = data.serviceId?.toString() || "";
// Normalize message and stack trace to group similar exceptions together
// This replaces dynamic values like IDs, timestamps, etc. with placeholders
const normalizedMessage: string =
ExceptionUtil.normalizeForFingerprint(message);
const normalizedStackTrace: string =
ExceptionUtil.normalizeForFingerprint(stackTrace);
const hash: string = Crypto.getSha256Hash(
projectId + serviceId + message + stackTrace + type,
projectId + serviceId + normalizedMessage + normalizedStackTrace + type,
);
return hash;

View file

@ -279,6 +279,7 @@
"resolved": "https://registry.npmjs.org/@babel/core/-/core-7.23.6.tgz",
"integrity": "sha512-FxpRyGjrMJXh7X3wGLGhNDCRiwpWEF74sKjTLDJSG5Kyvow3QZaG0Adbqzi9ZrVjTWpsX+2cxWXD71NMg93kdw==",
"dev": true,
"peer": true,
"dependencies": {
"@ampproject/remapping": "^2.2.0",
"@babel/code-frame": "^7.23.5",
@ -1304,7 +1305,8 @@
"node_modules/@types/node": {
"version": "17.0.45",
"resolved": "https://registry.npmjs.org/@types/node/-/node-17.0.45.tgz",
"integrity": "sha512-w+tIMs3rq2afQdsPJlODhoUEKzFP1ayaoyl1CcnwtIlsVe7K7bA1NGm4s3PraqTLlXnbIN84zuBlxBWo1u9BLw=="
"integrity": "sha512-w+tIMs3rq2afQdsPJlODhoUEKzFP1ayaoyl1CcnwtIlsVe7K7bA1NGm4s3PraqTLlXnbIN84zuBlxBWo1u9BLw==",
"peer": true
},
"node_modules/@types/prettier": {
"version": "2.7.3",
@ -1575,6 +1577,7 @@
"url": "https://github.com/sponsors/ai"
}
],
"peer": true,
"dependencies": {
"caniuse-lite": "^1.0.30001565",
"electron-to-chromium": "^1.4.601",
@ -2516,6 +2519,7 @@
"resolved": "https://registry.npmjs.org/jest/-/jest-28.1.3.tgz",
"integrity": "sha512-N4GT5on8UkZgH0O5LUavMRV1EDEhNTL0KEfRmDIeZHSV7p2XgLoY9t9VDUgL6o+yfdgYHVxuz81G8oB9VG5uyA==",
"dev": true,
"peer": true,
"dependencies": {
"@jest/core": "^28.1.3",
"@jest/types": "^28.1.3",
@ -4441,6 +4445,7 @@
"version": "10.9.2",
"resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz",
"integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==",
"peer": true,
"dependencies": {
"@cspotcode/source-map-support": "^0.8.0",
"@tsconfig/node10": "^1.0.7",