oneuptime/Telemetry/Utils/Exception.ts
Nawaz Dhandala f49b1995df
feat(telemetry): add new Telemetry service (OTel, Syslog, Fluent, Metrics, Traces) and unified ingestion pipeline
- Add Telemetry service entrypoint
  - Telemetry/Index.ts: app bootstrap, routes mounting, infrastructure init and Telemetry SDK init.

- Unified queue + worker
  - Telemetry/Jobs/TelemetryIngest/ProcessTelemetry.ts: single worker that dispatches queued jobs to specific processors (logs, traces, metrics, syslog, fluent logs).
  - Telemetry/Services/Queue/TelemetryQueueService.ts: central queue API and job payload types.
  - Per-type Queue wrappers (LogsQueueService, MetricsQueueService, TracesQueueService, FluentLogsQueueService, SyslogQueueService).

- OpenTelemetry ingestion middleware and proto support
  - Telemetry/Middleware/OtelRequestMiddleware.ts: detect OTLP endpoint (logs/traces/metrics), decode protobuf bodies using protobufjs and set product type.
  - Telemetry/ProtoFiles/OTel/v1/*.proto: include common.proto, logs.proto, metrics.proto, resource.proto, traces.proto for OTLP v1 messages.

- Ingest services
  - Telemetry/Services/OtelLogsIngestService.ts: parse incoming OTLP logs, map attributes, convert timestamps, batch insert logs.
  - Telemetry/Services/OtelTracesIngestService.ts: parse OTLP traces, build span rows, extract exceptions, batch insert spans and exceptions, save telemetry exception summary.
  - Telemetry/Services/OtelMetricsIngestService.ts: parse OTLP metrics, normalize datapoints, batch insert metrics and index metric name -> service map.
  - Telemetry/Services/SyslogIngestService.ts: syslog ingestion endpoints, parser integration, map syslog fields to attributes and logs.
  - Telemetry/Services/FluentLogsIngestService.ts: ingest Fluentd-style logs, normalize entries and insert into the log backend.
  - Telemetry/Services/OtelIngestBaseService.ts: helpers to resolve service name from attributes/headers.

- Syslog parser and utilities
  - Telemetry/Utils/SyslogParser.ts: robust RFC5424 and RFC3164 parser, structured data extraction and sanitization.
  - Telemetry/Tests/Utils/SyslogParser.test.ts: unit tests for parser behavior.

- Telemetry exception utilities
  - Telemetry/Utils/Exception.ts: generate exception fingerprint and upsert telemetry exception status (saveOrUpdateTelemetryException).

- Queue & job integration
  - New integration with Common/Server/Infrastructure/Queue and QueueWorker, job id generation and telemetry job types.
  - Telemetry services add ingestion jobs instead of processing synchronously.

- Config, build and dev tooling
  - Add Telemetry/package.json, package-lock.json, tsconfig.json, nodemon.json, jest config.
  - New script configs and dependencies (protobufjs, ts-node, jest, nodemon, etc).

- Docker / environment updates
  - docker-compose.base.yml, docker-compose.dev.yml, docker-compose.yml: rename service from open-telemetry-ingest -> telemetry and wire TELEMETRY_* envs.
  - config.example.env: rename and consolidate environment variables (OPEN_TELEMETRY_* -> TELEMETRY_*, update hostnames and ports).
  - Tests/Scripts/status-check.sh: update ready-check target to telemetry/status/ready.

- Other
  - Telemetry/Services/Queue/*: export helpers and legacy-compatible job interface shims.
  - Memory cleanup and batching safeguards across ingest services.
  - Logging and capture spans added to key code paths.

BREAKING CHANGES / MIGRATION NOTES:
- Environment variables and docker service names changed:
  - Replace OPEN_TELEMETRY_... vars with TELEMETRY_... (PORT, HOSTNAME, CONCURRENCY, DISABLE_TELEMETRY, etc).
  - docker-compose entries moved from "open-telemetry-ingest" to "telemetry" and image name changed to oneuptime/telemetry.
  - Update any deployment automation and monitoring checks referencing the old service name or endpoints.
- Consumers: OTLP endpoints and behavior remain supported, but ingestion is now queued and processed asynchronously.

Testing / Running:
- Install deps in Telemetry/ (npm install) after syncing Common workspace.
- Run dev: npx nodemon (nodemon.json) or build & start using provided scripts.
- Run tests with jest (Telemetry test suite includes SyslogParser unit tests).

Files added/modified (high level):
- Added many files under Telemetry/: Index, Jobs, Middleware, ProtoFiles, Services, Utils, Tests, package and config artifacts.
- Modified docker-compose.* and config.example.env and status check script to use new TELEMETRY service/vars.
2025-11-07 21:36:47 +00:00

133 lines
4 KiB
TypeScript

import TelemetryException from "Common/Models/DatabaseModels/TelemetryException";
import TelemetryExceptionService from "Common/Server/Services/TelemetryExceptionService";
import OneUptimeDate from "Common/Types/Date";
import BadDataException from "Common/Types/Exception/BadDataException";
import ObjectID from "Common/Types/ObjectID";
import Crypto from "Common/Utils/Crypto";
/**
 * Fields that feed `ExceptionUtil.getFingerprint`.
 *
 * Every field is optional; `getFingerprint` substitutes an empty string for
 * any field that is missing.
 */
export interface ExceptionFingerprintInput {
  /** Project the exception belongs to. */
  projectId?: ObjectID;
  /** Telemetry service that reported the exception. */
  serviceId?: ObjectID;
  /** Exception type / class name as reported by the source. */
  exceptionType?: string;
  /** Exception message text. */
  message?: string;
  /** Stack trace text. */
  stackTrace?: string;
}
/**
 * Input accepted by `ExceptionUtil.saveOrUpdateTelemetryException`.
 *
 * `fingerprint`, `projectId` and `serviceId` are required and are also checked
 * at runtime. The remaining fields are optional and are only copied onto a
 * newly created record; they are not written when an existing record is updated.
 */
export interface TelemetryExceptionPayload {
  // --- required: identify the exception record ---
  fingerprint: string;
  projectId: ObjectID;
  serviceId: ObjectID;

  // --- optional: descriptive details stored on first occurrence ---
  exceptionType?: string;
  message?: string;
  stackTrace?: string;
}
/**
 * Helpers for fingerprinting telemetry exceptions and for keeping one
 * `TelemetryException` status record per fingerprint / project / service.
 */
export default class ExceptionUtil {
  /**
   * Builds the fingerprint used to recognise repeat occurrences of an exception.
   *
   * The result is `Crypto.getSha256Hash` applied to
   * `projectId + serviceId + message + stackTrace + exceptionType`, where any
   * missing or empty field contributes an empty string.
   *
   * NOTE(review): the fields are concatenated without a separator, so two
   * different inputs whose fields join to the same text get the same
   * fingerprint. Deliberately left unchanged here: altering the hashed text
   * would change fingerprints that are presumably already persisted — confirm
   * and plan a migration before changing it.
   *
   * @param data - Fields to fingerprint; all optional.
   * @returns The hash string returned by `Crypto.getSha256Hash`.
   */
  public static getFingerprint(data: ExceptionFingerprintInput): string {
    const message: string = data.message || "";
    const stackTrace: string = data.stackTrace || "";
    const type: string = data.exceptionType || "";
    const projectId: string = data.projectId?.toString() || "";
    const serviceId: string = data.serviceId?.toString() || "";

    return Crypto.getSha256Hash(
      projectId + serviceId + message + stackTrace + type,
    );
  }

  /**
   * Records one occurrence of an exception.
   *
   * If a record with the same fingerprint, project and telemetry service
   * exists, its `lastSeenAt` is refreshed, its occurrence count is incremented
   * and any "resolved" marking is cleared. Otherwise a new record is created
   * with an occurrence count of 1.
   *
   * NOTE(review): the lookup and the following write are separate calls, so
   * concurrent invocations for the same fingerprint could lose an increment or
   * create duplicate records — confirm whether callers serialise this.
   *
   * @param exception - Occurrence details; `fingerprint`, `projectId` and
   *   `serviceId` must be set.
   * @throws BadDataException when `fingerprint`, `projectId` or `serviceId`
   *   is missing.
   */
  public static async saveOrUpdateTelemetryException(
    exception: TelemetryExceptionPayload,
  ): Promise<void> {
    // Exception is saved to main database as well (not just analytics db), so users can assign it, resolve it, etc.
    if (!exception.fingerprint) {
      throw new BadDataException(
        "Fingerprint is required to save exception status",
      );
    }

    if (!exception.projectId) {
      throw new BadDataException(
        "Project ID is required to save exception status",
      );
    }

    if (!exception.serviceId) {
      throw new BadDataException(
        "Service ID is required to save exception status",
      );
    }

    // Check whether an exception with the same fingerprint already exists.
    const existingExceptionStatus: TelemetryException | null =
      await TelemetryExceptionService.findOneBy({
        query: {
          fingerprint: exception.fingerprint,
          projectId: exception.projectId,
          telemetryServiceId: exception.serviceId,
        },
        select: {
          _id: true,
          occuranceCount: true,
        },
        props: {
          isRoot: true,
        },
      });

    /*
     * Take a single timestamp for this occurrence. Reading the clock once per
     * field could leave a brand-new record with lastSeenAt earlier than
     * firstSeenAt.
     */
    const seenAt: Date = OneUptimeDate.now();

    if (existingExceptionStatus) {
      // Known exception: refresh last-seen, bump the count and reopen it.
      await TelemetryExceptionService.updateOneBy({
        query: {
          _id: existingExceptionStatus._id,
        },
        data: {
          lastSeenAt: seenAt,
          markedAsResolvedByUserId: null,
          isResolved: false,
          markedAsResolvedAt: null, // unmark as resolved if it was marked as resolved
          occuranceCount: (existingExceptionStatus.occuranceCount || 0) + 1,
        },
        props: {
          isRoot: true,
        },
      });

      return;
    }

    // First occurrence: create a new exception status record.
    const newExceptionStatus: TelemetryException = new TelemetryException();
    newExceptionStatus.fingerprint = exception.fingerprint;
    newExceptionStatus.projectId = exception.projectId;
    newExceptionStatus.telemetryServiceId = exception.serviceId;
    newExceptionStatus.firstSeenAt = seenAt;
    newExceptionStatus.lastSeenAt = seenAt;
    newExceptionStatus.occuranceCount = 1;

    // Optional details are only set when the payload actually carries them.
    if (exception.exceptionType) {
      newExceptionStatus.exceptionType = exception.exceptionType;
    }

    if (exception.message) {
      newExceptionStatus.message = exception.message;
    }

    if (exception.stackTrace) {
      newExceptionStatus.stackTrace = exception.stackTrace;
    }

    await TelemetryExceptionService.create({
      data: newExceptionStatus,
      props: {
        isRoot: true,
      },
    });
  }
}