diff --git a/packages/utils/src/index.ts b/packages/utils/src/index.ts index 9ecef022..4829b19b 100644 --- a/packages/utils/src/index.ts +++ b/packages/utils/src/index.ts @@ -3,6 +3,7 @@ export * from "./collections/index.js"; export * from "./compression/index.js"; export * from "./diff/index.js"; export * from "./hash/index.js"; +export * from "./msgpack/index.js"; export * from "./streams/index.js"; // Note: files/ is exported via subpath "@statewalker/vcs-utils/files" diff --git a/packages/utils/src/msgpack/README.md b/packages/utils/src/msgpack/README.md new file mode 100644 index 00000000..e34654a9 --- /dev/null +++ b/packages/utils/src/msgpack/README.md @@ -0,0 +1,122 @@ +# MessagePack TypeScript Implementation + +A pure TypeScript implementation of the [MessagePack](https://msgpack.org/) serialization format. + +## References + +- **MessagePack Specification**: https://github.com/msgpack/msgpack/blob/master/spec.md +- **Original JavaScript Implementation**: https://github.com/cuzic/MessagePack-JS (MIT License) + +## Usage + +```typescript +import { pack, unpack } from "@statewalker/vcs-utils"; + +// Encode data +const packed = pack({ hello: "world", count: 42 }); +// packed is a Uint8Array + +// Decode data +const unpacked = unpack(packed); +// { hello: "world", count: 42 } +``` + +## API + +### Functions + +#### `pack(value, options?): Uint8Array` + +Encode a JavaScript value to MessagePack format. + +```typescript +const bytes = pack({ name: "test", enabled: true }); +``` + +#### `unpack(data, options?): MessagePackValue` + +Decode MessagePack data to a JavaScript value. + +```typescript +const value = unpack(bytes); +``` + +#### `packToString(value, options?): string` + +Encode to a string where each character represents a byte. Useful for compatibility with legacy code. + +### Classes + +#### `Encoder` + +Reusable encoder instance. 
+ +```typescript +const encoder = new Encoder(); +const bytes1 = encoder.pack(42); +const bytes2 = encoder.pack("hello"); +``` + +#### `Decoder` + +Decoder with configurable character set. + +```typescript +const decoder = new Decoder(data, { charSet: "utf-8" }); +const value = decoder.unpack(); +``` + +### Options + +#### DecoderOptions + +```typescript +interface DecoderOptions { + charSet?: CharSet | "utf-8" | "ascii" | "utf16" | "byte-array"; +} +``` + +- `utf-8`: Decode strings as UTF-8 (default) +- `ascii`: Decode strings as ASCII +- `byte-array`: Return raw byte arrays instead of strings + +#### EncoderOptions + +```typescript +interface EncoderOptions { + utf8Strings?: boolean; // Encode strings as UTF-8 (default: true) +} +``` + +## Supported Types + +| JavaScript Type | MessagePack Format | +|----------------|-------------------| +| `null` | nil | +| `boolean` | true/false | +| `number` (integer) | fixint, uint8-64, int8-64 | +| `number` (float) | float64 | +| `string` | fixstr, str8-32 | +| `Uint8Array` | bin8-32 | +| `Array` | fixarray, array16-32 | +| `Object` | fixmap, map16-32 | + +## Format Details + +The encoder automatically selects the most compact format: + +- Integers 0-127: single byte (positive fixint) +- Integers -32 to -1: single byte (negative fixint) +- Strings up to 31 bytes: 1-byte header (fixstr) +- Arrays up to 15 elements: 1-byte header (fixarray) +- Maps up to 15 pairs: 1-byte header (fixmap) + +## Limitations + +- 64-bit integers may lose precision beyond Number.MAX_SAFE_INTEGER +- Extension types are decoded as `undefined` (not fully implemented) +- UTF-16 charset is not implemented + +## License + +This implementation is based on [MessagePack-JS](https://github.com/cuzic/MessagePack-JS) by cuzic, released under the MIT License. 
// diff --git a/packages/utils/src/msgpack/decoder.ts b/packages/utils/src/msgpack/decoder.ts new file mode 100644 index 00000000..5f034280 --- /dev/null +++ b/packages/utils/src/msgpack/decoder.ts @@ -0,0 +1,419 @@
/**
 * MessagePack Decoder
 *
 * Decodes MessagePack binary data into JavaScript values.
 *
 * Based on the MessagePack specification:
 * https://github.com/msgpack/msgpack/blob/master/spec.md
 *
 * Original JS implementation by cuzic:
 * https://github.com/cuzic/MessagePack-JS
 */

import {
  CharSet,
  type DecoderOptions,
  FixedRange,
  Format,
  type MessagePackValue,
} from "./types.js";

/**
 * Decoder for MessagePack binary data.
 *
 * The decoder keeps a read cursor, so `unpack()` may be called repeatedly to
 * read consecutive values from the same buffer.
 */
export class Decoder {
  private data: Uint8Array;
  /** Big-endian view over `data`, used for all multi-byte numeric reads. */
  private view: DataView;
  private index = 0;
  private charSet: CharSet;

  /**
   * Create a new decoder
   * @param data - Binary data to decode. A string is treated as a "binary
   *   string": the low 8 bits of each char code become one byte.
   * @param options - Decoder options
   */
  constructor(data: Uint8Array | string, options: DecoderOptions = {}) {
    if (typeof data === "string") {
      const bytes = new Uint8Array(data.length);
      for (let i = 0; i < data.length; i++) {
        bytes[i] = data.charCodeAt(i) & 0xff;
      }
      this.data = bytes;
    } else {
      this.data = data;
    }

    // Honour byteOffset/byteLength so that sub-views of a larger buffer work.
    this.view = new DataView(this.data.buffer, this.data.byteOffset, this.data.byteLength);
    this.charSet = this.parseCharSet(options.charSet);
  }

  /** Normalise the `charSet` option (enum value or string alias); defaults to UTF-8. */
  private parseCharSet(charSet: DecoderOptions["charSet"] | undefined): CharSet {
    if (charSet === undefined) return CharSet.UTF8;
    if (typeof charSet === "number") return charSet;

    switch (charSet) {
      case "utf-8":
        return CharSet.UTF8;
      case "ascii":
        return CharSet.ASCII;
      case "utf16":
        return CharSet.UTF16;
      case "byte-array":
        return CharSet.ByteArray;
      default:
        return CharSet.UTF8;
    }
  }

  /**
   * Decode the next value from the buffer and advance the read cursor.
   *
   * Extension values (fixext 1-16, ext 8/16/32) are skipped and decoded as
   * `undefined`.
   *
   * @returns The decoded value
   * @throws Error if the input is truncated or contains an unknown format byte
   */
  unpack(): MessagePackValue {
    const type = this.unpackUint8();

    // Positive fixint (0x00 - 0x7f)
    if (type <= FixedRange.POSITIVE_FIXINT_MAX) {
      return type;
    }

    // Negative fixint (0xe0 - 0xff)
    if (type >= FixedRange.NEGATIVE_FIXINT_PREFIX) {
      return type - 256;
    }

    // Fixmap (0x80 - 0x8f)
    if (type >= FixedRange.FIXMAP_PREFIX && type <= FixedRange.FIXMAP_MAX) {
      return this.unpackMap(type & 0x0f);
    }

    // Fixarray (0x90 - 0x9f)
    if (type >= FixedRange.FIXARRAY_PREFIX && type <= FixedRange.FIXARRAY_MAX) {
      return this.unpackArray(type & 0x0f);
    }

    // Fixstr (0xa0 - 0xbf)
    if (type >= FixedRange.FIXSTR_PREFIX && type <= FixedRange.FIXSTR_MAX) {
      return this.unpackRaw(type & 0x1f);
    }

    switch (type) {
      case Format.NIL:
        return null;
      case Format.NEVER_USED:
        return undefined;
      case Format.FALSE:
        return false;
      case Format.TRUE:
        return true;

      case Format.BIN8:
        return this.unpackBinary(this.unpackUint8());
      case Format.BIN16:
        return this.unpackBinary(this.unpackUint16());
      case Format.BIN32:
        return this.unpackBinary(this.unpackUint32());

      // Variable-length extensions: length prefix, then 1 type byte + payload.
      case Format.EXT8:
        return this.skipExt(this.unpackUint8());
      case Format.EXT16:
        return this.skipExt(this.unpackUint16());
      case Format.EXT32:
        return this.skipExt(this.unpackUint32());

      case Format.FLOAT32:
        return this.unpackFloat32();
      case Format.FLOAT64:
        return this.unpackFloat64();

      case Format.UINT8:
        return this.unpackUint8();
      case Format.UINT16:
        return this.unpackUint16();
      case Format.UINT32:
        return this.unpackUint32();
      case Format.UINT64:
        return this.unpackUint64();

      case Format.INT8:
        return this.unpackInt8();
      case Format.INT16:
        return this.unpackInt16();
      case Format.INT32:
        return this.unpackInt32();
      case Format.INT64:
        return this.unpackInt64();

      // Fixed extension types return undefined (not fully implemented)
      case Format.FIXEXT1:
      case Format.FIXEXT2:
      case Format.FIXEXT4:
      case Format.FIXEXT8:
      case Format.FIXEXT16:
        return this.unpackFixExt(type);

      case Format.STR8:
        return this.unpackRaw(this.unpackUint8());
      case Format.STR16:
        return this.unpackRaw(this.unpackUint16());
      case Format.STR32:
        return this.unpackRaw(this.unpackUint32());

      case Format.ARRAY16:
        return this.unpackArray(this.unpackUint16());
      case Format.ARRAY32:
        return this.unpackArray(this.unpackUint32());

      case Format.MAP16:
        return this.unpackMap(this.unpackUint16());
      case Format.MAP32:
        return this.unpackMap(this.unpackUint32());

      default:
        throw new Error(`MessagePack: unknown format type 0x${type.toString(16)}`);
    }
  }

  /** Throw unless at least `size` unread bytes remain. */
  private ensureAvailable(size: number): void {
    if (this.index + size > this.data.length) {
      throw new Error("MessagePack: index is out of range");
    }
  }

  private unpackUint8(): number {
    this.ensureAvailable(1);
    return this.data[this.index++];
  }

  private unpackUint16(): number {
    this.ensureAvailable(2);
    const value = this.view.getUint16(this.index);
    this.index += 2;
    return value;
  }

  private unpackUint32(): number {
    this.ensureAvailable(4);
    const value = this.view.getUint32(this.index);
    this.index += 4;
    return value;
  }

  /**
   * Read an unsigned 64-bit integer as a JavaScript number.
   * Values above Number.MAX_SAFE_INTEGER lose precision.
   */
  private unpackUint64(): number {
    this.ensureAvailable(8);
    const high = this.view.getUint32(this.index);
    const low = this.view.getUint32(this.index + 4);
    this.index += 8;
    return high * 0x100000000 + low;
  }

  private unpackInt8(): number {
    this.ensureAvailable(1);
    return this.view.getInt8(this.index++);
  }

  private unpackInt16(): number {
    this.ensureAvailable(2);
    const value = this.view.getInt16(this.index);
    this.index += 2;
    return value;
  }

  private unpackInt32(): number {
    this.ensureAvailable(4);
    const value = this.view.getInt32(this.index);
    this.index += 4;
    return value;
  }

  /**
   * Read a signed 64-bit integer as a JavaScript number.
   *
   * The high word is read as a signed int32 and the low word as uint32, so
   * every value inside the safe-integer range is exact. Going through a
   * rounded unsigned 64-bit value first would, for example, turn -1
   * (0xffffffffffffffff) into 0.
   */
  private unpackInt64(): number {
    this.ensureAvailable(8);
    const high = this.view.getInt32(this.index);
    const low = this.view.getUint32(this.index + 4);
    this.index += 8;
    return high * 0x100000000 + low;
  }

  /** Read an IEEE 754 binary32 value (handles NaN, ±Infinity and subnormals). */
  private unpackFloat32(): number {
    this.ensureAvailable(4);
    const value = this.view.getFloat32(this.index);
    this.index += 4;
    return value;
  }

  /** Read an IEEE 754 binary64 value (handles NaN, ±Infinity and subnormals). */
  private unpackFloat64(): number {
    this.ensureAvailable(8);
    const value = this.view.getFloat64(this.index);
    this.index += 8;
    return value;
  }

  /**
   * Read `size` bytes of str payload and convert them according to the
   * configured character set. Any charset other than ASCII / ByteArray
   * (including the unimplemented UTF16) is decoded as UTF-8.
   */
  private unpackRaw(size: number): string | number[] {
    if (this.index + size > this.data.length) {
      throw new Error(
        `MessagePack: index is out of range ${this.index} ${size} ${this.data.length}`,
      );
    }

    const bytes = this.data.subarray(this.index, this.index + size);
    this.index += size;

    if (this.charSet === CharSet.ASCII) {
      // ASCII 8-bit encoding
      return this.decodeBinaryString(bytes);
    }
    if (this.charSet === CharSet.ByteArray) {
      // Return raw byte array
      return Array.from(bytes);
    }
    return this.decodeUtf8(bytes);
  }

  /** Read `size` bytes of bin payload. */
  private unpackBinary(size: number): Uint8Array {
    this.ensureAvailable(size);
    const bytes = this.data.slice(this.index, this.index + size);
    this.index += size;
    return bytes;
  }

  /**
   * Map each byte to the char with the same code.
   * Works in chunks: spreading a very large array into a single
   * String.fromCharCode call exceeds the engine's argument limit.
   */
  private decodeBinaryString(bytes: Uint8Array): string {
    const chunkSize = 0x8000;
    let str = "";
    for (let start = 0; start < bytes.length; start += chunkSize) {
      str += String.fromCharCode(...bytes.subarray(start, start + chunkSize));
    }
    return str;
  }

  /**
   * Lenient UTF-8 decoder: continuation bytes are not validated and bytes
   * that cannot start a sequence are skipped.
   */
  private decodeUtf8(bytes: Uint8Array): string {
    let i = 0;
    let str = "";

    while (i < bytes.length) {
      const c = bytes[i];

      if (c < 0x80) {
        // Single byte character (ASCII)
        str += String.fromCharCode(c);
        i++;
      } else if ((c & 0xe0) === 0xc0) {
        // Two byte character
        const code = ((c & 0x1f) << 6) | (bytes[i + 1] & 0x3f);
        str += String.fromCharCode(code);
        i += 2;
      } else if ((c & 0xf0) === 0xe0) {
        // Three byte character
        const code = ((c & 0x0f) << 12) | ((bytes[i + 1] & 0x3f) << 6) | (bytes[i + 2] & 0x3f);
        str += String.fromCharCode(code);
        i += 3;
      } else if ((c & 0xf8) === 0xf0) {
        // Four byte character (surrogate pair)
        const code =
          ((c & 0x07) << 18) |
          ((bytes[i + 1] & 0x3f) << 12) |
          ((bytes[i + 2] & 0x3f) << 6) |
          (bytes[i + 3] & 0x3f);
        // Convert to surrogate pair
        const codePoint = code - 0x10000;
        str += String.fromCharCode(0xd800 + (codePoint >> 10), 0xdc00 + (codePoint & 0x3ff));
        i += 4;
      } else {
        // Invalid UTF-8 sequence, skip byte
        i++;
      }
    }

    return str;
  }

  /** Decode `size` consecutive values into an array. */
  private unpackArray(size: number): MessagePackValue[] {
    // Every element occupies at least one byte, so a count larger than the
    // remaining input is a truncated/corrupt header: fail before allocating.
    this.ensureAvailable(size);

    const array: MessagePackValue[] = new Array(size);
    for (let i = 0; i < size; i++) {
      array[i] = this.unpack();
    }
    return array;
  }

  /**
   * Decode `size` key/value pairs into a plain object. Keys are converted
   * with `String(key)`.
   */
  private unpackMap(size: number): { [key: string]: MessagePackValue } {
    // Every pair occupies at least two bytes (see unpackArray).
    this.ensureAvailable(size * 2);

    const map: { [key: string]: MessagePackValue } = {};
    for (let i = 0; i < size; i++) {
      const key = this.unpack();
      const value = this.unpack();
      const name = String(key);
      if (name === "__proto__") {
        // Plain assignment would invoke the __proto__ setter and replace the
        // object's prototype; define an ordinary own property instead.
        Object.defineProperty(map, name, {
          value,
          writable: true,
          enumerable: true,
          configurable: true,
        });
      } else {
        map[name] = value;
      }
    }
    return map;
  }

  /** Skip a fixext value; the payload size is implied by the format byte. */
  private unpackFixExt(type: number): undefined {
    switch (type) {
      case Format.FIXEXT1:
        return this.skipExt(1);
      case Format.FIXEXT2:
        return this.skipExt(2);
      case Format.FIXEXT4:
        return this.skipExt(4);
      case Format.FIXEXT8:
        return this.skipExt(8);
      case Format.FIXEXT16:
        return this.skipExt(16);
      default:
        return this.skipExt(0);
    }
  }

  /**
   * Skip the extension type byte plus `size` payload bytes.
   * @throws Error if the extension runs past the end of the input
   */
  private skipExt(size: number): undefined {
    this.ensureAvailable(1 + size);
    this.index += 1 + size;
    return undefined;
  }
}
// diff --git a/packages/utils/src/msgpack/encoder.ts b/packages/utils/src/msgpack/encoder.ts new file mode 100644 index 00000000..b7722646 --- /dev/null +++ b/packages/utils/src/msgpack/encoder.ts @@ -0,0 +1,407 @@ +/** + * MessagePack Encoder + * + * Encodes JavaScript values into MessagePack binary format.
/**
 * MessagePack Encoder
 *
 * Encodes JavaScript values into MessagePack binary format.
 *
 * Based on the MessagePack specification:
 * https://github.com/msgpack/msgpack/blob/master/spec.md
 *
 * Original JS implementation by cuzic:
 * https://github.com/cuzic/MessagePack-JS
 */

import { type EncoderOptions, Format, type MessagePackValue } from "./types.js";

/**
 * Encoder for MessagePack binary data.
 *
 * An instance can be reused: every `pack()` call starts from an empty buffer.
 */
export class Encoder {
  private buffer: number[] = [];
  private utf8Strings: boolean;
  /** Scratch space used to obtain the IEEE 754 bytes of a float64. */
  private readonly float64Scratch = new DataView(new ArrayBuffer(8));

  /**
   * Create a new encoder
   * @param options - Encoder options; `utf8Strings` defaults to true
   */
  constructor(options: EncoderOptions = {}) {
    this.utf8Strings = options.utf8Strings !== false;
  }

  /**
   * Encode a value to MessagePack format
   * @param value - Value to encode
   * @returns Uint8Array containing the encoded data
   * @throws Error if the value (or a nested value) has an unsupported type
   */
  pack(value: MessagePackValue): Uint8Array {
    this.buffer = [];
    this.packValue(value);
    return new Uint8Array(this.buffer);
  }

  /**
   * Encode a value to a string (for compatibility with original implementation)
   * @param value - Value to encode
   * @returns String where each character represents a byte
   */
  packToString(value: MessagePackValue): string {
    const bytes = this.pack(value);
    // Convert in chunks: spreading a very large array into a single
    // String.fromCharCode call exceeds the engine's argument limit.
    const chunkSize = 0x8000;
    let str = "";
    for (let start = 0; start < bytes.length; start += chunkSize) {
      str += String.fromCharCode(...bytes.subarray(start, start + chunkSize));
    }
    return str;
  }

  /** Dispatch on the runtime type of `value`. `undefined` is encoded as nil. */
  private packValue(value: MessagePackValue): void {
    if (value === null || value === undefined) {
      this.buffer.push(Format.NIL);
      return;
    }

    if (typeof value === "boolean") {
      this.buffer.push(value ? Format.TRUE : Format.FALSE);
      return;
    }

    if (typeof value === "number") {
      // Integers that fit int64/uint64 use the integer formats. Anything
      // else - fractions, NaN, ±Infinity, and integers beyond the 64-bit
      // range, which would otherwise wrap silently - is written as float64.
      if (Number.isInteger(value) && value >= -(2 ** 63) && value < 2 ** 64) {
        this.packInteger(value);
      } else {
        this.packFloat64(value);
      }
      return;
    }

    if (typeof value === "string") {
      this.packString(value);
      return;
    }

    if (value instanceof Uint8Array) {
      this.packBinary(value);
      return;
    }

    if (Array.isArray(value)) {
      this.packArray(value);
      return;
    }

    if (typeof value === "object") {
      this.packObject(value as { [key: string]: MessagePackValue });
      return;
    }

    throw new Error(`MessagePack: unsupported type ${typeof value}`);
  }

  /** Write an integer using the most compact format that can hold it. */
  private packInteger(num: number): void {
    // Positive fixint (0 to 127)
    if (num >= 0 && num <= 0x7f) {
      this.buffer.push(num);
      return;
    }

    // Negative fixint (-32 to -1)
    if (num >= -0x20 && num < 0) {
      this.buffer.push(num & 0xff);
      return;
    }

    // uint8 (128 to 255)
    if (num >= 0 && num <= 0xff) {
      this.buffer.push(Format.UINT8);
      this.packUint8(num);
      return;
    }

    // int8 (-128 to -33)
    if (num >= -0x80 && num < -0x20) {
      this.buffer.push(Format.INT8);
      this.packUint8(num);
      return;
    }

    // uint16 (256 to 65535)
    if (num >= 0 && num <= 0xffff) {
      this.buffer.push(Format.UINT16);
      this.packUint16(num);
      return;
    }

    // int16 (-32768 to -129)
    if (num >= -0x8000 && num < -0x80) {
      this.buffer.push(Format.INT16);
      this.packUint16(num);
      return;
    }

    // uint32 (65536 to 4294967295)
    if (num >= 0 && num <= 0xffffffff) {
      this.buffer.push(Format.UINT32);
      this.packUint32(num);
      return;
    }

    // int32 (-2147483648 to -32769)
    if (num >= -0x80000000 && num < -0x8000) {
      this.buffer.push(Format.INT32);
      this.packUint32(num);
      return;
    }

    // int64 or uint64 for larger values
    this.buffer.push(num < 0 ? Format.INT64 : Format.UINT64);
    this.packUint64(num);
  }

  // The writers below emit big-endian bytes. The bitwise operators reduce
  // their operand modulo 2^32, so the same code produces the two's-complement
  // representation for negative inputs and serves signed formats as well.

  private packUint8(num: number): void {
    this.buffer.push(num & 0xff);
  }

  private packUint16(num: number): void {
    this.buffer.push((num >> 8) & 0xff, num & 0xff);
  }

  private packUint32(num: number): void {
    this.buffer.push((num >>> 24) & 0xff, (num >>> 16) & 0xff, (num >>> 8) & 0xff, num & 0xff);
  }

  private packUint64(num: number): void {
    // Math.floor (not truncation) keeps the high word correct for negatives.
    this.packUint32(Math.floor(num / 0x100000000));
    this.packUint32(num >>> 0);
  }

  /**
   * Write a float64. The bytes come from DataView, so NaN, ±Infinity and
   * subnormal values are encoded correctly.
   */
  private packFloat64(num: number): void {
    this.float64Scratch.setFloat64(0, num);
    this.buffer.push(Format.FLOAT64);
    for (let i = 0; i < 8; i++) {
      this.buffer.push(this.float64Scratch.getUint8(i));
    }
  }

  /**
   * Append bytes one by one. `buffer.push(...bytes)` would throw a
   * RangeError for payloads larger than the engine's argument limit.
   */
  private appendBytes(bytes: ArrayLike<number>): void {
    for (let i = 0; i < bytes.length; i++) {
      this.buffer.push(bytes[i]);
    }
  }

  private packString(str: string): void {
    let bytes: number[];

    if (this.utf8Strings) {
      bytes = this.encodeUtf8(str);
    } else {
      // Legacy: each character as one byte (ASCII-like)
      bytes = [];
      for (let i = 0; i < str.length; i++) {
        bytes.push(str.charCodeAt(i) & 0xff);
      }
    }

    const length = bytes.length;

    if (length <= 0x1f) {
      // Fixstr (up to 31 bytes)
      this.buffer.push(0xa0 | length);
    } else if (length <= 0xff) {
      // str8 (up to 255 bytes)
      this.buffer.push(Format.STR8);
      this.packUint8(length);
    } else if (length <= 0xffff) {
      // str16 (up to 65535 bytes)
      this.buffer.push(Format.STR16);
      this.packUint16(length);
    } else if (length <= 0xffffffff) {
      // str32 (up to 4294967295 bytes)
      this.buffer.push(Format.STR32);
      this.packUint32(length);
    } else {
      throw new Error("MessagePack: string too long");
    }

    this.appendBytes(bytes);
  }

  /**
   * Encode a string as UTF-8 bytes. Valid surrogate pairs are combined into
   * one 4-byte sequence; a lone surrogate is written as a 3-byte sequence.
   */
  private encodeUtf8(str: string): number[] {
    const bytes: number[] = [];

    for (let i = 0; i < str.length; i++) {
      let code = str.charCodeAt(i);

      // Handle surrogate pairs
      if (code >= 0xd800 && code <= 0xdbff && i + 1 < str.length) {
        const next = str.charCodeAt(i + 1);
        if (next >= 0xdc00 && next <= 0xdfff) {
          code = 0x10000 + ((code - 0xd800) << 10) + (next - 0xdc00);
          i++;
        }
      }

      if (code < 0x80) {
        // Single byte
        bytes.push(code);
      } else if (code < 0x800) {
        // Two bytes
        bytes.push(0xc0 | (code >> 6), 0x80 | (code & 0x3f));
      } else if (code < 0x10000) {
        // Three bytes
        bytes.push(0xe0 | (code >> 12), 0x80 | ((code >> 6) & 0x3f), 0x80 | (code & 0x3f));
      } else {
        // Four bytes
        bytes.push(
          0xf0 | (code >> 18),
          0x80 | ((code >> 12) & 0x3f),
          0x80 | ((code >> 6) & 0x3f),
          0x80 | (code & 0x3f),
        );
      }
    }

    return bytes;
  }

  private packBinary(data: Uint8Array): void {
    const length = data.length;

    if (length <= 0xff) {
      // bin8 (up to 255 bytes)
      this.buffer.push(Format.BIN8);
      this.packUint8(length);
    } else if (length <= 0xffff) {
      // bin16 (up to 65535 bytes)
      this.buffer.push(Format.BIN16);
      this.packUint16(length);
    } else if (length <= 0xffffffff) {
      // bin32 (up to 4294967295 bytes)
      this.buffer.push(Format.BIN32);
      this.packUint32(length);
    } else {
      throw new Error("MessagePack: binary data too long");
    }

    this.appendBytes(data);
  }

  private packArray(array: MessagePackValue[]): void {
    const length = array.length;

    if (length <= 0x0f) {
      // Fixarray (up to 15 elements)
      this.buffer.push(0x90 | length);
    } else if (length <= 0xffff) {
      // array16 (up to 65535 elements)
      this.buffer.push(Format.ARRAY16);
      this.packUint16(length);
    } else if (length <= 0xffffffff) {
      // array32 (up to 4294967295 elements)
      this.buffer.push(Format.ARRAY32);
      this.packUint32(length);
    } else {
      throw new Error("MessagePack: array too long");
    }

    for (const item of array) {
      this.packValue(item);
    }
  }

  /** Write the object's own enumerable string keys as a map. */
  private packObject(obj: { [key: string]: MessagePackValue }): void {
    const keys = Object.keys(obj);
    const length = keys.length;

    if (length <= 0x0f) {
      // Fixmap (up to 15 pairs)
      this.buffer.push(0x80 | length);
    } else if (length <= 0xffff) {
      // map16 (up to 65535 pairs)
      this.buffer.push(Format.MAP16);
      this.packUint16(length);
    } else if (length <= 0xffffffff) {
      // map32 (up to 4294967295 pairs)
      this.buffer.push(Format.MAP32);
      this.packUint32(length);
    } else {
      throw new Error("MessagePack: object too large");
    }

    for (const key of keys) {
      this.packValue(key);
      this.packValue(obj[key]);
    }
  }
}
// diff --git a/packages/utils/src/msgpack/index.ts b/packages/utils/src/msgpack/index.ts new file mode 100644 index 00000000..413bfdbe --- /dev/null +++ b/packages/utils/src/msgpack/index.ts @@ -0,0 +1,70 @@ +/** + * MessagePack TypeScript Implementation + * + * A pure TypeScript implementation of the MessagePack serialization format.
/**
 * MessagePack TypeScript Implementation
 *
 * A pure TypeScript implementation of the MessagePack serialization format.
 *
 * Specification: https://github.com/msgpack/msgpack/blob/master/spec.md
 * Based on: https://github.com/cuzic/MessagePack-JS
 *
 * @example
 * ```typescript
 * import { pack, unpack } from "@statewalker/vcs-utils/msgpack";
 *
 * // Encode
 * const packed = pack({ hello: "world", num: 42 });
 *
 * // Decode
 * const unpacked = unpack(packed);
 * console.log(unpacked); // { hello: "world", num: 42 }
 * ```
 *
 * @module
 */

export { Decoder } from "./decoder.js";
export { Encoder } from "./encoder.js";
export {
  CharSet,
  type DecoderOptions,
  type EncoderOptions,
  FixedRange,
  Format,
  type MessagePackValue,
} from "./types.js";

import { Decoder } from "./decoder.js";
import { Encoder } from "./encoder.js";
import type { DecoderOptions, EncoderOptions, MessagePackValue } from "./types.js";

/**
 * One-shot encode: builds a throwaway Encoder and serializes `value`.
 * @param value - Value to encode
 * @param options - Encoder options, forwarded to the Encoder constructor
 * @returns The MessagePack bytes
 */
export function pack(value: MessagePackValue, options?: EncoderOptions): Uint8Array {
  return new Encoder(options).pack(value);
}

/**
 * One-shot decode: builds a throwaway Decoder and reads the first value.
 * @param data - Binary data to decode (Uint8Array or string)
 * @param options - Decoder options, forwarded to the Decoder constructor
 * @returns The first value found in `data`
 */
export function unpack(data: Uint8Array | string, options?: DecoderOptions): MessagePackValue {
  return new Decoder(data, options).unpack();
}

/**
 * One-shot encode to a string in which every character stands for one byte
 * (kept for compatibility with the original implementation).
 * @param value - Value to encode
 * @param options - Encoder options, forwarded to the Encoder constructor
 * @returns String where each character represents a byte
 */
export function packToString(value: MessagePackValue, options?: EncoderOptions): string {
  return new Encoder(options).packToString(value);
}
// diff --git a/packages/utils/src/msgpack/types.ts b/packages/utils/src/msgpack/types.ts
new file mode 100644 index 00000000..613cd976 --- /dev/null +++ b/packages/utils/src/msgpack/types.ts @@ -0,0 +1,132 @@ +/** + * MessagePack TypeScript Types + * + * Based on the MessagePack specification: + * https://github.com/msgpack/msgpack/blob/master/spec.md + * + * Original JS implementation by cuzic: + * https://github.com/cuzic/MessagePack-JS + */ + +/** + * Types that can be serialized/deserialized by MessagePack + */ +export type MessagePackValue = + | null + | undefined + | boolean + | number + | string + | Uint8Array + | MessagePackValue[] + | { [key: string]: MessagePackValue }; + +/** + * Character set options for decoding raw data + */ +export enum CharSet { + /** UTF-8 encoded strings (default) */ + UTF8 = 0, + /** ASCII 8-bit strings */ + ASCII = 1, + /** UTF-16 encoded strings (not yet implemented) */ + UTF16 = 2, + /** Return raw byte arrays instead of strings */ + ByteArray = -1, +} + +/** + * Options for the decoder + */ +export interface DecoderOptions { + /** Character set to use when decoding raw data */ + charSet?: CharSet | "utf-8" | "ascii" | "utf16" | "byte-array"; +} + +/** + * Options for the encoder + */ +export interface EncoderOptions { + /** Whether to encode strings as UTF-8 bytes (default: true) */ + utf8Strings?: boolean; +} + +/** + * MessagePack format bytes (first byte markers) + */ +export const Format = { + // Nil, Boolean + NIL: 0xc0, + NEVER_USED: 0xc1, // This byte is never used in MessagePack + FALSE: 0xc2, + TRUE: 0xc3, + + // Binary (raw bytes) + BIN8: 0xc4, + BIN16: 0xc5, + BIN32: 0xc6, + + // Extension + EXT8: 0xc7, + EXT16: 0xc8, + EXT32: 0xc9, + + // Float + FLOAT32: 0xca, + FLOAT64: 0xcb, + + // Unsigned integers + UINT8: 0xcc, + UINT16: 0xcd, + UINT32: 0xce, + UINT64: 0xcf, + + // Signed integers + INT8: 0xd0, + INT16: 0xd1, + INT32: 0xd2, + INT64: 0xd3, + + // Fixed extension + FIXEXT1: 0xd4, + FIXEXT2: 0xd5, + FIXEXT4: 0xd6, + FIXEXT8: 0xd7, + FIXEXT16: 0xd8, + + // String (raw in old spec) + STR8: 0xd9, + 
STR16: 0xda, + STR32: 0xdb, + + // Array + ARRAY16: 0xdc, + ARRAY32: 0xdd, + + // Map + MAP16: 0xde, + MAP32: 0xdf, +} as const; + +/** + * Fixed format ranges + */ +export const FixedRange = { + // Positive fixint: 0x00 - 0x7f (0 to 127) + POSITIVE_FIXINT_MAX: 0x7f, + + // Fixmap: 0x80 - 0x8f (0 to 15 elements) + FIXMAP_PREFIX: 0x80, + FIXMAP_MAX: 0x8f, + + // Fixarray: 0x90 - 0x9f (0 to 15 elements) + FIXARRAY_PREFIX: 0x90, + FIXARRAY_MAX: 0x9f, + + // Fixstr: 0xa0 - 0xbf (0 to 31 bytes) + FIXSTR_PREFIX: 0xa0, + FIXSTR_MAX: 0xbf, + + // Negative fixint: 0xe0 - 0xff (-32 to -1) + NEGATIVE_FIXINT_PREFIX: 0xe0, +} as const; diff --git a/packages/utils/tests/msgpack/msgpack.test.ts b/packages/utils/tests/msgpack/msgpack.test.ts new file mode 100644 index 00000000..42454bdb --- /dev/null +++ b/packages/utils/tests/msgpack/msgpack.test.ts @@ -0,0 +1,491 @@ +/** + * MessagePack Tests + * + * Based on the original test cases from MessagePack-JS: + * https://github.com/cuzic/MessagePack-JS + */ + +import { describe, expect, it } from "vitest"; +import { CharSet, Decoder, Encoder, pack, packToString, unpack } from "../../src/msgpack/index.js"; + +/** + * Helper function to convert hex string to byte array + */ +function hexToBytes(hex: string): Uint8Array { + const bytes = new Uint8Array(hex.length / 2); + for (let i = 0; i < hex.length; i += 2) { + bytes[i / 2] = parseInt(hex.slice(i, i + 2), 16); + } + return bytes; +} + +/** + * Helper function to convert byte array to hex string + */ +function _bytesToHex(bytes: Uint8Array): string { + return Array.from(bytes) + .map((b) => b.toString(16).padStart(2, "0")) + .join(""); +} + +/** + * Helper function to create string from hex (for decoder tests) + */ +function hexToString(hex: string): string { + let str = ""; + for (let i = 0; i < hex.length; i += 2) { + str += String.fromCharCode(parseInt(hex.slice(i, i + 2), 16)); + } + return str; +} + +describe("MessagePack", () => { + describe("unpack (decoder)", () => { + 
// Decoder fixtures: every hex string below is one complete MessagePack payload,
// and each test pins the JavaScript value that payload must decode to.
    it("decodes positive fixnum", () => {
      const data = hexToString("00");
      const output = unpack(data);
      expect(output).toBe(0);
    });

    it("decodes negative fixnum", () => {
      const data = hexToString("ff");
      const output = unpack(data);
      expect(output).toBe(-1);
    });

    it("decodes uint8", () => {
      const data = hexToString("ccff");
      const output = unpack(data);
      expect(output).toBe(255);
    });

    it("decodes fixstr", () => {
      const data = hexToString("a161");
      const output = unpack(data);
      expect(output).toBe("a");
    });

    it("decodes fixarray", () => {
      const data = hexToString("9100");
      const output = unpack(data);
      expect(output).toEqual([0]);
    });

    it("decodes fixmap", () => {
      // The integer key 0 is expected to surface as the property name "0".
      const data = hexToString("8100c0");
      const output = unpack(data);
      expect(output).toEqual({ "0": null });
    });

    it("decodes nil", () => {
      const data = hexToString("c0");
      const output = unpack(data);
      expect(output).toBe(null);
    });

    it("decodes true", () => {
      const data = hexToString("c3");
      const output = unpack(data);
      expect(output).toBe(true);
    });

    it("decodes false", () => {
      const data = hexToString("c2");
      const output = unpack(data);
      expect(output).toBe(false);
    });

    it("decodes double", () => {
      const data = hexToString("cb3fb999999999999a");
      const output = unpack(data);
      expect(output).toBeCloseTo(0.1, 15);
    });

    it("decodes uint16", () => {
      const data = hexToString("cd8000");
      const output = unpack(data);
      expect(output).toBe(32768);
    });

    it("decodes uint32", () => {
      const data = hexToString("ce00100000");
      const output = unpack(data);
      expect(output).toBe(1048576);
    });

    it("decodes uint64", () => {
      const data = hexToString("cf0000010000000000");
      const output = unpack(data);
      expect(output).toBe(1099511627776);
    });

    it("decodes int8", () => {
      const data = hexToString("d0c0");
      const output = unpack(data);
      expect(output).toBe(-64);
    });

    it("decodes int16", () => {
      const data = hexToString("d1fc00");
      const output = unpack(data);
      expect(output).toBe(-1024);
    });

    it("decodes int32", () => {
      const data = hexToString("d2fff00000");
      const output = unpack(data);
      expect(output).toBe(-1048576);
    });

    it("decodes int64", () => {
      const data = hexToString("d3ffffff0000000000");
      const output = unpack(data);
      expect(output).toBe(-1099511627776);
    });

    it("decodes str16 (40 spaces)", () => {
      // da = str16 marker, 0028 = length 40, followed by 40 space bytes (0x20).
      const data = hexToString(
        "da002820202020202020202020202020202020202020202020202020202020202020202020202020202020",
      );
      const output = unpack(data);
      // repeat() keeps the expected length explicit and immune to whitespace
      // collapsing in editors, diffs, or copy/paste.
      expect(output).toBe(" ".repeat(40));
    });

    it("decodes array16", () => {
      const data = hexToString("dc001000000000000000000000000000000000");
      const output = unpack(data);
      expect(output).toEqual([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
    });

    it("decodes UTF-8 hiragana", () => {
      const data = hexToString("a6e38182e38184");
      const output = unpack(data);
      expect(output).toBe("\u3042\u3044"); // "あい"
    });

    it("decodes with Uint8Array input", () => {
      const data = hexToBytes("ccff");
      const output = unpack(data);
      expect(output).toBe(255);
    });
  });

  // Encoder fixtures: packToString() output is compared against the byte string
  // produced from the expected hex encoding.
  describe("pack (encoder)", () => {
    it("encodes positive fixnum", () => {
      const result = packToString(0);
      expect(result).toBe(hexToString("00"));
    });

    it("encodes negative fixnum", () => {
      const result = packToString(-1);
      expect(result).toBe(hexToString("ff"));
    });

    it("encodes uint8", () => {
      const result = packToString(255);
      expect(result).toBe(hexToString("ccff"));
    });

    it("encodes fixstr", () => {
      const result = packToString("a");
      expect(result).toBe(hexToString("a161"));
    });

    it("encodes fixarray", () => {
      const result = packToString([0]);
      expect(result).toBe(hexToString("9100"));
    });

    it("encodes fixmap", () => {
      const result = packToString({ a: 64 });
      expect(result).toBe(hexToString("81a16140"));
    });

    it("encodes nil", () => {
      const result = packToString(null);
      expect(result).toBe(hexToString("c0"));
    });

    it("encodes true", () => {
      const result = packToString(true);
      expect(result).toBe(hexToString("c3"));
    });

    it("encodes false", () => {
      const result = packToString(false);
      expect(result).toBe(hexToString("c2"));
    });

    it("encodes double", () => {
      const result = packToString(0.1);
      expect(result).toBe(hexToString("cb3fb999999999999a"));
    });

    it("encodes uint16", () => {
      const result = packToString(32768);
      expect(result).toBe(hexToString("cd8000"));
    });

    it("encodes uint32", () => {
      const result = packToString(1048576);
      expect(result).toBe(hexToString("ce00100000"));
    });

    it("encodes int8", () => {
      const result = packToString(-64);
      expect(result).toBe(hexToString("d0c0"));
    });

    it("encodes int16", () => {
      const result = packToString(-1024);
      expect(result).toBe(hexToString("d1fc00"));
    });

    it("encodes int32", () => {
      const result = packToString(-1048576);
      expect(result).toBe(hexToString("d2fff00000"));
    });

    it("encodes int64", () => {
      const result = packToString(-1099511627776);
      expect(result).toBe(hexToString("d3ffffff0000000000"));
    });

    it("encodes str8 (40 spaces)", () => {
      // Modern MessagePack uses str8 for strings 32-255 bytes (more efficient)
      // Original JS used str16 (0xda), but str8 (0xd9) is more compact
      // repeat() guarantees the input really is 40 characters long.
      const spaces = " ".repeat(40);
      const result = packToString(spaces);
      // d9 = str8, 28 = length 40, then 40 space characters (0x20)
      expect(result).toBe(
        hexToString(
          "d92820202020202020202020202020202020202020202020202020202020202020202020202020202020",
        ),
      );
    });

    it("encodes array16", () => {
      const ary = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
      const result = packToString(ary);
      expect(result).toBe(hexToString("dc001000000000000000000000000000000000"));
    });

    it("returns Uint8Array from pack()", () => {
      const result = pack(42);
      expect(result).toBeInstanceOf(Uint8Array);
      expect(result).toEqual(new Uint8Array([42]));
    });
  });

  // Round trips: unpack(pack(x)) must give back x for every supported value kind.
  describe("roundtrip", () => {
    it("roundtrips integers", () => {
      // Values sit on both sides of each integer-width boundary.
      const values = [
        0, 1, 127, 128, 255, 256, 32767, 32768, 65535, 65536, 2147483647, -1, -32, -33, -128, -129,
        -32768, -32769, -2147483648,
      ];
      for (const value of values) {
        const packed = pack(value);
        const unpacked = unpack(packed);
        expect(unpacked).toBe(value);
      }
    });

    it("roundtrips floats", () => {
      const values = [0.1, 0.5, 1.5, -0.1, -0.5, Math.PI, Math.E];
      for (const value of values) {
        const packed = pack(value);
        const unpacked = unpack(packed);
        expect(unpacked).toBeCloseTo(value, 10);
      }
    });

    it("roundtrips strings", () => {
      const values = ["", "a", "hello", "hello world", "\u3042\u3044"];
      for (const value of values) {
        const packed = pack(value);
        const unpacked = unpack(packed);
        expect(unpacked).toBe(value);
      }
    });

    it("roundtrips arrays", () => {
      const values = [[], [1], [1, 2, 3], ["a", "b"], [1, "two", null, true]];
      for (const value of values) {
        const packed = pack(value);
        const unpacked = unpack(packed);
        expect(unpacked).toEqual(value);
      }
    });

    it("roundtrips objects", () => {
      const values = [{}, { a: 1 }, { a: 1, b: 2 }, { hello: "world" }, { nested: { deep: true } }];
      for (const value of values) {
        const packed = pack(value);
        const unpacked = unpack(packed);
        expect(unpacked).toEqual(value);
      }
    });

    it("roundtrips null", () => {
      const packed = pack(null);
      const unpacked = unpack(packed);
      expect(unpacked).toBe(null);
    });

    it("roundtrips booleans", () => {
      expect(unpack(pack(true))).toBe(true);
      expect(unpack(pack(false))).toBe(false);
    });

    it("roundtrips complex nested structures", () => {
      const value = {
        name: "test",
        count: 42,
        enabled: true,
        items: [1, 2, 3],
        nested: {
          deep: {
            value: "hello",
          },
        },
      };
      const packed = pack(value);
      const unpacked = unpack(packed);
      expect(unpacked).toEqual(value);
    });
  });

  // The charSet option may be given as a string or as a CharSet enum member.
  describe("Decoder class", () => {
    it("supports charset option as string", () => {
      const data = hexToString("a161");
      const decoder = new Decoder(data, { charSet: "utf-8" });
      expect(decoder.unpack()).toBe("a");
    });

    it("supports charset option as enum", () => {
      const data = hexToString("a161");
      const decoder = new Decoder(data, { charSet: CharSet.UTF8 });
      expect(decoder.unpack()).toBe("a");
    });

    it("returns byte array with ByteArray charset", () => {
      // With ByteArray the string payload comes back as raw byte values (97 = "a").
      const data = hexToString("a161");
      const decoder = new Decoder(data, { charSet: CharSet.ByteArray });
      expect(decoder.unpack()).toEqual([97]);
    });
  });

  describe("Encoder class", () => {
    it("creates reusable encoder", () => {
      // The second pack() call must not include bytes from the first one.
      const encoder = new Encoder();
      const result1 = encoder.pack(42);
      const result2 = encoder.pack("hello");
      expect(result1).toEqual(new Uint8Array([42]));
      expect(result2).toEqual(new Uint8Array([0xa5, 0x68, 0x65, 0x6c, 0x6c, 0x6f]));
    });

    it("supports packToString method", () => {
      const encoder = new Encoder();
      const result = encoder.packToString(42);
      expect(result).toBe("*"); // 42 = 0x2a = '*'
    });
  });

  describe("edge cases", () => {
    it("handles empty string", () => {
      const packed = pack("");
      const unpacked = unpack(packed);
      expect(unpacked).toBe("");
    });

    it("handles empty array", () => {
      const packed = pack([]);
      const unpacked = unpack(packed);
      expect(unpacked).toEqual([]);
    });

    it("handles empty object", () => {
      const packed = pack({});
      const unpacked = unpack(packed);
      expect(unpacked).toEqual({});
    });

    it("handles zero", () => {
      const packed = pack(0);
      const unpacked = unpack(packed);
      expect(unpacked).toBe(0);
    });

    it("handles negative zero as zero", () => {
      const packed = pack(-0);
      const unpacked = unpack(packed);
      // Object.is distinguishes -0 from +0; the round trip must yield +0.
      expect(Object.is(unpacked, 0)).toBe(true);
    });

    it("handles large arrays", () => {
      // 100 sequential integers: 0, 1, ..., 99.
      const array = new Array(100).fill(0).map((_, i) => i);
      const packed = pack(array);
      const unpacked = unpack(packed);
      expect(unpacked).toEqual(array);
    });

    it("handles deeply nested structures", () => {
      const deep = { a: { b: { c: { d: { e: 1 } } } } };
      const packed = pack(deep);
      const unpacked = unpack(packed);
      expect(unpacked).toEqual(deep);
    });

    it("handles Unicode strings", () => {
      const strings = [
        "\u0000", // null character
        "\u00ff", // Latin-1
        "\u0100", // Latin Extended-A
        "\u3042\u3044\u3046", // Japanese hiragana
        "\u4e2d\u6587", // Chinese
        "\ud83d\ude00", // Emoji (surrogate pair)
      ];
      for (const str of strings) {
        const packed = pack(str);
        const unpacked = unpack(packed);
        expect(unpacked).toBe(str);
      }
    });
  });

  // Checks that the encoder picks the single-byte "fix" headers for small values.
  describe("format verification", () => {
    it("uses fixint for 0-127", () => {
      for (let i = 0; i <= 127; i++) {
        const packed = pack(i);
        expect(packed.length).toBe(1);
        expect(packed[0]).toBe(i);
      }
    });

    it("uses negative fixint for -32 to -1", () => {
      for (let i = -32; i <= -1; i++) {
        const packed = pack(i);
        expect(packed.length).toBe(1);
        // Two's-complement view of the value as an unsigned byte.
        expect(packed[0]).toBe(i + 256);
      }
    });

    it("uses fixstr for short strings", () => {
      const str = "hi";
      const packed = pack(str);
      expect(packed[0]).toBe(0xa0 + str.length); // fixstr prefix
    });

    it("uses fixarray for small arrays", () => {
      const arr = [1, 2, 3];
      const packed = pack(arr);
      expect(packed[0]).toBe(0x90 + arr.length); // fixarray prefix
    });

    it("uses fixmap for small objects", () => {
      const obj = { a: 1 };
      const packed = pack(obj);
      expect(packed[0]).toBe(0x80 + 1); // fixmap prefix with 1 pair
    });
  });
});