From 3d5022b6eb59e725448015c58af55b1578426d33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20=C5=A0pinka?= Date: Tue, 28 Jan 2025 22:50:40 +0100 Subject: [PATCH] Implement SHA-1. --- README.md | 9 +- src/primitive/digest/sha.zig | 279 +++++++++++++++++++++++++++++------ 2 files changed, 244 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index fd824c4..572b30b 100644 --- a/README.md +++ b/README.md @@ -23,17 +23,24 @@ Most (**theoretical!**) users should directly use one of the cryptographic *prot - Advanced Encryption Standard (FIPS 197): AES-128, AES-192, AES-256 - ChaCha20 (RFC 7539): ChaCha20 with 64-bit nonce and 64-bit counter, ChaCha20 with 96-bit nonce and 32-bit counter - Salsa20: Salsa20/20 with 256-key, Salsa20/20 with 128-bit key +- Secure Hashing Algorithm: SHA-1 ### Protocols ## Roadmap +### Code quality + +- Use std library functions where possible +- Refactor data manipulation (LE/BE int serialization, word rotations, etc.) into one place +- Use @TypeInfo, @divExact, etc. to correctly abstract int sizes + ### Primitives - DES, 3DES - Block cipher modes: CBC-PKCS7, CFB, OFB, CTR, GCM - Poly1305 -- SHA-256 +- SHA-2, SHA-3 - BigIntegers & modular arithmetic - Cryptographically secure random BigInteger generation & primality testing - Elliptic Curve groups (over Fp fields) diff --git a/src/primitive/digest/sha.zig b/src/primitive/digest/sha.zig index 6d3cfcd..9dd327c 100644 --- a/src/primitive/digest/sha.zig +++ b/src/primitive/digest/sha.zig @@ -47,15 +47,10 @@ const SHA_512_256_IV = [_]u64{ const Sha1Ctx = struct { const BLOCK_SIZE = 512 / 8; + const MESSAGE_SCHEDULE_WORDS = 80; - message_schedule: [16]u32, - hash: [5]u32, - a: u32, - b: u32, - c: u32, - d: u32, - e: u32, - + message_schedule: [MESSAGE_SCHEDULE_WORDS]u32, + hash: [SHA_1_DIGEST_LENGTH / 4]u32, message_buffer: [BLOCK_SIZE]u8, message_length: u64, }; @@ -65,15 +60,6 @@ const Sha2Ctx = struct { message_schedule: [16]u32, hash: [8]u32, - a: u32, - b: u32, - c: u32, - d: u32, - e: u32, - f: u32, - g: u32, - h: u32, - message_buffer: [BLOCK_SIZE]u8, message_length: u64, @@ -85,15 +71,6 @@ const Sha3Ctx = struct { message_schedule: [16]u64, hash: [8]u64, - a: u64, - b: u64, - c: u64, - d: u64, - e: u64, - f: u64, - g: u64, - h: u64, - message_buffer: [BLOCK_SIZE]u8, message_length: u128, @@ -107,33 +84,156 @@ pub fn sha1_new() Sha1Ctx { var ctx = Sha1Ctx{ .message_schedule = undefined, .hash = undefined, - .a = undefined, - .b = undefined, - .c = undefined, - .d = undefined, - .e = undefined, .message_buffer = undefined, .message_length = 0, }; - @memcpy(&ctx.hash, &SHA_1_IV); - ctx.a = ctx.hash[0]; - ctx.b = ctx.hash[1]; - ctx.c = ctx.hash[2]; - ctx.d = ctx.hash[3]; - ctx.e = ctx.hash[4]; - return ctx; } pub fn sha1_update(ctx: *Sha1Ctx, message: []const u8) !void { - // TODO - _ = .{ ctx, message }; + // SHA-1 can digest a message of a maximum length of (2^64 - 1) bits due to the nature of its padding. + if (ctx.message_length + message.len > ((1 << 64) / 8)) + return MessageLengthLimitExceeded; + + const cnt_buffered_bytes = ctx.message_length % Sha1Ctx.BLOCK_SIZE; + + // Simplest case - the message did not fully fill the block size + // so it's just copied to the context and no hashing is done yet. + if (cnt_buffered_bytes + message.len < Sha1Ctx.BLOCK_SIZE) { + @memcpy( + ctx.message_buffer[cnt_buffered_bytes .. cnt_buffered_bytes + message.len], + message[0..], + ); + ctx.message_length += message.len; + return; + } + + // Otherwise: first, copy & hash the first block. + @memcpy( + ctx.message_buffer[cnt_buffered_bytes..], + message[0 .. Sha1Ctx.BLOCK_SIZE - cnt_buffered_bytes], + ); + sha1_hash_one_block(ctx); + var cnt_message_bytes_processed = Sha1Ctx.BLOCK_SIZE - cnt_buffered_bytes; + ctx.message_length += cnt_message_bytes_processed; + + // Then, as long as there is at least another block available, copy and hash it. + while (message.len - cnt_message_bytes_processed >= Sha1Ctx.BLOCK_SIZE) { + @memcpy(ctx.message_buffer[0..], message[cnt_message_bytes_processed .. cnt_message_bytes_processed + Sha1Ctx.BLOCK_SIZE]); + sha1_hash_one_block(ctx); + ctx.message_length += Sha1Ctx.BLOCK_SIZE; + cnt_message_bytes_processed += Sha1Ctx.BLOCK_SIZE; + } + + // Finally, copy any leftover bytes to the context buffer without hashing. + const cnt_leftover_bytes = message.len - cnt_message_bytes_processed; + @memcpy( + ctx.message_buffer[0..cnt_leftover_bytes], + message[cnt_message_bytes_processed..], + ); + ctx.message_length += cnt_leftover_bytes; } -pub fn sha1_final(ctx: *Sha1Ctx, out: [SHA_1_DIGEST_LENGTH]u8) void { - // TODO - _ = .{ ctx, out }; +pub fn sha1_final(ctx: *Sha1Ctx, out: *[SHA_1_DIGEST_LENGTH]u8) void { + // The message length is stored in the padding as a 64-bit int. + const message_length_bytes = 64 / 8; + + const cnt_leftover_bytes = ctx.message_length % Sha1Ctx.BLOCK_SIZE; + + // Simpler case: The leftover message is shorter than 446 bits + // (or 55 bytes) and the padding only spans one block. + if (cnt_leftover_bytes < Sha1Ctx.BLOCK_SIZE - (message_length_bytes)) { + const cnt_padding_bytes = Sha1Ctx.BLOCK_SIZE - message_length_bytes - cnt_leftover_bytes; + + // The padding (without the message length) is a single 1 bit followed by 0 bits. + ctx.message_buffer[cnt_leftover_bytes] = 0x80; + @memset(ctx.message_buffer[cnt_leftover_bytes + 1 .. cnt_leftover_bytes + cnt_padding_bytes], 0x00); + + // The length is appended. + const length = serialize_int_big_endian(u64, ctx.message_length * 8); + @memcpy(ctx.message_buffer[cnt_leftover_bytes + cnt_padding_bytes ..], length[0..]); + + // The padded block is finally hashed. + sha1_hash_one_block(ctx); + } + // Otherwise, the padding spans 2 blocks in total + // and two more hash iterations are performed. + else { + // Pad and hash the first block. + ctx.message_buffer[cnt_leftover_bytes] = 0x80; + @memset(ctx.message_buffer[cnt_leftover_bytes + 1 ..], 0x00); + sha1_hash_one_block(ctx); + + // Hash the second block. + @memset(ctx.message_buffer[0..(Sha1Ctx.BLOCK_SIZE - message_length_bytes)], 0x00); + const length = serialize_int_big_endian(u64, ctx.message_length * 8); + @memcpy(ctx.message_buffer[(Sha1Ctx.BLOCK_SIZE - message_length_bytes)..], length[0..]); + sha1_hash_one_block(ctx); + } + + // Serialize the result. + for (0..SHA_1_DIGEST_LENGTH / 4) |w| { + const serialized_word = serialize_int_big_endian(u32, ctx.hash[w]); + @memcpy(out[(w * 4)..(w * 4 + 4)], serialized_word[0..]); + } +} + +pub fn sha1_hash_one_block(ctx: *Sha1Ctx) void { + // Prepare the message schedule. + for (0..Sha1Ctx.BLOCK_SIZE / 4) |t| + ctx.message_schedule[t] = deserialize_int_big_endian(u32, @ptrCast(ctx.message_buffer[(t * 4)..(t * 4 + 4)])); + for (Sha1Ctx.BLOCK_SIZE / 4..Sha1Ctx.MESSAGE_SCHEDULE_WORDS) |t| { + ctx.message_schedule[t] = rotl( + u32, + ctx.message_schedule[t - 3] ^ ctx.message_schedule[t - 8] ^ ctx.message_schedule[t - 14] ^ ctx.message_schedule[t - 16], + 1, + ); + } + + // Initialize working variables. + var a = ctx.hash[0]; + var b = ctx.hash[1]; + var c = ctx.hash[2]; + var d = ctx.hash[3]; + var e = ctx.hash[4]; + + // Perform the actual hashing. + inline for (0..Sha1Ctx.MESSAGE_SCHEDULE_WORDS) |t| { + const tmp = rotl(u32, a, 5) +% sha1_f(t, b, c, d) +% e +% sha1_k(t) +% ctx.message_schedule[t]; + e = d; + d = c; + c = rotl(u32, b, 30); + b = a; + a = tmp; + } + + // Add the result to the previous hash state. + ctx.hash[0] +%= a; + ctx.hash[1] +%= b; + ctx.hash[2] +%= c; + ctx.hash[3] +%= d; + ctx.hash[4] +%= e; +} + +inline fn sha1_f(t: comptime_int, x: u32, y: u32, z: u32) u32 { + return switch (t) { + 0...19 => ch(u32, x, y, z), + 20...39 => parity(u32, x, y, z), + 40...59 => maj(u32, x, y, z), + 60...79 => parity(u32, x, y, z), + else => @compileError("SHA-1 `f` function called with invalid value of `t`."), + }; +} + +inline fn sha1_k(t: comptime_int) u32 { + return switch (t) { + 0...19 => 0x5a827999, + 20...39 => 0x6ed9eba1, + 40...59 => 0x8f1bbcdc, + 60...79 => 0xca62c1d6, + else => @compileError("SHA-1 `k` constant requested with invalid value of `t`."), + }; } pub fn sha2_new(t: comptime_int) Sha2Ctx { @@ -277,3 +377,96 @@ pub fn sha512_256_final(ctx: *Sha3Ctx, out: [SHA_512_256_DIGEST_LENGTH]u8) void // TODO _ = .{ ctx, out }; } + +// ----------------------------------- Non-linear functions ----------------------------------- // + +fn ch(T: type, x: T, y: T, z: T) T { + return (x & y) ^ (~x & z); +} + +fn parity(T: type, x: T, y: T, z: T) T { + return x ^ y ^ z; +} + +fn maj(T: type, x: T, y: T, z: T) T { + return (x & y) ^ (x & z) ^ (y & z); +} + +// ----------------------------------- HELPERS ----------------------------------- // + +fn rotl(T: type, word: T, bits: comptime_int) T { + if (comptime bits >= @bitSizeOf(T)) + @compileError("Will not rotate word left by more bits than it has!"); + return (word << bits) | (word >> (@bitSizeOf(T) - bits)); +} + +fn serialize_int_big_endian(T: type, int: T) [@sizeOf(T)]u8 { + var res: [@sizeOf(T)]u8 = undefined; + for (0..@sizeOf(T)) |i| + res[i] = @truncate(int >> @intCast(8 * (@sizeOf(T) - i - 1))); + return res; +} + +fn deserialize_int_big_endian(T: type, bytes: *const [@sizeOf(T)]u8) T { + var res: T = 0; + for (0..@sizeOf(T)) |i| + res |= @as(T, bytes[i]) << @intCast(8 * (@sizeOf(T) - i - 1)); + return res; +} + +// ----------------------------------- TEST VECTORS ----------------------------------- // + +fn hex_nibble_to_int(ascii_hex: u8) u4 { + const x = ascii_hex; + return @intCast(if (x >= '0' and x <= '9') + x - '0' + else if (x >= 'a' and x <= 'f') + 10 + (x - 'a') + else if (x >= 'A' and x <= 'F') + 10 + (x - 'A') + else + @panic("Argument is not a valid hex digit!")); +} + +fn hex_to_bytes(L: comptime_int, hex_string: *const [2 * L]u8) [L]u8 { + var res: [L]u8 = undefined; + for (0..L) |i| { + res[i] = @as(u8, hex_nibble_to_int(hex_string[2 * i])) << 4; + res[i] |= hex_nibble_to_int(hex_string[2 * i + 1]); + } + return res; +} + +// https://www.di-mgt.com.au/sha_testvectors.html +test "SHA-1 basic test" { + const tests = [_]struct { + message: []const u8, + hash: *const [2 * SHA_1_DIGEST_LENGTH]u8, + }{ + .{ .message = "", .hash = "da39a3ee5e6b4b0d3255bfef95601890afd80709" }, + .{ .message = "abc", .hash = "a9993e364706816aba3e25717850c26c9cd0d89d" }, + .{ + .message = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", + .hash = "84983e441c3bd26ebaae4aa1f95129e5e54670f1", + }, + .{ + .message = "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", + .hash = "a49b2446a02c645bf419f995b67091253a04a259", + }, + }; + + var digest_buffer: [SHA_1_DIGEST_LENGTH]u8 = undefined; + + for (tests) |t| { + var ctx = sha1_new(); + try sha1_update(&ctx, t.message); + sha1_final(&ctx, &digest_buffer); + + const reference = hex_to_bytes(SHA_1_DIGEST_LENGTH, t.hash); + try testing.expectEqualSlices(u8, reference[0..], digest_buffer[0..]); + } +} + +test "SHA-1 padding test" { + // TODO +}