Add a sha256 hash implementation and use it for string_hash Even for something that's not cryptographically-sensitive, it is better to avoid introducing new dependencies on known-broken hashes. Bug: chromium:463302946 Change-Id: Idc3dacc0d7d84845bfd3f9e9ec5147a7ec0347b2 Reviewed-on: https://gn-review.googlesource.com/c/gn/+/20540 Reviewed-by: Ćukasz Anforowicz <lukasza@chromium.org> Reviewed-by: Andrew Grieve <agrieve@google.com> Commit-Queue: Daniel Cheng <dcheng@chromium.org>
diff --git a/build/gen.py b/build/gen.py index 5d161e4..e5e2d7e 100755 --- a/build/gen.py +++ b/build/gen.py
@@ -629,6 +629,7 @@ 'src/base/memory/ref_counted.cc', 'src/base/memory/weak_ptr.cc', 'src/base/sha1.cc', + 'src/base/sha2.cc', 'src/base/strings/string_number_conversions.cc', 'src/base/strings/string_split.cc', 'src/base/strings/string_util.cc', @@ -813,6 +814,7 @@ 'gn': {'sources': [ 'src/gn/gn_main.cc' ], 'libs': []}, 'gn_unittests': { 'sources': [ + 'src/base/sha2_unittest.cc', 'src/gn/action_target_generator_unittest.cc', 'src/gn/analyzer_unittest.cc', 'src/gn/args_unittest.cc',
diff --git a/docs/reference.md b/docs/reference.md index d5dfa0f..7e848c7 100644 --- a/docs/reference.md +++ b/docs/reference.md
@@ -1330,7 +1330,7 @@ Restricts output to targets which refer to input files by a specific relation. Defaults to any relation. Can be provided multiple times to include multiple relations. - + ``` #### **Examples (target input)** @@ -3464,9 +3464,9 @@ hash = string_hash(long_string) `string_hash` returns a string that contains a hash of the argument. The hash - is computed by first calculating an MD5 hash of the argument, and then + is computed by first calculating a SHA256 hash of the argument, and then returning the first 8 characters of the lowercase-ASCII, hexadecimal encoding - of the MD5 hash. + of the SHA256 hash. `string_hash` is intended to be used when it is desirable to translate, globally unique strings (such as GN labels) into short filenames that are @@ -3483,7 +3483,7 @@ #### **Examples**: ``` - string_hash("abc") --> "90015098" + string_hash("abc") --> "ba7816bf" ``` ### <a name="func_string_join"></a>**string_join**: Concatenates a list of strings with a separator. [Back to Top](#gn-reference) @@ -8473,4 +8473,3 @@ * -v: Verbose logging. * --version: Prints the GN version number and exits. ``` -
diff --git a/src/base/sha2.cc b/src/base/sha2.cc new file mode 100644 index 0000000..e6fcc08 --- /dev/null +++ b/src/base/sha2.cc
@@ -0,0 +1,179 @@ +// Copyright 2025 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/sha2.h" + +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include <algorithm> +#include <array> +#include <utility> + +#include "base/logging.h" +#include "base/sys_byteorder.h" + +namespace base { + +namespace { + +// Reference: http://dx.doi.org/10.6028/NIST.FIPS.180-4 + +// From section 2.2.2: +uint32_t RotateRight(uint32_t x, uint8_t n) { + return (x >> n) | (x << (32 - n)); +} + +// From section 4.1.2: +uint32_t Ch(uint32_t x, uint32_t y, uint32_t z) { + return (x & y) ^ (~x & z); +} + +uint32_t Maj(uint32_t x, uint32_t y, uint32_t z) { + return (x & y) ^ (x & z) ^ (y & z); +} + +uint32_t Sum0(uint32_t x) { + return RotateRight(x, 2) ^ RotateRight(x, 13) ^ RotateRight(x, 22); +} + +uint32_t Sum1(uint32_t x) { + return RotateRight(x, 6) ^ RotateRight(x, 11) ^ RotateRight(x, 25); +} + +uint32_t Sigma0(uint32_t x) { + return RotateRight(x, 7) ^ RotateRight(x, 18) ^ (x >> 3); +} + +uint32_t Sigma1(uint32_t x) { + return RotateRight(x, 17) ^ RotateRight(x, 19) ^ (x >> 10); +} + +// From section 4.2.2: +constexpr std::array<uint32_t, 64> kConstants = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, + 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, + 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, + 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, + 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, + 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, +}; + +class Sha256Hasher { + public: + Sha256Hasher() = default; + + // Pre-requisite: `chunk.size() == 64`, since SHA256 operates on 512-bit + // blocks. + void Update(std::string_view chunk) { + CHECK(chunk.size() == 64); + + // From section 6.2.2, step 1: "Prepare the message schedule" + memcpy(w_.data(), chunk.data(), chunk.size()); + for (int t = 0; t < 16; ++t) { + w_[t] = ByteSwap(w_[t]); + } + for (int t = 16; t < 64; ++t) { + w_[t] = Sigma1(w_[t - 2]) + w_[t - 7] + Sigma0(w_[t - 15]) + w_[t - 16]; + } + + // From section 6.2.2, step 2: "Initialize the eight working variables" + uint32_t a = hash_[0]; + uint32_t b = hash_[1]; + uint32_t c = hash_[2]; + uint32_t d = hash_[3]; + uint32_t e = hash_[4]; + uint32_t f = hash_[5]; + uint32_t g = hash_[6]; + uint32_t h = hash_[7]; + + // From section 6.2.2, step 3: + for (int t = 0; t < 64; ++t) { + uint32_t tmp1 = h + Sum1(e) + Ch(e, f, g) + kConstants[t] + w_[t]; + uint32_t tmp2 = Sum0(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + tmp1; + d = c; + c = b; + b = a; + a = tmp1 + tmp2; + } + + // From section 6.2.2, step 4: + hash_[0] += a; + hash_[1] += b; + hash_[2] += c; + hash_[3] += d; + hash_[4] += e; + hash_[5] += f; + hash_[6] += g; + hash_[7] += h; + } + + std::array<uint8_t, kSha256Length> Finalize(std::string_view chunk, + uint64_t original_size) && { + std::array<char, 64> padding_chunk = {}; + auto next = std::copy(chunk.begin(), chunk.end(), padding_chunk.begin()); + // From section 5.1.1, the padding consists of a 0x80 byte, followed by a + // 64-bit block with the length of the message in bits in big-endian order. + *next++ = uint8_t{0x80}; + + // If there's not enough space for the length, pad out one additional chunk. + if (padding_chunk.end() - next < 8) { + Update(std::string_view(padding_chunk.data(), padding_chunk.size())); + std::fill(padding_chunk.begin(), padding_chunk.begin() + 56, 0); + } + + const uint64_t original_size_in_bits = original_size * 8; + padding_chunk[56] = (original_size_in_bits >> 56) & 0xffu; + padding_chunk[57] = (original_size_in_bits >> 48) & 0xffu; + padding_chunk[58] = (original_size_in_bits >> 40) & 0xffu; + padding_chunk[59] = (original_size_in_bits >> 32) & 0xffu; + padding_chunk[60] = (original_size_in_bits >> 24) & 0xffu; + padding_chunk[61] = (original_size_in_bits >> 16) & 0xffu; + padding_chunk[62] = (original_size_in_bits >> 8) & 0xffu; + padding_chunk[63] = original_size_in_bits & 0xffu; + Update(std::string_view(padding_chunk.data(), padding_chunk.size())); + + std::array<uint8_t, kSha256Length> result; + for (uint32_t& word : hash_) { + word = ByteSwap(word); + } + memcpy(result.data(), hash_.data(), sizeof(result)); + return result; + } + + private: + // From section 5.3.3: + std::array<uint32_t, 8> hash_ = { + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, + 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, + }; + // The message schedule for intermediate states. Member to make it easier to + // reuse code between `Update()` and `Finalize()`. + std::array<uint32_t, 64> w_ = {}; +}; + +} // namespace + +std::array<uint8_t, kSha256Length> Sha256(std::string_view str) { + const size_t original_size = str.size(); + Sha256Hasher hasher; + while (str.size() >= 64) { + std::string_view next = str.substr(0, 64); + hasher.Update(next); + str.remove_prefix(64); + } + return std::move(hasher).Finalize(str, original_size); +} + +} // namespace base
diff --git a/src/base/sha2.h b/src/base/sha2.h new file mode 100644 index 0000000..94dffe7 --- /dev/null +++ b/src/base/sha2.h
@@ -0,0 +1,23 @@ +// Copyright 2025 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_SHA2_H_ +#define BASE_SHA2_H_ + +#include <stdint.h> + +#include <array> +#include <string_view> + +namespace base { + +static const size_t kSha256Length = 32; // Length in bytes of a SHA-256 hash. + +// Computes the SHA-256 hash of `bytes` and returns the digest as an array of +// bytes. +std::array<uint8_t, kSha256Length> Sha256(std::string_view bytes); + +} // namespace base + +#endif // BASE_SHA2_H_
diff --git a/src/base/sha2_unittest.cc b/src/base/sha2_unittest.cc new file mode 100644 index 0000000..99a08b0 --- /dev/null +++ b/src/base/sha2_unittest.cc
@@ -0,0 +1,58 @@ +// Copyright 2025 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/sha2.h" + +#include <string> + +#include "base/strings/string_number_conversions.h" +#include "util/test/test.h" + +namespace base { + +namespace { + +std::string Sha256AsHexString(std::string_view in) { + std::array<uint8_t, kSha256Length> result = Sha256(in); + return HexEncode(result.data(), result.size()); +} + +TEST(Sha2Test, Basic) { + EXPECT_EQ("E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855", + Sha256AsHexString("")); + + // Reference values from + // https://csrc.nist.gov/csrc/media/projects/cryptographic-standards-and-guidelines/documents/examples/sha256.pdf + EXPECT_EQ("BA7816BF8F01CFEA414140DE5DAE2223B00361A396177A9CB410FF61F20015AD", + Sha256AsHexString("abc")); + EXPECT_EQ("248D6A61D20638B8E5C026930C3E6039A33CE45964FF2167F6ECEDD419DB06C1", + Sha256AsHexString( + "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq")); + // Additional tests from + // https://csrc.nist.gov/CSRC/media/Projects/Cryptographic-Standards-and-Guidelines/documents/examples/SHA2_Additional.pdf + EXPECT_EQ("68325720AABD7C82F30F554B313D0570C95ACCBB7DC4B5AAE11204C08FFE732B", + Sha256AsHexString("\xbd")); + EXPECT_EQ("7ABC22C0AE5AF26CE93DBB94433A0E0B2E119D014F8E7F65BD56C61CCCCD9504", + Sha256AsHexString("\xc9\x8c\x8e\x55")); + EXPECT_EQ("02779466CDEC163811D078815C633F21901413081449002F24AA3E80F0B88EF7", + Sha256AsHexString(std::string(55, '\0'))); + EXPECT_EQ("D4817AA5497628E7C77E6B606107042BBBA3130888C5F47A375E6179BE789FBB", + Sha256AsHexString(std::string(56, '\0'))); + EXPECT_EQ("65A16CB7861335D5ACE3C60718B5052E44660726DA4CD13BB745381B235A1785", + Sha256AsHexString(std::string(57, '\0'))); + EXPECT_EQ("F5A5FD42D16A20302798EF6ED309979B43003D2320D9F0E8EA9831A92759FB4B", + Sha256AsHexString(std::string(64, '\0'))); + EXPECT_EQ("541B3E9DAA09B20BF85FA273E5CBD3E80185AA4EC298E765DB87742B70138A53", + Sha256AsHexString(std::string(1000, '\0'))); + EXPECT_EQ("C2E686823489CED2017F6059B8B239318B6364F6DCD835D0A519105A1EADD6E4", + Sha256AsHexString(std::string(1000, 'A'))); + EXPECT_EQ("F4D62DDEC0F3DD90EA1380FA16A5FF8DC4C54B21740650F24AFC4120903552B0", + Sha256AsHexString(std::string(1005, 'U'))); + EXPECT_EQ("D29751F2649B32FF572B5E0A9F541EA660A50F94FF0BEEDFB0B692B924CC8025", + Sha256AsHexString(std::string(1000000, '\0'))); +} + +} // namespace + +} // namespace base
diff --git a/src/gn/functions.cc b/src/gn/functions.cc index 246e668..830fcee 100644 --- a/src/gn/functions.cc +++ b/src/gn/functions.cc
@@ -10,7 +10,8 @@ #include <utility> #include "base/environment.h" -#include "base/md5.h" +#include "base/sha2.h" +#include "base/strings/string_number_conversions.h" #include "base/strings/string_util.h" #include "gn/build_settings.h" #include "gn/config.h" @@ -1146,9 +1147,9 @@ hash = string_hash(long_string) `string_hash` returns a string that contains a hash of the argument. The hash - is computed by first calculating an MD5 hash of the argument, and then + is computed by first calculating the SHA256 hash of the argument, and then returning the first 8 characters of the lowercase-ASCII, hexadecimal encoding - of the MD5 hash. + of the SHA256 hash. `string_hash` is intended to be used when it is desirable to translate, globally unique strings (such as GN labels) into short filenames that are @@ -1163,7 +1164,7 @@ Examples: - string_hash("abc") --> "90015098" + string_hash("abc") --> "ba7816bf" )"; Value RunStringHash(Scope* scope, @@ -1186,25 +1187,12 @@ const std::string& arg = args[0].string_value(); // Arguments looks good; do the hash. - - // MD5 has been chosen as the hash algorithm, because: - // - // 1. MD5 implementation has been readily available in GN repo. - // 2. It fits the requirements of the motivating scenario - // (see https://crbug.com/463302946). In particular: - // 2.1. This scenario doesn't require cryptographic-strength hashing. - // 2.2. MD5 produces slightly shorter hashes than SHA1 and this scenario - // cares about keeping the filenames short, and somewhat ergonomic. - // 2.3. MD5 is a well-known hashing algorithm and this scenario needs - // to replicate the same hash outside of GN. - std::string md5 = base::MD5String(arg); + std::array<uint8_t, base::kSha256Length> hash = base::Sha256(arg); // Trimming to 32 bits for improved ergonomics. Probability of collisions // should still be sufficiently low (see https://crbug.com/46330294 for more // discussion). - std::string trimmed = md5.substr(0, 8); - - return Value(function, trimmed); + return Value(function, base::ToLowerASCII(base::HexEncode(hash.data(), 4))); } // string_join -----------------------------------------------------------------
diff --git a/src/gn/functions_unittest.cc b/src/gn/functions_unittest.cc index c51351a..61a3b35 100644 --- a/src/gn/functions_unittest.cc +++ b/src/gn/functions_unittest.cc
@@ -218,8 +218,8 @@ ASSERT_FALSE(err.has_error()) << err.message(); EXPECT_EQ( - "<90015098>\n" - "<d41d8cd9>\n", + "<ba7816bf>\n" + "<e3b0c442>\n", setup.print_output()) << setup.print_output(); }