bech32: expose the character conversion functionality

In the next commit we will implement a new checksum, codex32, which uses
the same encoding and HRP rules as bech32 and bech32m, but has a
substantially different checksum verification procedure. To minimize
duplicated code, we expose the character conversion in a new
bech32::internal namespace.

Co-authored-by: Luke Dashjr <luke-jr+git@utopios.org>
Github-Pull: #27351
Rebased-From: 7a57cda8e4461b1ac075251c945eaeb18f1edfd4
This commit is contained in:
Luke Dashjr 2024-11-13 19:36:31 +00:00
parent c62bbf6f86
commit b29bce1f16
2 changed files with 91 additions and 56 deletions

View File

@ -17,22 +17,7 @@ namespace bech32
namespace
{
/** Sequence of values as processed by the checksum and encoding code. */
typedef std::vector<uint8_t> data;
/** The Bech32 and Bech32m character set for encoding: the character at index i stands for the value i. */
const char* CHARSET = "qpzry9x8gf2tvdw0s3jn54khce6mua7l";
/**
 * The Bech32 and Bech32m character set for decoding.
 *
 * Indexed by 7-bit ASCII code. Each entry is the value (0-31) of that character in CHARSET,
 * or -1 if the character is not part of the character set. The rows for uppercase and
 * lowercase letters are identical, so both cases decode to the same value.
 */
const int8_t CHARSET_REV[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
15, -1, 10, 17, 21, 20, 26, 30, 7, 5, -1, -1, -1, -1, -1, -1,
-1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1,
1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1,
-1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1,
1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1
};
/** Local alias for the value sequence type declared in the internal namespace. */
typedef internal::data data;
/** We work with the finite field GF(1024) defined as a degree 2 extension of the base field GF(32)
* The defining polynomial of the extension is x^2 + 9x + 23.
@ -308,6 +293,55 @@ bool CheckCharacters(const std::string& str, std::vector<int>& errors)
return errors.empty();
}
/**
 * Determine which encoding, if any, the checksum contained in `values` is valid for.
 *
 * PolyMod is run over the coefficients built from `hrp` and `values` and the result is
 * compared against each encoding's constant. A target of 0 is deliberately not used: if it
 * were, appending a 0 to a valid list of values would result in a new valid list. Bech32
 * therefore requires the result to be 1 instead, and Bech32m amended that constant. See
 * https://gist.github.com/sipa/14c248c288c3880a3b191f978a34508e for details.
 *
 * @return Encoding::BECH32 or Encoding::BECH32M on a match, Encoding::INVALID otherwise.
 */
Encoding VerifyChecksum(const std::string& hrp, const data& values)
{
    auto coefficients = internal::PreparePolynomialCoefficients(hrp, values);
    const uint32_t residue = PolyMod(coefficients);
    // Candidates are tried in this order; the first matching constant wins.
    for (const Encoding candidate : {Encoding::BECH32, Encoding::BECH32M}) {
        if (residue == EncodingConstant(candidate)) return candidate;
    }
    return Encoding::INVALID;
}
/**
 * Compute the checksum values for `hrp` and `values` under the given encoding.
 *
 * @return CHECKSUM_SIZE values, each a 5-bit group (0-31) of the computed checksum, with the
 *         group taken from the highest bits first.
 */
data CreateChecksum(Encoding encoding, const std::string& hrp, const data& values)
{
    auto coefficients = internal::PreparePolynomialCoefficients(hrp, values);
    // Reserve room for the checksum by appending CHECKSUM_SIZE zero placeholders.
    coefficients.resize(coefficients.size() + CHECKSUM_SIZE, 0x00);
    // Determine what to XOR into those placeholder zeroes.
    const uint32_t mod = PolyMod(coefficients) ^ EncodingConstant(encoding);
    data checksum;
    checksum.reserve(CHECKSUM_SIZE);
    for (size_t group = 0; group < CHECKSUM_SIZE; ++group) {
        // Peel off the 5-bit groups of mod, beginning with bits 25-29.
        const size_t shift = 5 * (5 - group);
        checksum.push_back((mod >> shift) & 31);
    }
    return checksum;
}
} // namespace
namespace internal {
/** The Bech32 and Bech32m character set for encoding: the character at index i stands for the value i. */
const char* CHARSET = "qpzry9x8gf2tvdw0s3jn54khce6mua7l";
/**
 * The Bech32 and Bech32m character set for decoding.
 *
 * Indexed by 7-bit ASCII code. Each entry is the value (0-31) of that character in CHARSET,
 * or -1 if the character is not part of the character set. The rows for uppercase and
 * lowercase letters are identical, so both cases decode to the same value.
 */
const int8_t CHARSET_REV[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
15, -1, 10, 17, 21, 20, 26, 30, 7, 5, -1, -1, -1, -1, -1, -1,
-1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1,
1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1,
-1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1,
1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1
};
std::vector<unsigned char> PreparePolynomialCoefficients(const std::string& hrp, const data& values)
{
data ret;
@ -323,39 +357,9 @@ std::vector<unsigned char> PreparePolynomialCoefficients(const std::string& hrp,
return ret;
}
/**
 * Determine which encoding, if any, the checksum contained in `values` is valid for.
 *
 * PolyMod is run over the coefficients built from `hrp` and `values` and the result is
 * compared against each encoding's constant. A target of 0 is deliberately not used: if it
 * were, appending a 0 to a valid list of values would result in a new valid list. Bech32
 * therefore requires the result to be 1 instead, and Bech32m amended that constant. See
 * https://gist.github.com/sipa/14c248c288c3880a3b191f978a34508e for details.
 *
 * @return Encoding::BECH32 or Encoding::BECH32M on a match, Encoding::INVALID otherwise.
 */
Encoding VerifyChecksum(const std::string& hrp, const data& values)
{
    auto coefficients = PreparePolynomialCoefficients(hrp, values);
    const uint32_t residue = PolyMod(coefficients);
    // Candidates are tried in this order; the first matching constant wins.
    for (const Encoding candidate : {Encoding::BECH32, Encoding::BECH32M}) {
        if (residue == EncodingConstant(candidate)) return candidate;
    }
    return Encoding::INVALID;
}
/**
 * Compute the checksum values for `hrp` and `values` under the given encoding.
 *
 * @return CHECKSUM_SIZE values, each a 5-bit group (0-31) of the computed checksum, with the
 *         group taken from the highest bits first.
 */
data CreateChecksum(Encoding encoding, const std::string& hrp, const data& values)
{
    auto coefficients = PreparePolynomialCoefficients(hrp, values);
    // Reserve room for the checksum by appending CHECKSUM_SIZE zero placeholders.
    coefficients.resize(coefficients.size() + CHECKSUM_SIZE, 0x00);
    // Determine what to XOR into those placeholder zeroes.
    const uint32_t mod = PolyMod(coefficients) ^ EncodingConstant(encoding);
    data checksum;
    checksum.reserve(CHECKSUM_SIZE);
    for (size_t group = 0; group < CHECKSUM_SIZE; ++group) {
        // Peel off the 5-bit groups of mod, beginning with bits 25-29.
        const size_t shift = 5 * (5 - group);
        checksum.push_back((mod >> shift) & 31);
    }
    return checksum;
}
} // namespace
/** Encode a Bech32 or Bech32m string. */
std::string Encode(Encoding encoding, const std::string& hrp, const data& values) {
/** Encode a hrpstring without concerning ourselves with checksum validity */
std::string Encode(const std::string& hrp, const data& values, const data& checksum) {
// First ensure that the HRP is all lowercase. BIP-173 and BIP350 require an encoder
// to return a lowercase Bech32/Bech32m string, but if given an uppercase HRP, the
// result will always be invalid.
@ -366,17 +370,17 @@ std::string Encode(Encoding encoding, const std::string& hrp, const data& values
ret += hrp;
ret += '1';
for (const uint8_t& i : values) ret += CHARSET[i];
for (const uint8_t& i : CreateChecksum(encoding, hrp, values)) ret += CHARSET[i];
for (const uint8_t& i : checksum) ret += CHARSET[i];
return ret;
}
/** Decode a Bech32 or Bech32m string. */
DecodeResult Decode(const std::string& str, CharLimit limit) {
/** Decode a hrpstring without concerning ourselves with checksum validity */
std::pair<std::string, data> Decode(const std::string& str, CharLimit limit, size_t checksum_length) {
std::vector<int> errors;
if (!CheckCharacters(str, errors)) return {};
size_t pos = str.rfind('1');
if (str.size() > limit) return {};
if (pos == str.npos || pos == 0 || pos + CHECKSUM_SIZE >= str.size()) {
if (pos == str.npos || pos == 0 || pos + checksum_length >= str.size()) {
return {};
}
data values(str.size() - 1 - pos);
@ -394,9 +398,22 @@ DecodeResult Decode(const std::string& str, CharLimit limit) {
for (size_t i = 0; i < pos; ++i) {
hrp += LowerCase(str[i]);
}
Encoding result = VerifyChecksum(hrp, values);
return std::make_pair(hrp, values);
}
} // namespace internal
/**
 * Encode a Bech32 or Bech32m string.
 *
 * The checksum for the requested encoding is computed here; assembling the string from the
 * HRP, the values and that checksum is delegated to internal::Encode.
 */
std::string Encode(Encoding encoding, const std::string& hrp, const data& values) {
    const data checksum = CreateChecksum(encoding, hrp, values);
    return internal::Encode(hrp, values, checksum);
}
/**
 * Decode a Bech32 or Bech32m string.
 *
 * The string is split into its HRP and values by internal::Decode, the checksum is then
 * verified, and on success the trailing CHECKSUM_SIZE values are stripped from the result.
 * A default-constructed DecodeResult is returned when the checksum does not verify.
 */
DecodeResult Decode(const std::string& str, CharLimit limit) {
auto res = internal::Decode(str, limit, CHECKSUM_SIZE);
// NOTE(review): internal::Decode signals failure by returning an empty pair, which is passed
// straight to VerifyChecksum. This presumably yields INVALID for an empty hrp/values pair;
// confirm, since the slice in the final return assumes at least CHECKSUM_SIZE values.
Encoding result = VerifyChecksum(res.first, res.second);
if (result == Encoding::INVALID) return {};
// NOTE(review): `hrp` and `values` are not declared in this function; the next line looks
// like the pre-refactor return statement kept by the diff view, superseded by the line after it.
return {result, std::move(hrp), data(values.begin(), values.end() - CHECKSUM_SIZE)};
return {result, std::move(res.first), data(res.second.begin(), res.second.end() - CHECKSUM_SIZE)};
}
/** Find index of an incorrect character in a Bech32 string. */
@ -432,7 +449,7 @@ std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, Ch
data values(length);
for (size_t i = pos + 1; i < str.size(); ++i) {
unsigned char c = str[i];
int8_t rev = CHARSET_REV[c];
int8_t rev = internal::CHARSET_REV[c];
if (rev == -1) {
error_locations.push_back(i);
return std::make_pair("Invalid Base 32 character", std::move(error_locations));
@ -447,7 +464,7 @@ std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, Ch
std::vector<int> possible_errors;
// Recall that (expanded hrp + values) is interpreted as a list of coefficients of a polynomial
// over GF(32). PolyMod computes the "remainder" of this polynomial modulo the generator G(x).
auto enc = PreparePolynomialCoefficients(hrp, values);
auto enc = internal::PreparePolynomialCoefficients(hrp, values);
uint32_t residue = PolyMod(enc) ^ EncodingConstant(encoding);
// All valid codewords should be multiples of G(x), so this remainder (after XORing with the encoding

View File

@ -59,6 +59,24 @@ DecodeResult Decode(const std::string& str, CharLimit limit = CharLimit::BECH32)
/**
 * Return the positions of errors in a Bech32 string.
 *
 * @param str   The string to examine.
 * @param limit Length limit to apply; defaults to CharLimit::BECH32.
 * @return A pair of (error message, indices into str of the characters found to be in error).
 */
std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, CharLimit limit = CharLimit::BECH32);
// The internal namespace is used for things shared between bech32(m) and codex32.
// These functions should not be used except by other hrpstring-encoded codes.
namespace internal {
/** Sequence of values as processed by the checksum and encoding code. */
typedef std::vector<uint8_t> data;
/** Character set for encoding: the character at index i stands for the value i. */
extern const char* CHARSET;
/** Character set for decoding, indexed by 7-bit ASCII code; -1 marks characters outside the set. */
extern const int8_t CHARSET_REV[128];
/**
 * Build the coefficient list from `hrp` and `values` that the checksum computation runs over.
 * NOTE(review): presumably the expanded HRP followed by the values - confirm against the definition.
 */
std::vector<unsigned char> PreparePolynomialCoefficients(const std::string& hrp, const data& values);
/**
 * Encode a hrpstring without concerning ourselves with checksum validity
 *
 * The result is the HRP, the separator '1', then the characters for `values` followed by the
 * characters for `checksum`, which is emitted exactly as supplied by the caller.
 */
std::string Encode(const std::string& hrp, const data& values, const data& checksum);
/**
 * Decode a hrpstring without concerning ourselves with checksum validity
 *
 * @param str             The string to decode.
 * @param limit           Maximum accepted length of str.
 * @param checksum_length Minimum number of characters that must follow the last '1' separator.
 * @return The lowercased HRP and the decoded values, with the checksum values still included;
 *         an empty pair if the string is rejected.
 */
std::pair<std::string, data> Decode(const std::string& str, CharLimit limit, size_t checksum_length);
} // namespace internal
} // namespace bech32
#endif // BITCOIN_BECH32_H