diff --git a/config.cmake b/config.cmake index 25cd5d25e..88542a0b5 100644 --- a/config.cmake +++ b/config.cmake @@ -462,4 +462,46 @@ config_string( DEPENDS "KernelIsMCS" UNDEF_DISABLED ) +config_option( + KernelClzlImpl CLZL_IMPL + "Define a __clzdi2 function to count leading zeros for unsigned long arguments. \ + Only needed on platforms which lack a builtin instruction." + DEFAULT OFF +) + +config_option( + KernelClzllImpl CLZLL_IMPL + "Define a __clzti2 function to count leading zeros for unsigned long long arguments. \ + Only needed on platforms which lack a builtin instruction." + DEFAULT OFF +) + +config_option( + KernelCtzlImpl CTZL_IMPL + "Define a __ctzdi2 function to count trailing zeros for unsigned long arguments. \ + Only needed on platforms which lack a builtin instruction." + DEFAULT OFF +) + +config_option( + KernelCtzllImpl CTZLL_IMPL + "Define a __ctzti2 function to count trailing zeros for unsigned long long arguments. \ + Only needed on platforms which lack a builtin instruction." + DEFAULT OFF +) + +config_option( + KernelClzNoBuiltin CLZ_NO_BUILTIN + "Expose implementations of clzl and clzll to verification by avoiding the use \ + of __builtin_clzl and __builtin_clzll." + DEFAULT OFF +) + +config_option( + KernelCtzNoBuiltin CTZ_NO_BUILTIN + "Expose implementations of ctzl and ctzll to verification by avoiding the use \ + of __builtin_ctzl and __builtin_ctzll." 
+ DEFAULT OFF +) + add_config_library(kernel "${configure_string}") diff --git a/configs/RISCV64_MCS_verified.cmake b/configs/RISCV64_MCS_verified.cmake old mode 100755 new mode 100644 index a945fcc3e..856fc8fcd --- a/configs/RISCV64_MCS_verified.cmake +++ b/configs/RISCV64_MCS_verified.cmake @@ -26,4 +26,6 @@ set(KernelRootCNodeSizeBits 19 CACHE STRING "") set(KernelMaxNumBootinfoUntypedCaps 50 CACHE STRING "") set(KernelIsMCS ON CACHE BOOL "") set(KernelStaticMaxBudgetUs "(60 * 60 * MS_IN_S * US_IN_MS)" CACHE STRING "") +set(KernelClzNoBuiltin ON CACHE BOOL "") +set(KernelCtzNoBuiltin ON CACHE BOOL "") include(${CMAKE_CURRENT_LIST_DIR}/seL4Config.cmake) diff --git a/configs/RISCV64_verified.cmake b/configs/RISCV64_verified.cmake old mode 100755 new mode 100644 index 555aa1bcb..a1778b79e --- a/configs/RISCV64_verified.cmake +++ b/configs/RISCV64_verified.cmake @@ -24,4 +24,6 @@ set(KernelNumDomains 16 CACHE STRING "") set(KernelMaxNumBootinfoUntypedCap 166 CACHE STRING "") set(KernelRootCNodeSizeBits 19 CACHE STRING "") set(KernelMaxNumBootinfoUntypedCaps 50 CACHE STRING "") +set(KernelClzNoBuiltin ON CACHE BOOL "") +set(KernelCtzNoBuiltin ON CACHE BOOL "") include(${CMAKE_CURRENT_LIST_DIR}/seL4Config.cmake) diff --git a/include/util.h b/include/util.h index faa6bff47..e708d3040 100644 --- a/include/util.h +++ b/include/util.h @@ -98,60 +98,137 @@ int PURE strncmp(const char *s1, const char *s2, int n); long CONST char_to_long(char c); long PURE str_to_long(const char *str); +// Library functions for counting leading/trailing zeros. +// GCC's builtins will emit calls to these functions when the platform +// does not provide suitable inline assembly. +// We only emit function definitions if CONFIG_CLZL_IMPL etc are set. 
+NO_INLINE CONST int __clzdi2(unsigned long x); +NO_INLINE CONST int __clzti2(unsigned long long x); +NO_INLINE CONST int __ctzdi2(unsigned long x); +NO_INLINE CONST int __ctzti2(unsigned long long x); -int __builtin_clzl(unsigned long x); -int __builtin_ctzl(unsigned long x); +// Used for compile-time constants, so should always use the builtin. +#define CTZL(x) __builtin_ctzl(x) -#ifdef CONFIG_ARCH_RISCV -uint32_t __clzsi2(uint32_t x); -uint32_t __ctzsi2(uint32_t x); -uint32_t __clzdi2(uint64_t x); -uint32_t __ctzdi2(uint64_t x); -#endif +// Count leading zeros. +// The CONFIG_CLZ_NO_BUILTIN macro may be used to expose the library function +// to the C parser for verification. +#ifndef CONFIG_CLZ_NO_BUILTIN +// If we use a compiler builtin, we cannot verify it, so we use the following +// annotations to hide the function body from the proofs, and axiomatise its +// behaviour. +// On the other hand, if we use our own implementation instead of the builtin, +// then we want to expose that implementation to the proofs, and therefore hide +// these annotations. /** MODIFIES: */ /** DONT_TRANSLATE */ /** FNSPEC clzl_spec: "\s. \ \ - {\. s = \ \ x_' s \ 0 } + {\. s = \ \ x___unsigned_long_' s \ 0 } \ret__long :== PROC clzl(\x) - \ \ret__long = of_nat (word_clz (x_' s)) \" + \ \ret__long = of_nat (word_clz (x___unsigned_long_' s)) \" */ +#endif static inline long CONST clzl(unsigned long x) { +#ifdef CONFIG_CLZ_NO_BUILTIN + return __clzdi2(x); +#else return __builtin_clzl(x); +#endif } +#ifndef CONFIG_CLZ_NO_BUILTIN +// See comments on clzl. +/** MODIFIES: */ +/** DONT_TRANSLATE */ +/** FNSPEC clzll_spec: + "\s. \ \ + {\. s = \ \ x___unsigned_longlong_' s \ 0 } + \ret__longlong :== PROC clzll(\x) + \ \ret__longlong = of_nat (word_clz (x___unsigned_longlong_' s)) \" +*/ +#endif +static inline long long +CONST clzll(unsigned long long x) +{ +#ifdef CONFIG_CLZ_NO_BUILTIN + return __clzti2(x); +#else + return __builtin_clzll(x); +#endif +} + +// Count trailing zeros. 
+#ifndef CONFIG_CTZ_NO_BUILTIN +// See comments on clzl. /** MODIFIES: */ /** DONT_TRANSLATE */ /** FNSPEC ctzl_spec: "\s. \ \ - {\. s = \ \ x_' s \ 0 } + {\. s = \ \ x___unsigned_long_' s \ 0 } \ret__long :== PROC ctzl(\x) - \ \ret__long = of_nat (word_ctz (x_' s)) \" + \ \ret__long = of_nat (word_ctz (x___unsigned_long_' s)) \" */ +#endif static inline long CONST ctzl(unsigned long x) { +#ifdef CONFIG_CTZ_NO_BUILTIN +// If there is a builtin CLZ, but no builtin CTZ, then CTZ will be implemented +// using the builtin CLZ, rather than the long-form implementation. +// This is typically the fastest way to calculate ctzl on such platforms. +#ifdef CONFIG_CLZ_NO_BUILTIN + // Here, there are no builtins we can use, so call the library function. + return __ctzdi2(x); +#else + // Here, we have __builtin_clzl, but no __builtin_ctzl. + if (unlikely(x == 0)) { + return 8 * sizeof(unsigned long); + } + // -x = ~x + 1, so (x & -x) isolates the least significant 1-bit of x, + // allowing ctzl to be calculated from clzl and the word size. + return 8 * sizeof(unsigned long) - 1 - __builtin_clzl(x & -x); +#endif +#else + // Here, we have __builtin_ctzl. return __builtin_ctzl(x); +#endif } -#define CTZL(x) __builtin_ctzl(x) +#ifndef CONFIG_CTZ_NO_BUILTIN +// See comments on clzl. +/** MODIFIES: */ +/** DONT_TRANSLATE */ +/** FNSPEC ctzll_spec: + "\s. \ \ + {\. s = \ \ x___unsigned_longlong_' s \ 0 } + \ret__longlong :== PROC ctzll(\x) + \ \ret__longlong = of_nat (word_ctz (x___unsigned_longlong_' s)) \" +*/ +#endif +static inline long long +CONST ctzll(unsigned long long x) +{ +#ifdef CONFIG_CTZ_NO_BUILTIN +// See comments on ctzl. +#ifdef CONFIG_CLZ_NO_BUILTIN + return __ctzti2(x); +#else + if (unlikely(x == 0)) { + return 8 * sizeof(unsigned long long); + } + // See comments on ctzl. 
+ return 8 * sizeof(unsigned long long) - 1 - __builtin_clzll(x & -x); +#endif +#else + return __builtin_ctzll(x); +#endif +} int __builtin_popcountl(unsigned long x); -/** DONT_TRANSLATE */ -/** FNSPEC clzll_spec: - "\s. \ \ - {\. s = \ \ x_' s \ 0 } - \ret__longlong :== PROC clzll(\x) - \ \ret__longlong = of_nat (word_clz (x_' s)) \" -*/ -static inline long long CONST clzll(unsigned long long x) -{ - return __builtin_clzll(x); -} - /** DONT_TRANSLATE */ static inline long CONST popcountl(unsigned long mask) diff --git a/src/arch/riscv/config.cmake b/src/arch/riscv/config.cmake index 883e250e9..122de4484 100644 --- a/src/arch/riscv/config.cmake +++ b/src/arch/riscv/config.cmake @@ -27,6 +27,16 @@ config_option( DEPENDS "KernelArchRiscV" ) +# Until RISC-V has instructions to count leading/trailing zeros, we provide +# library implementations. +# In the verified configurations, we additionally define KernelClzNoBuiltin and +# KernelCtzNoBuiltin to expose the library implementations to verification. +# However, since the NoBuiltin options force the use of the library functions +# even when the platform has suitable inline assembly, we do not make these the +# default. +set(KernelClzlImpl ON CACHE BOOL "") +set(KernelCtzlImpl ON CACHE BOOL "") + if(KernelSel4ArchRiscV32) set(KernelPTLevels 2 CACHE STRING "" FORCE) endif() diff --git a/src/util.c b/src/util.c index af42d8ab2..73e9eba6c 100644 --- a/src/util.c +++ b/src/util.c @@ -132,44 +132,314 @@ long PURE str_to_long(const char *str) return val; } -#ifdef CONFIG_ARCH_RISCV -uint32_t __clzsi2(uint32_t x) -{ - uint32_t count = 0; - while (!(x & 0x80000000U) && count < 34) { - x <<= 1; - count++; - } - return count; -} +// The following implementations of CLZ (count leading zeros) and CTZ (count +// trailing zeros) perform a binary search for the first 1 bit from the +// beginning (resp. end) of the input. Initially, the focus is the whole input.
+// Then, each iteration determines whether there are any 1 bits set in the +// upper (resp. lower) half of the current focus. If there are (resp. are not), +// then the upper half is shifted into the lower half. Either way, the lower +// half of the current focus becomes the new focus for the next iteration. +// After enough iterations (6 for 64-bit inputs, 5 for 32-bit inputs), the +// focus is reduced to a single bit, and the total number of bits shifted can +// be used to determine the number of zeros before (resp. after) the first 1 +// bit. +// +// Although the details vary, the general approach is used in several library +// implementations, including in LLVM and GCC. Wikipedia has some references: +// https://en.wikipedia.org/wiki/Find_first_set +// +// The current implementation avoids branching. The test that determines +// whether the upper (resp. lower) half contains any ones directly produces a +// number which can be used for an unconditional shift. If the upper (resp. +// lower) half is all zeros, the test produces a zero, and the shift is a +// no-op. A branchless implementation has the disadvantage that it requires +// more instructions to execute than one which branches, but the advantage is +// that none will be mispredicted branches. Whether this is a good tradeoff +// depends on the branch predictability and the architecture's pipeline depth. +// The most critical use of clzl in the kernel is in the scheduler priority +// queue. In the absence of a concrete application and hardware implementation +// to evaluate the tradeoff, we somewhat arbitrarily choose a branchless +// implementation. In any case, the compiler might convert this to a branching +// binary. 
-uint32_t __ctzsi2(uint32_t x) -{ - uint32_t count = 0; - while (!(x & 0x000000001) && count <= 32) { - x >>= 1; - count++; - } - return count; -} +// Check some assumptions made by the CLZ and CTZ library functions: +compile_assert(clz_ulong_32_or_64, sizeof(unsigned long) == 4 || sizeof(unsigned long) == 8); +compile_assert(clz_ullong_64, sizeof(unsigned long long) == 8); -uint32_t __clzdi2(uint64_t x) +#ifdef CONFIG_CLZL_IMPL +// Count leading zeros. +// This implementation contains no branches. If the architecture provides an +// instruction to set a register to a boolean value on a comparison, then the +// binary might also avoid branching. A branchless implementation might be +// preferable on architectures with deep pipelines, or when the maximum +// priority of runnable threads frequently varies. However, note that the +// compiler may choose to convert this to a branching implementation. +static CONST inline unsigned clz32(uint32_t x) { - uint32_t count = 0; - while (!(x & 0x8000000000000000U) && count < 65) { - x <<= 1; - count++; - } - return count; -} + // Compiler builtins typically return int, but we use unsigned internally + // to reduce the number of guards we see in the proofs. + unsigned count = 32; + uint32_t mask = UINT32_MAX; -uint32_t __ctzdi2(uint64_t x) -{ - uint32_t count = 0; - while (!(x & 0x00000000000000001) && count <= 64) { - x >>= 1; - count++; + // Each iteration i (counting backwards) considers the least significant + // 2^(i+1) bits of x as the current focus. At the first iteration, the + // focus is the whole input. Each iteration assumes x contains no 1 bits + // outside its focus. The iteration contains a test which determines + // whether there are any 1 bits in the upper half (2^i bits) of the focus, + // setting `bits` to 2^i if there are, or zero if not. Shifting by `bits` + // then narrows the focus to the lower 2^i bits and satisfies the + // assumption for the next iteration. 
After the final iteration, the focus + // is just the least significant bit, and the most significant 1 bit of the + // original input (if any) has been shifted into this position. The leading + // zero count can be determined from the total shift. + // + // The iterations are given a very regular structure to facilitate proofs, + // while also generating reasonably efficient binary code. + + // The `if (1)` blocks make it easier to reason by chunks in the proofs. + if (1) { + // iteration 4 + mask >>= (1 << 4); // 0x0000ffff + unsigned bits = ((unsigned)(mask < x)) << 4; // [0, 16] + x >>= bits; // <= 0x0000ffff + count -= bits; // [16, 32] + } + if (1) { + // iteration 3 + mask >>= (1 << 3); // 0x000000ff + unsigned bits = ((unsigned)(mask < x)) << 3; // [0, 8] + x >>= bits; // <= 0x000000ff + count -= bits; // [8, 16, 24, 32] + } + if (1) { + // iteration 2 + mask >>= (1 << 2); // 0x0000000f + unsigned bits = ((unsigned)(mask < x)) << 2; // [0, 4] + x >>= bits; // <= 0x0000000f + count -= bits; // [4, 8, 12, ..., 32] + } + if (1) { + // iteration 1 + mask >>= (1 << 1); // 0x00000003 + unsigned bits = ((unsigned)(mask < x)) << 1; // [0, 2] + x >>= bits; // <= 0x00000003 + count -= bits; // [2, 4, 6, ..., 32] + } + if (1) { + // iteration 0 + mask >>= (1 << 0); // 0x00000001 + unsigned bits = ((unsigned)(mask < x)) << 0; // [0, 1] + x >>= bits; // <= 0x00000001 + count -= bits; // [1, 2, 3, ..., 32] + } + + // If the original input was zero, there will have been no shifts, so this + // gives a result of 32. Otherwise, x is now exactly 1, so subtracting from + // count gives a result from [0, 1, 2, ..., 31].
+ return count - x; +} +#endif + +#if defined(CONFIG_CLZL_IMPL) || defined (CONFIG_CLZLL_IMPL) +static CONST inline unsigned clz64(uint64_t x) +{ + unsigned count = 64; + uint64_t mask = UINT64_MAX; + + // Although we could implement this using clz32, we spell out the + // iterations in full for slightly better code generation at low + // optimisation levels, and to allow us to reuse the proof machinery we + // developed for clz32. + if (1) { + // iteration 5 + mask >>= (1 << 5); // 0x00000000ffffffff + unsigned bits = ((unsigned)(mask < x)) << 5; // [0, 32] + x >>= bits; // <= 0x00000000ffffffff + count -= bits; // [32, 64] + } + if (1) { + // iteration 4 + mask >>= (1 << 4); // 0x000000000000ffff + unsigned bits = ((unsigned)(mask < x)) << 4; // [0, 16] + x >>= bits; // <= 0x000000000000ffff + count -= bits; // [16, 32, 48, 64] + } + if (1) { + // iteration 3 + mask >>= (1 << 3); // 0x00000000000000ff + unsigned bits = ((unsigned)(mask < x)) << 3; // [0, 8] + x >>= bits; // <= 0x00000000000000ff + count -= bits; // [8, 16, 24, ..., 64] + } + if (1) { + // iteration 2 + mask >>= (1 << 2); // 0x000000000000000f + unsigned bits = ((unsigned)(mask < x)) << 2; // [0, 4] + x >>= bits; // <= 0x000000000000000f + count -= bits; // [4, 8, 12, ..., 64] + } + if (1) { + // iteration 1 + mask >>= (1 << 1); // 0x0000000000000003 + unsigned bits = ((unsigned)(mask < x)) << 1; // [0, 2] + x >>= bits; // <= 0x0000000000000003 + count -= bits; // [2, 4, 6, ..., 64] + } + if (1) { + // iteration 0 + mask >>= (1 << 0); // 0x0000000000000001 + unsigned bits = ((unsigned)(mask < x)) << 0; // [0, 1] + x >>= bits; // <= 0x0000000000000001 + count -= bits; // [1, 2, 3, ..., 64] + } + + return count - x; +} +#endif + +#ifdef CONFIG_CLZL_IMPL +int __clzdi2(unsigned long x) +{ + return sizeof(unsigned long) == 4 ? 
clz32(x) : clz64(x); +} +#endif + +#ifdef CONFIG_CLZLL_IMPL +int __clzti2(unsigned long long x) +{ + return clz64(x); +} +#endif + +#ifdef CONFIG_CTZL_IMPL +// Count trailing zeros. +// See comments on clz32. +static CONST inline unsigned ctz32(uint32_t x) +{ + unsigned count = (x == 0); + uint32_t mask = UINT32_MAX; + + // Each iteration i (counting backwards) considers the least significant + // 2^(i+1) bits of x as the current focus. At the first iteration, the + // focus is the whole input. The iteration contains a test which determines + // whether there are any 1 bits in the lower half (2^i bits) of the focus, + // setting `bits` to zero if there are, or 2^i if not. Shifting by `bits` + // then narrows the focus to the lower 2^i bits for the next iteration. + // After the final iteration, the focus is just the least significant bit, + // and the least significant 1 bit of the original input (if any) has been + // shifted into this position. The trailing zero count can be determined + // from the total shift. + // + // If the initial input is zero, every iteration causes a shift, for a + // total shift count of 31, so in that case, we add one for a total count + // of 32. In the comments, xi is the initial value of x. + // + // The iterations are given a very regular structure to facilitate proofs, + // while also generating reasonably efficient binary code.
+ + if (1) { + // iteration 4 + mask >>= (1 << 4); // 0x0000ffff + unsigned bits = ((unsigned)((x & mask) == 0)) << 4; // [0, 16] + x >>= bits; // xi != 0 --> x & 0x0000ffff != 0 + count += bits; // if xi != 0 then [0, 16] else 17 + } + if (1) { + // iteration 3 + mask >>= (1 << 3); // 0x000000ff + unsigned bits = ((unsigned)((x & mask) == 0)) << 3; // [0, 8] + x >>= bits; // xi != 0 --> x & 0x000000ff != 0 + count += bits; // if xi != 0 then [0, 8, 16, 24] else 25 + } + if (1) { + // iteration 2 + mask >>= (1 << 2); // 0x0000000f + unsigned bits = ((unsigned)((x & mask) == 0)) << 2; // [0, 4] + x >>= bits; // xi != 0 --> x & 0x0000000f != 0 + count += bits; // if xi != 0 then [0, 4, 8, ..., 28] else 29 + } + if (1) { + // iteration 1 + mask >>= (1 << 1); // 0x00000003 + unsigned bits = ((unsigned)((x & mask) == 0)) << 1; // [0, 2] + x >>= bits; // xi != 0 --> x & 0x00000003 != 0 + count += bits; // if xi != 0 then [0, 2, 4, ..., 30] else 31 + } + if (1) { + // iteration 0 + mask >>= (1 << 0); // 0x00000001 + unsigned bits = ((unsigned)((x & mask) == 0)) << 0; // [0, 1] + x >>= bits; // xi != 0 --> x & 0x00000001 != 0 + count += bits; // if xi != 0 then [0, 1, 2, ..., 31] else 32 + } + return count; } -#endif /* CONFIG_ARCH_RISCV */ +#endif + +#if defined(CONFIG_CTZL_IMPL) || defined (CONFIG_CTZLL_IMPL) +static CONST inline unsigned ctz64(uint64_t x) +{ + unsigned count = (x == 0); + uint64_t mask = UINT64_MAX; + + if (1) { + // iteration 5 + mask >>= (1 << 5); // 0x00000000ffffffff + unsigned bits = ((unsigned)((x & mask) == 0)) << 5; // [0, 32] + x >>= bits; // xi != 0 --> x & 0x00000000ffffffff != 0 + count += bits; // if xi != 0 then [0, 32] else 33 + } + if (1) { + // iteration 4 + mask >>= (1 << 4); // 0x000000000000ffff + unsigned bits = ((unsigned)((x & mask) == 0)) << 4; // [0, 16] + x >>= bits; // xi != 0 --> x & 0x000000000000ffff != 0 + count += bits; // if xi != 0 then [0, 16, 32, 48] else 49 + } + if (1) { + // iteration 3 + mask >>= (1 << 3); // 
0x00000000000000ff + unsigned bits = ((unsigned)((x & mask) == 0)) << 3; // [0, 8] + x >>= bits; // xi != 0 --> x & 0x00000000000000ff != 0 + count += bits; // if xi != 0 then [0, 8, 16, ..., 56] else 57 + } + if (1) { + // iteration 2 + mask >>= (1 << 2); // 0x000000000000000f + unsigned bits = ((unsigned)((x & mask) == 0)) << 2; // [0, 4] + x >>= bits; // xi != 0 --> x & 0x000000000000000f != 0 + count += bits; // if xi != 0 then [0, 4, 8, ..., 60] else 61 + } + if (1) { + // iteration 1 + mask >>= (1 << 1); // 0x0000000000000003 + unsigned bits = ((unsigned)((x & mask) == 0)) << 1; // [0, 2] + x >>= bits; // xi != 0 --> x & 0x0000000000000003 != 0 + count += bits; // if xi != 0 then [0, 2, 4, ..., 62] else 63 + } + if (1) { + // iteration 0 + mask >>= (1 << 0); // 0x0000000000000001 + unsigned bits = ((unsigned)((x & mask) == 0)) << 0; // [0, 1] + x >>= bits; // xi != 0 --> x & 0x0000000000000001 != 0 + count += bits; // if xi != 0 then [0, 1, 2, ..., 63] else 64 + } + + return count; +} +#endif + +#ifdef CONFIG_CTZL_IMPL +int __ctzdi2(unsigned long x) +{ + return sizeof(unsigned long) == 4 ? ctz32(x) : ctz64(x); +} +#endif + +#ifdef CONFIG_CTZLL_IMPL +int __ctzti2(unsigned long long x) +{ + return ctz64(x); +} +#endif