How to backdoor a cipher OR how to solve not-backdoored@MidnightSunCTF

On the 18th to 19th of September MidnightSunCTF was held. Among the challenges, there was a crypto challenge based on a Feistel construction.

The following code was given:

import os
import copy

from secret import flag


class NB_128:

    ROUNDS = 44
    LENGTH = 15
    KEYLEN = 16
    CONST  = 3

    A = [
        0x9b,0x25,0x29,0x2f,0x75,0x61,0x49,0x5b,0x5f,0x17,0xf3,0xbd,0x92,0xce,0x0e,0x54,
        0xf5,0xf8,0x83,0x88,0xcc,0xd5,0x8a,0x95,0x13,0x56,0xc5,0x86,0xb7,0xe6,0x51,0x06,
        0x77,0x23,0x13,0x41,0xd0,0x90,0x84,0xc2,0x62,0x7e,0xa6,0xbc,0x58,0x50,0xac,0xa2,
        0x33,0x6a,0x2d,0x72,0xfd,0xb0,0xd3,0x98,0xba,0xab,0x04,0x13,0xe9,0xec,0x67,0x64,
        0xc5,0x48,0x53,0xd8,0xf2,0x6b,0x54,0xcb,0x54,0x91,0x62,0xa1,0xfe,0x2f,0xf8,0x2f,
        0x4a,0xca,0xa6,0x20,0x14,0x80,0xc8,0x5a,0x47,0x8f,0x0b,0xc5,0x84,0x58,0xf8,0x22,
        0x43,0x9a,0xbd,0x62,0x83,0x4e,0x4d,0x86,0xbd,0x2c,0xe3,0x74,0xe0,0x65,0x8e,0x0d,
        0x58,0x8c,0xdc,0x0e,0xf1,0x31,0x45,0x83,0x3a,0xa6,0x1e,0x84,0x0e,0x86,0x1a,0x94,
        0x43,0xd0,0x53,0xc6,0xb7,0x30,0x97,0x16,0x26,0xfd,0x96,0x4b,0x4f,0x80,0xcf,0x06,
        0x98,0x06,0xf2,0x6a,0x05,0x8f,0x5f,0xd3,0x61,0xb7,0xab,0x7b,0x61,0xa3,0x25,0x5f,
        0xd3,0x14,0xab,0x6a,0xd0,0x03,0x98,0x4d,0xd9,0x56,0x01,0x88,0x47,0xdc,0xaf,0x32,
        0x9c,0x56,0x9e,0x52,0xf6,0x28,0xc4,0x1c,0x0a,0x88,0xa8,0x2c,0xfd,0x6b,0x6f,0xff,
        0x4d,0x53,0xc7,0xdf,0xde,0xd4,0x64,0x68,0xc3,0x95,0xe9,0xb9,0xcd,0x8f,0xd7,0x93,
        0xc9,0xda,0x39,0x2c,0x33,0x34,0xf3,0xf2,0xdb,0x80,0x8b,0xd6,0xbc,0xf3,0xdc,0x95,
        0x09,0x43,0xeb,0xa7,0x6d,0x33,0xbf,0xe7,0xe8,0xea,0xaa,0xae,0x11,0x07,0x63,0x73,
        0x19,0x5e,0x81,0xc0,0x14,0x47,0xbc,0xe9,0x64,0x6b,0x5c,0x55,0xf4,0xef,0xfc,0xe1
    ]
    
    B = [
        0xf3,0x44,0x4d,0x4a,0x05,0x10,0x3d,0x2e,0x7f,0x36,0xd7,0x98,0xa2,0xff,0x3a,0x61,
        0x88,0x84,0xfa,0xf0,0xa1,0xb9,0xe3,0xfd,0x2e,0x6a,0xfc,0xbe,0x9a,0xca,0x78,0x2e,
        0x63,0x36,0x03,0x50,0xd4,0x95,0x84,0xc3,0x36,0x2b,0xf6,0xed,0x1c,0x15,0xec,0xe3,
        0x3a,0x62,0x20,0x7e,0xe4,0xa8,0xce,0x84,0xf3,0xe3,0x49,0x5f,0xb0,0xb4,0x3a,0x38,
        0x68,0xe4,0xfa,0x70,0x4f,0xd7,0xed,0x73,0xb9,0x7d,0x8b,0x49,0x03,0xd3,0x01,0xd7,
        0xfa,0x7b,0x12,0x95,0xb4,0x21,0x6c,0xff,0xb7,0x7e,0xff,0x30,0x64,0xb9,0x1c,0xc7,
        0x9a,0x42,0x60,0xbe,0x4a,0x86,0x80,0x4a,0x24,0xb4,0x7e,0xe8,0x69,0xed,0x03,0x81,
        0x9c,0x49,0x1c,0xcf,0x25,0xe4,0x95,0x52,0xbe,0x23,0x9e,0x05,0x9a,0x13,0x8a,0x05,
        0x30,0xa2,0x24,0xb0,0xd4,0x52,0xf0,0x70,0x15,0xcf,0xa1,0x7d,0x6c,0xa2,0xe8,0x20,
        0xf6,0x69,0x98,0x01,0x7b,0xf0,0x25,0xa8,0x4f,0x98,0x81,0x50,0x5f,0x9c,0xa1,0x64,
        0xd4,0x12,0xa8,0x68,0xc7,0x15,0x8b,0x5f,0x9e,0x10,0x42,0xca,0x10,0x8a,0xfc,0x60,
        0x86,0x4d,0x80,0x4d,0xfc,0x23,0xca,0x13,0x50,0xd3,0xf6,0x73,0xb7,0x20,0x21,0xb0,
        0x9b,0xec,0x7d,0x64,0x70,0x7b,0xce,0xc3,0x3d,0x6a,0x13,0x42,0x23,0x60,0x3d,0x78,
        0x6a,0x78,0x9e,0x8a,0x80,0x86,0x44,0x44,0x38,0x62,0x6c,0x30,0x4f,0x01,0x2b,0x63,
        0xc3,0x88,0x25,0x68,0xb7,0xe8,0x61,0x38,0x62,0x61,0x24,0x21,0x8b,0x9c,0xfd,0xec,
        0xce,0x88,0x52,0x12,0xd3,0x81,0x7f,0x2b,0xf3,0xfd,0xcf,0xc7,0x73,0x69,0x7f,0x63
    ]

    def F(x, i):
        if i % 2:
            return NB_128.A[x]
        else:
            return NB_128.B[x]

    def rot(s):
        return s[1:] + [s[0]]

    def encrypt(self, m):
        m = copy.copy(m)
        for i in range(NB_128.ROUNDS):
            m[0] ^= NB_128.F(
                m[1] ^ self.key[i % NB_128.KEYLEN], i
            ) ^ NB_128.CONST
            m = NB_128.rot(m)
        return m
    
    def __init__(self, key):
        self.key = key

key = list(bytearray(os.urandom(16)))
message = list(bytearray(flag))

cipher = NB_128(key)
ciphertext = cipher.encrypt(message)

It is quite easy to notice that certain ciphertext bytes depend only on five of the key bytes along with message bytes and therefore can be brute forced in at most 2^{5 \times 8} guesses (which was the methodology pursued by — to my knowledge — all participants), which obviously could be a very badly concealed backdoor. However, this is not the actual backdoor.

The actual backdoor is that the S-box was generated by a polynomial over \mathbf F_{2^8}, but with randomly chosen indices swapped. This thwarts a possible interpolation attack, because the S-box is not a perfect polynomial. So, how does one detect such a relation? One possible answer is Reed-Solomon codes which essentially performs a best-fit interpolation.

If A[x] = y, then it means that P_A(x) = y. So, if we include all 256 points, we get a matrix relation of the form

\mathbf y = \beta G_A = \begin{bmatrix}  \beta_0 & \beta_1 & \cdots & b_{k-1} \end{bmatrix} \begin{bmatrix} 1 & \dots & 1 & \dots & 1 \\ \alpha_1 & \dots & \alpha_k & \dots & \alpha_n \\ \alpha_1^2 & \dots & \alpha_k^2 & \dots & \alpha_n^2 \\ \vdots & & \vdots & & \vdots \\ \alpha_1^{k-1} & \dots & \alpha_k^{k-1} & \dots & \alpha_n^{k-1} \end{bmatrix}

This forms a [2^8, k]-linear (RS) code, where polynomials up to degree k can be found.

R = GF(2^8)
pointsA = [R.fetch_int(A[i]) for i in range(256)]
pointsB = [R.fetch_int(B[i]) for i in range(256)]

# we can find the best approximation under some polynomial
# using the RS-decoder.
C = codes.GeneralizedReedSolomonCode([R.fetch_int(x) for x in range(256)], 128)
D = codes.decoders.GRSBerlekampWelchDecoder(C)
print("S-box A approximation:", D.decode_to_message(vector(pointsA)))
print("S-box B approximation:", D.decode_to_message(vector(pointsB)))

This gives two polynomials that are unique for each S-box. Using these relations, we can compute a polynomial representation of the cipher, albeit with some chance of error depending on the number of rounds. Again, these relations between ciphertext, message and key bytes form a linear code. Note that we treat different powers of key bytes k_i^j and k_i^l where j \neq l as separate.

R2.<k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11, k12, k13, k14, k15, k16> = PolynomialRing(R)
v_keys = [k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11, k12, k13, k14, k15, k16]

block = 0 # use different different blocks to reveal different key bits, start with the ones involving fewest relations
for m, c in zip(messages, ciphertexts):
    vc = copy(m)
    for i in range(ROUNDS):
        # modify c non-linearly using symbols and x^3 approximation
        vc[0] += F(vc[1] + v_keys[i % KEYS], i) + constant
        vc = rol(vc)
    CC += [c[block]]
    VV += [vc[block]]

Q = []
b = []
for vv, cc in zip(VV, CC):
    Q.append(vv)
    b.append(cc)Q = Sequence(Q)

C, vb = Q.coefficient_matrix()
code = codes.LinearCode(C.transpose())
ISD = LeeBrickellISDAlgorithm(code, (0, 18))
sol = C.solve_right(ISD.decode(vector(b)))

This will reveal the key bytes.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s