In [1]:
# Display the value of every top-level expression in a cell, not only the last one.
get_ipython().ast_node_interactivity = 'all'
import os
import matplotlib.pyplot as plt
import numpy as np
import subprocess
from tqdm.notebook import tqdm
import random
In [2]:
def cdx5(key, data):
    """Run the external cdx5 binary on `data` with `key` and return its raw stdout.

    Both byte strings are hex-encoded and decoded again with `xxd -ps -r`
    inside bash process substitutions, so arbitrary bytes reach the binary
    without needing temporary files. The binary is invoked with three
    arguments, in order: the data stream, `/dev/stdout`, and the key stream.

    Parameters:
        key:  bytes-like object with a `.hex()` method; third argument of the binary.
        data: bytes-like object with a `.hex()` method; first argument of the binary.

    Returns:
        bytes: everything the command wrote to stdout.

    Note: stderr is discarded and the exit status is not checked, so a failing
    command yields whatever (possibly empty) output it produced.
    """
    cmd = "/home/leo/external/projects/crypto/cdx/cdx5"
    cmd += f" <(echo -n '{data.hex()}' | xxd -ps -r) /dev/stdout <(echo -n '{key.hex()}' | xxd -ps -r)"
    # `<(...)` is a bash feature, but shell=True runs /bin/sh by default. Where
    # sh is not bash (e.g. dash) the command is a syntax error, and with stderr
    # discarded that would silently return b"". Pin the shell explicitly.
    res = subprocess.run(
        cmd,
        shell=True,
        executable="/bin/bash",
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    return res.stdout
In [3]:
# Smoke test: run cdx5 on a fixed key/data pair and show the output as hex.
cdx5(b"TestKey", b"Yellow submarine").hex()
Out [3]:
'930fc26f125afdc25aa9b3bfc094e40b'
In [4]:
# Number of random key bytes per sample (the b"CDX5" prefix is added on top of these).
KEY_SIZE = 12
# Number of random plaintext bytes per sample.
DATA_SIZE = 8
In [5]:
def bytes_to_bits(buf):
    """Yield the bits of `buf` as ints (0 or 1), most significant bit of each byte first."""
    for octet in buf:
        # "08b" renders the value as binary digits, left-padded with zeros to 8 places.
        yield from (int(digit) for digit in format(octet, "08b"))

def bits_to_bytes(bits, size):
    """Pack an iterable of bits (most significant first) into `size` big-endian bytes.

    Parameters:
        bits: iterable whose items stringify to "0" or "1".
        size: length of the returned byte string.

    Returns:
        bytes of length `size`, left-padded with zero bits as needed.

    Raises:
        ValueError: if an item does not stringify to a binary digit.
        OverflowError: if the value does not fit in `size` bytes.
    """
    bit_string = "".join(map(str, bits))
    # int("", 2) raises ValueError; treat an empty bit sequence as zero so that
    # bits_to_bytes([], 0) round-trips with bytes_to_bits(b"").
    value = int(bit_string, 2) if bit_string else 0
    return value.to_bytes(size, "big")
In [6]:
def make_sample():
    """Build one training example from a random key and a plaintext pair.

    A random KEY_SIZE-byte key and DATA_SIZE-byte plaintext are drawn, the
    plaintext is encrypted with cdx5 under b"CDX5" + key, then one randomly
    chosen plaintext bit is flipped and the result is encrypted under the
    same key.

    Returns:
        (X, Y): X is the bit list of plaintext, its ciphertext, the flipped
        plaintext and its ciphertext, concatenated in that order; Y is the
        bit list of the random key (without the b"CDX5" prefix).
    """
    key = os.urandom(KEY_SIZE)
    data = os.urandom(DATA_SIZE)
    full_key = b"CDX5" + key
    data_enc = cdx5(full_key, data)

    # Flip one bit; index 0 is the most significant bit of the first byte,
    # matching the bit order produced by bytes_to_bits.
    flip_index = random.randint(0, DATA_SIZE * 8 - 1)
    flipped = bytearray(data)
    flipped[flip_index // 8] ^= 0x80 >> (flip_index % 8)
    newdata = bytes(flipped)
    newdata_enc = cdx5(full_key, newdata)

    X = [
        bit
        for buf in (data, data_enc, newdata, newdata_enc)
        for bit in bytes_to_bits(buf)
    ]
    Y = list(bytes_to_bits(key))
    return X, Y
In []:
from sklearn import neural_network
# bitModels[i] holds the model trained to predict key bit i (filled by the loop below).
bitModels = []

def make_samples(N, bit):
    """Collect N samples whose label for key bit `bit` alternates 0, 1, 0, 1, ...

    Samples are drawn with make_sample() and rejected until the requested key
    bit equals the parity of the sample index, which balances the two classes.

    Parameters:
        N:   number of samples to collect.
        bit: index into the key-bit list returned by make_sample().

    Returns:
        (X, Y): numpy arrays of the feature rows and of the selected key bit.
    """
    features, labels = [], []
    for index in range(N):
        wanted = index & 1
        sample_x, sample_y = make_sample()
        # Rejection sampling: redraw until the key bit has the wanted value.
        while sample_y[bit] != wanted:
            sample_x, sample_y = make_sample()
        features.append(sample_x)
        labels.append(sample_y[bit])
    return np.array(features), np.array(labels)

# Train one lbfgs MLP regressor per key bit, each on 256 freshly generated samples.
n_key_bits = KEY_SIZE * 8
for bit in tqdm(range(n_key_bits)):
    X, Y = make_samples(256, bit)
    # fit() returns the estimator itself, so the trained model can be bound directly.
    model = neural_network.MLPRegressor(solver='lbfgs').fit(X, Y)
    bitModels.append(model)
Out:
  0%|          | 0/96 [00:00, ?it/s]
In []:
# Target key: the 4-byte b"CDX5" prefix followed by KEY_SIZE bytes to recover.
key = b"CDX5keyrecovery!"

# Running sum of every model's prediction; averaged and thresholded after the loop.
keybit_votes = np.zeros(KEY_SIZE * 8)

iters = 2 ** 11
for i in tqdm(range(iters)):
    data = os.urandom(DATA_SIZE)
    data_enc = cdx5(key, data)
    data_bits = list(bytes_to_bits(data))
    data_bits[random.randint(0, DATA_SIZE * 8 - 1)] ^= 1
    newdata = bits_to_bytes(data_bits, DATA_SIZE)
    # `key` already carries the b"CDX5" prefix, so it is passed unchanged.
    # Prefixing it again would encrypt the flipped plaintext under a different
    # key than the original one, unlike the pairs the models were trained on.
    newdata_enc = cdx5(key, newdata)
    X = list(bytes_to_bits(data)) + list(bytes_to_bits(data_enc)) + list(bytes_to_bits(newdata)) + list(bytes_to_bits(newdata_enc))
    X = np.array([X])

    for bit in range(KEY_SIZE * 8):
        keybit_votes[bit] += bitModels[bit].predict(X)[0]

# Mean prediction per bit, rounded to a hard 0/1 guess at the 0.5 threshold.
keybits = [int(vote > 0.5) for vote in keybit_votes / iters]

# Print the true key bits (prefix stripped) above the recovered bits, then score the guess.
true_bits = list(bytes_to_bits(key[4:]))
print("".join(str(b) for b in true_bits))
print("".join(str(b) for b in keybits))

# One boolean per compared position; zip stops at the shorter sequence.
matches = [guessed == actual for guessed, actual in zip(keybits, true_bits)]
total = len(matches)
correct = sum(matches)

print(f"{correct / total * 100:.02f}% of bits guessed correctly")