/*	$NetBSD: aes_ccm.c,v 1.6 2021/10/17 14:45:45 jmcneill Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * AES-CCM, as defined in:
 *
 *	D. Whiting, R. Housley, and N. Ferguson, `Counter with CBC-MAC
 *	(CCM)', IETF RFC 3610, September 2003.
 *	https://tools.ietf.org/html/rfc3610
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(1, "$NetBSD: aes_ccm.c,v 1.6 2021/10/17 14:45:45 jmcneill Exp $");

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>

#include <lib/libkern/libkern.h>

#include <crypto/aes/aes.h>
#include <crypto/aes/aes_ccm.h>
#include <crypto/aes/aes_impl.h>

static inline void
xor(uint8_t *x, const uint8_t *a, const uint8_t *b, size_t n)
{

	while (n --> 0)
		*x++ = *a++ ^ *b++;
}

/* RFC 3610, §2.2 Authentication */
#define	CCM_AFLAGS_ADATA	__BIT(6)
#define	CCM_AFLAGS_M		__BITS(5,3)
#define	CCM_AFLAGS_L		__BITS(2,0)

/* RFC 3610, §2.3 Encryption */
#define	CCM_EFLAGS_L		__BITS(2,0)

static void
aes_ccm_inc(struct aes_ccm *C)
{
	uint8_t *ctr = C->authctr + 16;

	KASSERT(C->L == 2);
	if (++ctr[15] == 0 && ++ctr[14] == 0)
		panic("AES-CCM overflow");
}

static void
aes_ccm_zero_ctr(struct aes_ccm *C)
{
	uint8_t *ctr = C->authctr + 16;

	KASSERT(C->L == 2);
	ctr[14] = ctr[15] = 0;
}

void
aes_ccm_init(struct aes_ccm *C, unsigned nr, const struct aesenc *enc,
    unsigned L, unsigned M,
    const uint8_t *nonce, unsigned noncelen, const void *ad, size_t adlen,
    size_t mlen)
{
	const uint8_t *adp = ad;
	uint8_t *auth = C->authctr;
	uint8_t *ctr = C->authctr + 16;
	unsigned i;

	KASSERT(L == 2);
	KASSERT(M % 2 == 0);
	KASSERT(M >= 4);
	KASSERT(M <= 16);
	KASSERT(noncelen == 15 - L);

	C->enc = enc;
	C->nr = nr;
	C->L = L;
	C->M = M;
	C->mlen = C->mleft = mlen;

	/* Encode B0, the initial authenticated data block.  */
	auth[0] = __SHIFTIN(adlen == 0 ? 0 : 1, CCM_AFLAGS_ADATA);
	auth[0] |= __SHIFTIN((M - 2)/2, CCM_AFLAGS_M);
	auth[0] |= __SHIFTIN(L - 1, CCM_AFLAGS_L);
	memcpy(auth + 1, nonce, noncelen);
	for (i = 0; i < L; i++, mlen >>= 8) {
		KASSERT(i < 16 - 1 - noncelen);
		auth[16 - i - 1] = mlen & 0xff;
	}
	aes_enc(enc, auth, auth, C->nr);

	/* Process additional authenticated data, if any.  */
	if (adlen) {
		/* Encode the length according to the table on p. 4.  */
		if (adlen < 0xff00) {
			auth[0] ^= adlen >> 8;
			auth[1] ^= adlen;
			i = 2;
		} else if (adlen < 0xffffffff) {
			auth[0] ^= 0xff;
			auth[1] ^= 0xfe;
			auth[2] ^= adlen >> 24;
			auth[3] ^= adlen >> 16;
			auth[4] ^= adlen >> 8;
			auth[5] ^= adlen;
			i = 6;
#if SIZE_MAX > 0xffffffffU
		} else {
			CTASSERT(SIZE_MAX <= 0xffffffffffffffff);
			auth[0] ^= 0xff;
			auth[1] ^= 0xff;
			auth[2] ^= adlen >> 56;
			auth[3] ^= adlen >> 48;
			auth[4] ^= adlen >> 40;
			auth[5] ^= adlen >> 32;
			auth[6] ^= adlen >> 24;
			auth[7] ^= adlen >> 16;
			auth[8] ^= adlen >> 8;
			auth[9] ^= adlen;
			i = 10;
#endif
		}

		/* Fill out the partial block if we can, and encrypt.  */
		xor(auth + i, auth + i, adp, MIN(adlen, 16 - i));
		adp += MIN(adlen, 16 - i);
		adlen -= MIN(adlen, 16 - i);
		aes_enc(enc, auth, auth, C->nr);

		/* If there was anything more, process 16 bytes at a time.  */
		if (adlen - (adlen % 16)) {
			aes_cbcmac_update1(enc, adp, adlen - (adlen % 16),
			    auth, C->nr);
			adlen %= 16;
		}

		/*
		 * If there's anything at the end, enter it in (padded
		 * with zeros, which is a no-op) and process it.
		 */
		if (adlen) {
			xor(auth, auth, adp, adlen);
			aes_enc(enc, auth, auth, C->nr);
		}
	}

	/* Set up the AES input for AES-CTR encryption.  */
	ctr[0] = __SHIFTIN(L - 1, CCM_EFLAGS_L);
	memcpy(ctr + 1, nonce, noncelen);
	memset(ctr + 1 + noncelen, 0, 16 - 1 - noncelen);

	/* Start on a block boundary.  */
	C->i = 0;
}

void
aes_ccm_enc(struct aes_ccm *C, const void *in, void *out, size_t nbytes)
{
	uint8_t *auth = C->authctr;
	uint8_t *ctr = C->authctr + 16;
	const uint8_t *p = in;
	uint8_t *q = out;

	KASSERTMSG(C->i != ~0u,
	    "%s not allowed after message complete", __func__);
	KASSERTMSG(nbytes <= C->mleft,
	    "message too long: promised %zu bytes, processing >=%zu",
	    C->mlen, C->mlen - C->mleft + nbytes);
	C->mleft -= nbytes;

	/* Finish a partial block if it was already started.  */
	if (C->i) {
		unsigned m = MIN(16 - C->i, nbytes);

		xor(auth + C->i, auth + C->i, p, m);
		xor(q, C->out + C->i, p, m);
		C->i += m;
		p += m;
		q += m;
		nbytes -= m;

		if (C->i == 16) {
			/* Finished a block; authenticate it.  */
			aes_enc(C->enc, auth, auth, C->nr);
			C->i = 0;
		} else {
			/* Didn't finish block, must be done with input. */
			KASSERT(nbytes == 0);
			return;
		}
	}

	/* Process 16 bytes at a time.  */
	if (nbytes - (nbytes % 16)) {
		aes_ccm_enc1(C->enc, p, q, nbytes - (nbytes % 16), auth,
		    C->nr);
		p += nbytes - (nbytes % 16);
		q += nbytes - (nbytes % 16);
		nbytes %= 16;
	}

	/* Incorporate any <16-byte unit as a partial block.  */
	if (nbytes) {
		/* authenticate */
		xor(auth, auth, p, nbytes);

		/* encrypt */
		aes_ccm_inc(C);
		aes_enc(C->enc, ctr, C->out, C->nr);
		xor(q, C->out, p, nbytes);

		C->i = nbytes;
	}
}

void
aes_ccm_dec(struct aes_ccm *C, const void *in, void *out, size_t nbytes)
{
	uint8_t *auth = C->authctr;
	uint8_t *ctr = C->authctr + 16;
	const uint8_t *p = in;
	uint8_t *q = out;

	KASSERTMSG(C->i != ~0u,
	    "%s not allowed after message complete", __func__);
	KASSERTMSG(nbytes <= C->mleft,
	    "message too long: promised %zu bytes, processing >=%zu",
	    C->mlen, C->mlen - C->mleft + nbytes);
	C->mleft -= nbytes;

	/* Finish a partial block if it was already started.  */
	if (C->i) {
		unsigned m = MIN(16 - C->i, nbytes);

		xor(q, C->out + C->i, p, m);
		xor(auth + C->i, auth + C->i, q, m);
		C->i += m;
		p += m;
		q += m;
		nbytes -= m;

		if (C->i == 16) {
			/* Finished a block; authenticate it.  */
			aes_enc(C->enc, auth, auth, C->nr);
			C->i = 0;
		} else {
			/* Didn't finish block, must be done with input. */
			KASSERT(nbytes == 0);
			return;
		}
	}

	/* Process 16 bytes at a time.  */
	if (nbytes - (nbytes % 16)) {
		aes_ccm_dec1(C->enc, p, q, nbytes - (nbytes % 16), auth,
		    C->nr);
		p += nbytes - (nbytes % 16);
		q += nbytes - (nbytes % 16);
		nbytes %= 16;
	}

	/* Incorporate any <16-byte unit as a partial block.  */
	if (nbytes) {
		/* decrypt */
		aes_ccm_inc(C);
		aes_enc(C->enc, ctr, C->out, C->nr);
		xor(q, C->out, p, nbytes);

		/* authenticate */
		xor(auth, auth, q, nbytes);

		C->i = nbytes;
	}
}

void
#if defined(__m68k__) && __GNUC_PREREQ__(8, 0)
__attribute__((__optimize__("O0")))
#endif
aes_ccm_tag(struct aes_ccm *C, void *out)
{
	uint8_t *auth = C->authctr;
	const uint8_t *ctr = C->authctr + 16;

	KASSERTMSG(C->mleft == 0,
	    "message too short: promised %zu bytes, processed %zu",
	    C->mlen, C->mlen - C->mleft);

	/* Zero-pad and munch up a partial block, if any.  */
	if (C->i)
		aes_enc(C->enc, auth, auth, C->nr);

	/* Zero the counter and generate a pad for the tag.  */
	aes_ccm_zero_ctr(C);
	aes_enc(C->enc, ctr, C->out, C->nr);

	/* Copy out as many bytes as requested.  */
	xor(out, C->out, auth, C->M);

	C->i = ~0u;		/* paranoia: prevent future misuse */
}

int
aes_ccm_verify(struct aes_ccm *C, const void *tag)
{
	uint8_t expected[16];
	int result;

	aes_ccm_tag(C, expected);
	result = consttime_memequal(tag, expected, C->M);
	explicit_memset(expected, 0, sizeof expected);

	return result;
}

/* RFC 3610, §8 */

static const uint8_t keyC[16] = {
	0xc0,0xc1,0xc2,0xc3, 0xc4,0xc5,0xc6,0xc7,
	0xc8,0xc9,0xca,0xcb, 0xcc,0xcd,0xce,0xcf,
};

static const uint8_t keyD[16] = {
	0xd7,0x82,0x8d,0x13, 0xb2,0xb0,0xbd,0xc3,
	0x25,0xa7,0x62,0x36, 0xdf,0x93,0xcc,0x6b,
};

static const uint8_t ptxt_seq[] = {
	0x00,0x01,0x02,0x03, 0x04,0x05,0x06,0x07,
	0x08,0x09,0x0a,0x0b, 0x0c,0x0d,0x0e,0x0f,
	0x10,0x11,0x12,0x13, 0x14,0x15,0x16,0x17,
	0x18,0x19,0x1a,0x1b, 0x1c,0x1d,0x1e,0x1f,
	0x20,
};

static const uint8_t ptxt_rand[] = {
	0x6e,0x37,0xa6,0xef, 0x54,0x6d,0x95,0x5d,
	0x34,0xab,0x60,0x59, 0xab,0xf2,0x1c,0x0b,
	0x02,0xfe,0xb8,0x8f, 0x85,0x6d,0xf4,0xa3,
	0x73,0x81,0xbc,0xe3, 0xcc,0x12,0x85,0x17,
	0xd4,
};

static const struct {
	const uint8_t *key;
	size_t noncelen;
	const uint8_t nonce[13];
	size_t adlen;
	const uint8_t *ad;
	size_t mlen;
	const uint8_t *ptxt;
	unsigned M;
	const uint8_t tag[16];
	const uint8_t *ctxt;
} T[] = {
	[0] = {		/* Packet Vector #1, p. 11 */
		.key = keyC,
		.nonce = {
			0x00,0x00,0x00,0x03, 0x02,0x01,0x00,0xa0,
			0xa1,0xa2,0xa3,0xa4, 0xa5,
		},
		.adlen = 8,
		.ad = ptxt_seq,
		.mlen = 23,
		.ptxt = ptxt_seq + 8,
		.M = 8,
		.tag = {0x17,0xe8,0xd1,0x2c,0xfd, 0xf9,0x26,0xe0},
		.ctxt = (const uint8_t[23]) {
			0x58,0x8c,0x97,0x9a, 0x61,0xc6,0x63,0xd2,
			0xf0,0x66,0xd0,0xc2, 0xc0,0xf9,0x89,0x80,
			0x6d,0x5f,0x6b,0x61, 0xda,0xc3,0x84,
		},
	},
	[1] = {			/* Packet Vector #2, p. 11 */
		.key = keyC,
		.nonce = {
			0x00,0x00,0x00,0x04, 0x03,0x02,0x01,0xa0,
			0xa1,0xa2,0xa3,0xa4, 0xa5,
		},
		.adlen = 8,
		.ad = ptxt_seq,
		.mlen = 24,
		.ptxt = ptxt_seq + 8,
		.M = 8,
		.tag = {0xa0,0x91,0xd5,0x6e, 0x10,0x40,0x09,0x16},
		.ctxt = (const uint8_t[24]) {
			0x72,0xc9,0x1a,0x36, 0xe1,0x35,0xf8,0xcf,
			0x29,0x1c,0xa8,0x94, 0x08,0x5c,0x87,0xe3,
			0xcc,0x15,0xc4,0x39, 0xc9,0xe4,0x3a,0x3b,
		},
	},
	[2] = {			/* Packet Vector #3, p. 12 */
		.key = keyC,
		.nonce = {
			0x00,0x00,0x00,0x05, 0x04,0x03,0x02,0xa0,
			0xa1,0xa2,0xa3,0xa4, 0xa5,
		},
		.adlen = 8,
		.ad = ptxt_seq,
		.mlen = 25,
		.ptxt = ptxt_seq + 8,
		.M = 8,
		.tag = {0x4a,0xda,0xa7,0x6f, 0xbd,0x9f,0xb0,0xc5},
		.ctxt = (const uint8_t[25]) {
			0x51,0xb1,0xe5,0xf4, 0x4a,0x19,0x7d,0x1d,
			0xa4,0x6b,0x0f,0x8e, 0x2d,0x28,0x2a,0xe8,
			0x71,0xe8,0x38,0xbb, 0x64,0xda,0x85,0x96,
			0x57,
		},
	},
	[3] = {			/* Packet Vector #4, p. 13 */
		.key = keyC,
		.nonce = {
			0x00,0x00,0x00,0x06, 0x05,0x04,0x03,0xa0,
			0xa1,0xa2,0xa3,0xa4, 0xa5,
		},
		.adlen = 12,
		.ad = ptxt_seq,
		.mlen = 19,
		.ptxt = ptxt_seq + 12,
		.M = 8,
		.tag = {0x96,0xc8,0x61,0xb9, 0xc9,0xe6,0x1e,0xf1},
		.ctxt = (const uint8_t[19]) {
			0xa2,0x8c,0x68,0x65, 0x93,0x9a,0x9a,0x79,
			0xfa,0xaa,0x5c,0x4c, 0x2a,0x9d,0x4a,0x91,
			0xcd,0xac,0x8c,
		},
	},
	[4] = {			/* Packet Vector #5, p. 13 */
		.key = keyC,
		.nonce = {
			0x00,0x00,0x00,0x07, 0x06,0x05,0x04,0xa0,
			0xa1,0xa2,0xa3,0xa4, 0xa5,
		},
		.adlen = 12,
		.ad = ptxt_seq,
		.mlen = 20,
		.ptxt = ptxt_seq + 12,
		.M = 8,
		.tag = {0x51,0xe8,0x3f,0x07, 0x7d,0x9c,0x2d,0x93},
		.ctxt = (const uint8_t[20]) {
			0xdc,0xf1,0xfb,0x7b, 0x5d,0x9e,0x23,0xfb,
			0x9d,0x4e,0x13,0x12, 0x53,0x65,0x8a,0xd8,
			0x6e,0xbd,0xca,0x3e,
		},
	},
	[5] = {			/* Packet Vector #6, p. 13 */
		.key = keyC,
		.nonce = {
			0x00,0x00,0x00,0x08, 0x07,0x06,0x05,0xa0,
			0xa1,0xa2,0xa3,0xa4, 0xa5,
		},
		.adlen = 12,
		.ad = ptxt_seq,
		.mlen = 21,
		.ptxt = ptxt_seq + 12,
		.M = 8,
		.tag = {0x40,0x5a,0x04,0x43, 0xac,0x91,0xcb,0x94},
		.ctxt = (const uint8_t[21]) {
			0x6f,0xc1,0xb0,0x11, 0xf0,0x06,0x56,0x8b,
			0x51,0x71,0xa4,0x2d, 0x95,0x3d,0x46,0x9b,
			0x25,0x70,0xa4,0xbd, 0x87,
		},
	},
	[6] = {			/* Packet Vector #24 */
		.key = keyD,
		.nonce = {
			0x00,0x8d,0x49,0x3b, 0x30,0xae,0x8b,0x3c,
			0x96,0x96,0x76,0x6c, 0xfa,
		},
		.adlen = 12,
		.ad = ptxt_rand,
		.mlen = 21,
		.ptxt = ptxt_rand + 12,
		.M = 10,
		.tag = {0x6d,0xce,0x9e,0x82, 0xef,0xa1,0x6d,0xa6, 0x20,0x59},
		.ctxt = (const uint8_t[21]) {
			0xf3,0x29,0x05,0xb8, 0x8a,0x64,0x1b,0x04,
			0xb9,0xc9,0xff,0xb5, 0x8c,0xc3,0x90,0x90,
			0x0f,0x3d,0xa1,0x2a, 0xb1,
		},
	},
};

int
aes_ccm_selftest(void)
{
	const unsigned L = 2;
	const unsigned noncelen = 13;
	struct aesenc enc, *AE = &enc;
	struct aes_ccm ccm, *C = &ccm;
	uint8_t buf[33 + 2], *bufp = buf + 1;
	uint8_t tag[16 + 2], *tagp = tag + 1;
	unsigned i;
	int result = 0;

	bufp[-1] = bufp[33] = 0x1a;
	tagp[-1] = tagp[16] = 0x53;

	for (i = 0; i < __arraycount(T); i++) {
		const unsigned nr = aes_setenckey128(AE, T[i].key);

		/* encrypt and authenticate */
		aes_ccm_init(C, nr, AE, L, T[i].M, T[i].nonce, noncelen,
		    T[i].ad, T[i].adlen, T[i].mlen);
		aes_ccm_enc(C, T[i].ptxt, bufp, 1);
		aes_ccm_enc(C, T[i].ptxt + 1, bufp + 1, 2);
		aes_ccm_enc(C, T[i].ptxt + 3, bufp + 3, T[i].mlen - 4);
		aes_ccm_enc(C, T[i].ptxt + T[i].mlen - 1,
		    bufp + T[i].mlen - 1, 1);
		aes_ccm_tag(C, tagp);
		if (memcmp(bufp, T[i].ctxt, T[i].mlen)) {
			char name[32];
			snprintf(name, sizeof name, "%s: ctxt %u", __func__,
			    i);
			hexdump(printf, name, bufp, T[i].mlen);
			result = -1;
		}
		if (memcmp(tagp, T[i].tag, T[i].M)) {
			char name[32];
			snprintf(name, sizeof name, "%s: tag %u", __func__, i);
			hexdump(printf, name, tagp, T[i].M);
			result = -1;
		}

		/* decrypt and verify */
		aes_ccm_init(C, nr, AE, L, T[i].M, T[i].nonce, noncelen,
		    T[i].ad, T[i].adlen, T[i].mlen);
		aes_ccm_dec(C, T[i].ctxt, bufp, 1);
		aes_ccm_dec(C, T[i].ctxt + 1, bufp + 1, 2);
		aes_ccm_dec(C, T[i].ctxt + 3, bufp + 3, T[i].mlen - 4);
		aes_ccm_dec(C, T[i].ctxt + T[i].mlen - 1,
		    bufp + T[i].mlen - 1, 1);
		if (!aes_ccm_verify(C, T[i].tag)) {
			printf("%s: verify %u failed\n", __func__, i);
			result = -1;
		}
		if (memcmp(bufp, T[i].ptxt, T[i].mlen)) {
			char name[32];
			snprintf(name, sizeof name, "%s: ptxt %u", __func__,
			    i);
			hexdump(printf, name, bufp, T[i].mlen);
			result = -1;
		}

		/* decrypt and verify with a bit flipped */
		memcpy(tagp, T[i].tag, T[i].M);
		tagp[0] ^= 0x80;
		aes_ccm_init(C, nr, AE, L, T[i].M, T[i].nonce, noncelen,
		    T[i].ad, T[i].adlen, T[i].mlen);
		aes_ccm_dec(C, T[i].ctxt, bufp, 1);
		aes_ccm_dec(C, T[i].ctxt + 1, bufp + 1, 2);
		aes_ccm_dec(C, T[i].ctxt + 3, bufp + 3, T[i].mlen - 4);
		aes_ccm_dec(C, T[i].ctxt + T[i].mlen - 1,
		    bufp + T[i].mlen - 1, 1);
		if (aes_ccm_verify(C, tagp)) {
			printf("%s: forgery %u succeeded\n", __func__, i);
			result = -1;
		}
	}

	if (bufp[-1] != 0x1a || bufp[33] != 0x1a) {
		printf("%s: buffer overrun\n", __func__);
		result = -1;
	}
	if (tagp[-1] != 0x53 || tagp[16] != 0x53) {
		printf("%s: tag overrun\n", __func__);
		result = -1;
	}

	return result;
}

/* XXX provisional hack */
#include <sys/module.h>

MODULE(MODULE_CLASS_MISC, aes_ccm, "aes");

static int
aes_ccm_modcmd(modcmd_t cmd, void *opaque)
{

	switch (cmd) {
	case MODULE_CMD_INIT:
		if (aes_ccm_selftest())
			return EIO;
		aprint_debug("aes_ccm: self-test passed\n");
		return 0;
	case MODULE_CMD_FINI:
		return 0;
	default:
		return ENOTTY;
	}
}
