[crypto] Replace AES implementation
Replace the AES implementation from AXTLS with a dedicated iPXE
implementation which is slightly smaller and around 1000% faster.  This
implementation has been verified using the existing self-tests based on
the NIST AES test vectors.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
This commit is contained in:
parent cbb07f0ef7
commit 09824eca31
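
For reference, a minimal usage sketch (not part of this commit): driving the
new implementation through iPXE's generic cipher interface, assuming the
cipher_setkey() and cipher_encrypt() wrappers from <ipxe/crypto.h>.  The
expected ciphertext is the AES-128 known answer from FIPS-197 Appendix C.1,
the same family of NIST vectors exercised by the existing self-tests.

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>
    #include <ipxe/crypto.h>
    #include <ipxe/aes.h>

    static void aes_example ( void ) {
            struct aes_context aes;
            /* FIPS-197 Appendix C.1 example key and plaintext */
            static const uint8_t key[16] = {
                    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
                    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f };
            static const uint8_t plain[16] = {
                    0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
                    0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff };
            static const uint8_t expected[16] = {
                    0x69, 0xc4, 0xe0, 0xd8, 0x6a, 0x7b, 0x04, 0x30,
                    0xd8, 0xcd, 0xb7, 0x80, 0x70, 0xb4, 0xc5, 0x5a };
            uint8_t out[16];
            int rc;

            /* Expand the key, then encrypt a single 16-byte block */
            rc = cipher_setkey ( &aes_algorithm, &aes, key, sizeof ( key ) );
            assert ( rc == 0 );
            cipher_encrypt ( &aes_algorithm, &aes, plain, out,
                             sizeof ( plain ) );
            assert ( memcmp ( out, expected, sizeof ( out ) ) == 0 );
    }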
@@ -0,0 +1,804 @@
/*
 * Copyright (C) 2015 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 *
 * You can also choose to distribute this program under the terms of
 * the Unmodified Binary Distribution Licence (as given in the file
 * COPYING.UBDL), provided that you have satisfied its requirements.
 */

FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );

/** @file
 *
 * AES algorithm
 *
 */

#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <byteswap.h>
#include <ipxe/rotate.h>
#include <ipxe/crypto.h>
#include <ipxe/ecb.h>
#include <ipxe/cbc.h>
#include <ipxe/aes.h>

/** AES strides
 *
 * These are the strides (modulo 16) used to walk through the AES
 * input state bytes in order of byte position after [Inv]ShiftRows.
 */
enum aes_stride {
        /** Input stride for ShiftRows
         *
         *    0 4 8 c
         *     \ \ \
         *    1 5 9 d
         *     \ \ \
         *    2 6 a e
         *     \ \ \
         *    3 7 b f
         */
        AES_STRIDE_SHIFTROWS = +5,
        /** Input stride for InvShiftRows
         *
         *    0 4 8 c
         *     / / /
         *    1 5 9 d
         *     / / /
         *    2 6 a e
         *     / / /
         *    3 7 b f
         */
        AES_STRIDE_INVSHIFTROWS = -3,
};

/** A single AES lookup table entry
 *
 * This represents the product (in the Galois field GF(2^8)) of an
 * eight-byte vector multiplier with a single scalar multiplicand.
 *
 * The vector multipliers used for AES will be {1,1,1,3,2,1,1,3} for
 * MixColumns and {1,9,13,11,14,9,13,11} for InvMixColumns.  This
 * allows for the result of multiplying any single column of the
 * [Inv]MixColumns matrix by a scalar value to be obtained simply by
 * extracting the relevant four-byte subset from the lookup table
 * entry.
 *
 * For example, to find the result of multiplying the second column of
 * the MixColumns matrix by the scalar value 0x80:
 *
 *   MixColumns column[0]:  {    2,    1,    1,    3 }
 *   MixColumns column[1]:  {    3,    2,    1,    1 }
 *   MixColumns column[2]:  {    1,    3,    2,    1 }
 *   MixColumns column[3]:  {    1,    1,    3,    2 }
 *   Vector multiplier:     {    1,    1,    1,    3,    2,    1,    1,    3 }
 *   Scalar multiplicand:     0x80
 *   Lookup table entry:    { 0x80, 0x80, 0x80, 0x9b, 0x1b, 0x80, 0x80, 0x9b }
 *
 * The second column of the MixColumns matrix is {3,2,1,1}.  The
 * product of this column with the scalar value 0x80 can be obtained
 * by extracting the relevant four-byte subset of the lookup table
 * entry:
 *
 *   MixColumns column[1]:              {    3,    2,    1,    1 }
 *   Vector multiplier:     {    1,    1,    1,    3,    2,    1,    1,    3 }
 *   Lookup table entry:    { 0x80, 0x80, 0x80, 0x9b, 0x1b, 0x80, 0x80, 0x9b }
 *   Product:                           { 0x9b, 0x1b, 0x80, 0x80 }
 *
 * The column lookups require only seven bytes of the eight-byte
 * entry: the remaining (first) byte is used to hold the scalar
 * multiplicand itself (i.e. the first byte of the vector multiplier
 * is always chosen to be 1).
 */
union aes_table_entry {
        /** Viewed as an array of bytes */
        uint8_t byte[8];
} __attribute__ (( packed ));

/** An AES lookup table
 *
 * This represents the products (in the Galois field GF(2^8)) of a
 * constant eight-byte vector multiplier with all possible 256 scalar
 * multiplicands.
 *
 * The entries are indexed by the AES [Inv]SubBytes S-box output
 * values (denoted S(N)).  This allows for the result of multiplying
 * any single column of the [Inv]MixColumns matrix by S(N) to be
 * obtained simply by extracting the relevant four-byte subset from
 * the Nth table entry.  For example:
 *
 *   Input byte (N):          0x3a
 *   SubBytes output S(N):    0x80
 *   MixColumns column[1]:              {    3,    2,    1,    1 }
 *   Vector multiplier:     {    1,    1,    1,    3,    2,    1,    1,    3 }
 *   Table entry[0x3a]:     { 0x80, 0x80, 0x80, 0x9b, 0x1b, 0x80, 0x80, 0x9b }
 *   Product:                           { 0x9b, 0x1b, 0x80, 0x80 }
 *
 * Since the first byte of the eight-byte vector multiplier is always
 * chosen to be 1, the value of S(N) may be looked up by extracting
 * the first byte of the Nth table entry.
 */
struct aes_table {
        /** Table entries, indexed by S(N) */
        union aes_table_entry entry[256];
} __attribute__ (( aligned ( 8 ) ));

/** AES MixColumns lookup table */
static struct aes_table aes_mixcolumns;

/** AES InvMixColumns lookup table */
static struct aes_table aes_invmixcolumns;

/**
 * Multiply [Inv]MixColumns matrix column by scalar multiplicand
 *
 * @v entry             AES lookup table entry for scalar multiplicand
 * @v column            [Inv]MixColumns matrix column index
 * @ret product         Product of matrix column with scalar multiplicand
 */
static inline __attribute__ (( always_inline )) uint32_t
aes_entry_column ( const union aes_table_entry *entry, unsigned int column ) {
        const uint8_t *first __attribute__ (( may_alias ));

        /* Locate start of relevant four-byte subset */
        first = &entry->byte[ 4 - column ];

        /* Extract this four-byte subset */
        return ( *( ( uint32_t * ) first ) );
}

/**
 * Multiply [Inv]MixColumns matrix column by S-boxed input byte
 *
 * @v table             AES lookup table
 * @v stride            AES row shift stride
 * @v in                AES input state
 * @v offset            Output byte offset (after [Inv]ShiftRows)
 * @ret product         Product of matrix column with S(input byte)
 *
 * Note that the specified offset is not the offset of the input byte;
 * it is the offset of the output byte which corresponds to the input
 * byte.  This output byte offset is used to calculate both the input
 * byte offset and to select the appropriate matrix column.
 *
 * With a compile-time constant offset, this function will optimise
 * down to a single "movzbl" (to extract the input byte) and will
 * generate a single x86 memory reference expression which can then be
 * used directly within a single "xorl" instruction.
 */
static inline __attribute__ (( always_inline )) uint32_t
aes_column ( const struct aes_table *table, size_t stride,
             const union aes_matrix *in, size_t offset ) {
        const union aes_table_entry *entry;
        unsigned int byte;

        /* Extract input byte corresponding to this output byte offset
         * (i.e. perform [Inv]ShiftRows).
         */
        byte = in->byte[ ( stride * offset ) & 0xf ];

        /* Locate lookup table entry for this input byte (i.e. perform
         * [Inv]SubBytes).
         */
        entry = &table->entry[byte];

        /* Multiply appropriate matrix column by this input byte
         * (i.e. perform [Inv]MixColumns).
         */
        return aes_entry_column ( entry, ( offset & 0x3 ) );
}

/**
 * Calculate intermediate round output column
 *
 * @v table             AES lookup table
 * @v stride            AES row shift stride
 * @v in                AES input state
 * @v key               AES round key
 * @v column            Column index
 * @ret output          Output column value
 */
static inline __attribute__ (( always_inline )) uint32_t
aes_output ( const struct aes_table *table, size_t stride,
             const union aes_matrix *in, const union aes_matrix *key,
             unsigned int column ) {
        size_t offset = ( column * 4 );

        /* Perform [Inv]ShiftRows, [Inv]SubBytes, [Inv]MixColumns, and
         * AddRoundKey for this column.  The loop is unrolled to allow
         * for the required compile-time constant optimisations.
         */
        return ( aes_column ( table, stride, in, ( offset + 0 ) ) ^
                 aes_column ( table, stride, in, ( offset + 1 ) ) ^
                 aes_column ( table, stride, in, ( offset + 2 ) ) ^
                 aes_column ( table, stride, in, ( offset + 3 ) ) ^
                 key->column[column] );
}

/**
 * Perform a single intermediate round
 *
 * @v table             AES lookup table
 * @v stride            AES row shift stride
 * @v in                AES input state
 * @v out               AES output state
 * @v key               AES round key
 */
static inline __attribute__ (( always_inline )) void
aes_round ( const struct aes_table *table, size_t stride,
            const union aes_matrix *in, union aes_matrix *out,
            const union aes_matrix *key ) {

        /* Perform [Inv]ShiftRows, [Inv]SubBytes, [Inv]MixColumns, and
         * AddRoundKey for all columns.  The loop is unrolled to allow
         * for the required compile-time constant optimisations.
         */
        out->column[0] = aes_output ( table, stride, in, key, 0 );
        out->column[1] = aes_output ( table, stride, in, key, 1 );
        out->column[2] = aes_output ( table, stride, in, key, 2 );
        out->column[3] = aes_output ( table, stride, in, key, 3 );
}

/**
 * Perform encryption intermediate rounds
 *
 * @v in                AES input state
 * @v out               AES output state
 * @v key               Round keys
 * @v rounds            Number of rounds (must be odd)
 *
 * This function is deliberately marked as non-inlinable to ensure
 * maximal availability of registers for GCC's register allocator,
 * which has a tendency to otherwise spill performance-critical
 * registers to the stack.
 */
static __attribute__ (( noinline )) void
aes_encrypt_rounds ( union aes_matrix *in, union aes_matrix *out,
                     const union aes_matrix *key, unsigned int rounds ) {
        union aes_matrix *tmp;

        /* Perform intermediate rounds */
        do {
                /* Perform one intermediate round */
                aes_round ( &aes_mixcolumns, AES_STRIDE_SHIFTROWS,
                            in, out, key++ );

                /* Swap input and output states for next round */
                tmp = in;
                in = out;
                out = tmp;

        } while ( --rounds );
}

/**
 * Perform decryption intermediate rounds
 *
 * @v in                AES input state
 * @v out               AES output state
 * @v key               Round keys
 * @v rounds            Number of rounds (must be odd)
 *
 * As with aes_encrypt_rounds(), this function is deliberately marked
 * as non-inlinable.
 *
 * This function could potentially use the same binary code as is used
 * for encryption.  To compensate for the difference between ShiftRows
 * and InvShiftRows, half of the input byte offsets would have to be
 * modifiable at runtime (half by an offset of +4/-4, half by an
 * offset of -4/+4 for ShiftRows/InvShiftRows).  This can be
 * accomplished in x86 assembly within the number of available
 * registers, but GCC's register allocator struggles to do so,
 * resulting in a significant performance decrease due to registers
 * being spilled to the stack.  We therefore use two separate but very
 * similar binary functions based on the same C source.
 */
static __attribute__ (( noinline )) void
aes_decrypt_rounds ( union aes_matrix *in, union aes_matrix *out,
                     const union aes_matrix *key, unsigned int rounds ) {
        union aes_matrix *tmp;

        /* Perform intermediate rounds */
        do {
                /* Perform one intermediate round */
                aes_round ( &aes_invmixcolumns, AES_STRIDE_INVSHIFTROWS,
                            in, out, key++ );

                /* Swap input and output states for next round */
                tmp = in;
                in = out;
                out = tmp;

        } while ( --rounds );
}

/**
 * Perform standalone AddRoundKey
 *
 * @v state             AES state
 * @v key               AES round key
 */
static inline __attribute__ (( always_inline )) void
aes_addroundkey ( union aes_matrix *state, const union aes_matrix *key ) {

        state->column[0] ^= key->column[0];
        state->column[1] ^= key->column[1];
        state->column[2] ^= key->column[2];
        state->column[3] ^= key->column[3];
}

/**
 * Perform final round
 *
 * @v table             AES lookup table
 * @v stride            AES row shift stride
 * @v in                AES input state
 * @v out               AES output state
 * @v key               AES round key
 */
static void aes_final ( const struct aes_table *table, size_t stride,
                        const union aes_matrix *in, union aes_matrix *out,
                        const union aes_matrix *key ) {
        const union aes_table_entry *entry;
        unsigned int byte;
        size_t out_offset;
        size_t in_offset;

        /* Perform [Inv]ShiftRows and [Inv]SubBytes */
        for ( out_offset = 0, in_offset = 0 ; out_offset < 16 ;
              out_offset++, in_offset = ( ( in_offset + stride ) & 0xf ) ) {

                /* Extract input byte (i.e. perform [Inv]ShiftRows) */
                byte = in->byte[in_offset];

                /* Locate lookup table entry for this input byte
                 * (i.e. perform [Inv]SubBytes).
                 */
                entry = &table->entry[byte];

                /* Store output byte */
                out->byte[out_offset] = entry->byte[0];
        }

        /* Perform AddRoundKey */
        aes_addroundkey ( out, key );
}

/**
 * Encrypt data
 *
 * @v ctx               Context
 * @v src               Data to encrypt
 * @v dst               Buffer for encrypted data
 * @v len               Length of data
 */
static void aes_encrypt ( void *ctx, const void *src, void *dst, size_t len ) {
        struct aes_context *aes = ctx;
        union aes_matrix buffer[2];
        union aes_matrix *in = &buffer[0];
        union aes_matrix *out = &buffer[1];
        unsigned int rounds = aes->rounds;

        /* Sanity check */
        assert ( len == sizeof ( *in ) );

        /* Initialise input state */
        memcpy ( in, src, sizeof ( *in ) );

        /* Perform initial round (AddRoundKey) */
        aes_addroundkey ( in, &aes->encrypt.key[0] );

        /* Perform intermediate rounds (ShiftRows, SubBytes,
         * MixColumns, AddRoundKey).
         */
        aes_encrypt_rounds ( in, out, &aes->encrypt.key[1], ( rounds - 2 ) );
        in = out;

        /* Perform final round (ShiftRows, SubBytes, AddRoundKey) */
        out = dst;
        aes_final ( &aes_mixcolumns, AES_STRIDE_SHIFTROWS, in, out,
                    &aes->encrypt.key[ rounds - 1 ] );
}

/**
 * Decrypt data
 *
 * @v ctx               Context
 * @v src               Data to decrypt
 * @v dst               Buffer for decrypted data
 * @v len               Length of data
 */
static void aes_decrypt ( void *ctx, const void *src, void *dst, size_t len ) {
        struct aes_context *aes = ctx;
        union aes_matrix buffer[2];
        union aes_matrix *in = &buffer[0];
        union aes_matrix *out = &buffer[1];
        unsigned int rounds = aes->rounds;

        /* Sanity check */
        assert ( len == sizeof ( *in ) );

        /* Initialise input state */
        memcpy ( in, src, sizeof ( *in ) );

        /* Perform initial round (AddRoundKey) */
        aes_addroundkey ( in, &aes->decrypt.key[0] );

        /* Perform intermediate rounds (InvShiftRows, InvSubBytes,
         * InvMixColumns, AddRoundKey).
         */
        aes_decrypt_rounds ( in, out, &aes->decrypt.key[1], ( rounds - 2 ) );
        in = out;

        /* Perform final round (InvShiftRows, InvSubBytes, AddRoundKey) */
        out = dst;
        aes_final ( &aes_invmixcolumns, AES_STRIDE_INVSHIFTROWS, in, out,
                    &aes->decrypt.key[ rounds - 1 ] );
}

/**
 * Multiply a polynomial by (x) modulo (x^8 + x^4 + x^3 + x + 1) in GF(2^8)
 *
 * @v poly              Polynomial to be multiplied
 * @ret result          Result
 */
static __attribute__ (( const )) unsigned int aes_double ( unsigned int poly ) {

        /* Multiply polynomial by (x), placing the resulting x^8
         * coefficient in the LSB (i.e. rotate byte left by one).
         */
        poly = rol8 ( poly, 1 );

        /* If coefficient of x^8 (in LSB) is non-zero, then reduce by
         * subtracting (x^8 + x^4 + x^3 + x + 1) in GF(2^8).
         */
        if ( poly & 0x01 ) {
                poly ^= 0x01;           /* Subtract x^8 (currently in LSB) */
                poly ^= 0x1b;           /* Subtract (x^4 + x^3 + x + 1) */
        }

        return poly;
}

/**
 * Fill in MixColumns lookup table entry
 *
 * @v entry             AES lookup table entry for scalar multiplicand
 *
 * The MixColumns lookup table vector multiplier is {1,1,1,3,2,1,1,3}.
 */
static void aes_mixcolumns_entry ( union aes_table_entry *entry ) {
        unsigned int scalar_x_1;
        unsigned int scalar_x;
        unsigned int scalar;

        /* Retrieve scalar multiplicand */
        scalar = entry->byte[0];
        entry->byte[1] = scalar;
        entry->byte[2] = scalar;
        entry->byte[5] = scalar;
        entry->byte[6] = scalar;

        /* Calculate scalar multiplied by (x) */
        scalar_x = aes_double ( scalar );
        entry->byte[4] = scalar_x;

        /* Calculate scalar multiplied by (x + 1) */
        scalar_x_1 = ( scalar_x ^ scalar );
        entry->byte[3] = scalar_x_1;
        entry->byte[7] = scalar_x_1;
}

/**
 * Fill in InvMixColumns lookup table entry
 *
 * @v entry             AES lookup table entry for scalar multiplicand
 *
 * The InvMixColumns lookup table vector multiplier is {1,9,13,11,14,9,13,11}.
 */
static void aes_invmixcolumns_entry ( union aes_table_entry *entry ) {
        unsigned int scalar_x3_x2_x;
        unsigned int scalar_x3_x2_1;
        unsigned int scalar_x3_x2;
        unsigned int scalar_x3_x_1;
        unsigned int scalar_x3_1;
        unsigned int scalar_x3;
        unsigned int scalar_x2;
        unsigned int scalar_x;
        unsigned int scalar;

        /* Retrieve scalar multiplicand */
        scalar = entry->byte[0];

        /* Calculate scalar multiplied by (x) */
        scalar_x = aes_double ( scalar );

        /* Calculate scalar multiplied by (x^2) */
        scalar_x2 = aes_double ( scalar_x );

        /* Calculate scalar multiplied by (x^3) */
        scalar_x3 = aes_double ( scalar_x2 );

        /* Calculate scalar multiplied by (x^3 + 1) */
        scalar_x3_1 = ( scalar_x3 ^ scalar );
        entry->byte[1] = scalar_x3_1;
        entry->byte[5] = scalar_x3_1;

        /* Calculate scalar multiplied by (x^3 + x + 1) */
        scalar_x3_x_1 = ( scalar_x3_1 ^ scalar_x );
        entry->byte[3] = scalar_x3_x_1;
        entry->byte[7] = scalar_x3_x_1;

        /* Calculate scalar multiplied by (x^3 + x^2) */
        scalar_x3_x2 = ( scalar_x3 ^ scalar_x2 );

        /* Calculate scalar multiplied by (x^3 + x^2 + 1) */
        scalar_x3_x2_1 = ( scalar_x3_x2 ^ scalar );
        entry->byte[2] = scalar_x3_x2_1;
        entry->byte[6] = scalar_x3_x2_1;

        /* Calculate scalar multiplied by (x^3 + x^2 + x) */
        scalar_x3_x2_x = ( scalar_x3_x2 ^ scalar_x );
        entry->byte[4] = scalar_x3_x2_x;
}

/**
 * Generate AES lookup tables
 *
 */
static void aes_generate ( void ) {
        union aes_table_entry *entry;
        union aes_table_entry *inventry;
        unsigned int poly = 0x01;
        unsigned int invpoly = 0x01;
        unsigned int transformed;
        unsigned int i;

        /* Iterate over non-zero values of GF(2^8) using generator (x + 1) */
        do {

                /* Multiply polynomial by (x + 1) */
                poly ^= aes_double ( poly );

                /* Divide inverse polynomial by (x + 1).  This code
                 * fragment is taken directly from the Wikipedia page
                 * on the Rijndael S-box.  An explanation of why it
                 * works would be greatly appreciated.
                 */
                invpoly ^= ( invpoly << 1 );
                invpoly ^= ( invpoly << 2 );
                invpoly ^= ( invpoly << 4 );
                if ( invpoly & 0x80 )
                        invpoly ^= 0x09;
                invpoly &= 0xff;

                /* Apply affine transformation */
                transformed = ( 0x63 ^ invpoly ^ rol8 ( invpoly, 1 ) ^
                                rol8 ( invpoly, 2 ) ^ rol8 ( invpoly, 3 ) ^
                                rol8 ( invpoly, 4 ) );

                /* Populate S-box (within MixColumns lookup table) */
                aes_mixcolumns.entry[poly].byte[0] = transformed;

        } while ( poly != 0x01 );

        /* Populate zeroth S-box entry (which has no inverse) */
        aes_mixcolumns.entry[0].byte[0] = 0x63;

        /* Fill in MixColumns and InvMixColumns lookup tables */
        for ( i = 0 ; i < 256 ; i++ ) {

                /* Fill in MixColumns lookup table entry */
                entry = &aes_mixcolumns.entry[i];
                aes_mixcolumns_entry ( entry );

                /* Populate inverse S-box (within InvMixColumns lookup table) */
                inventry = &aes_invmixcolumns.entry[ entry->byte[0] ];
                inventry->byte[0] = i;

                /* Fill in InvMixColumns lookup table entry */
                aes_invmixcolumns_entry ( inventry );
        }
}

/**
 * Rotate key column
 *
 * @v column            Key column
 * @ret column          Updated key column
 */
static inline __attribute__ (( always_inline )) uint32_t
aes_key_rotate ( uint32_t column ) {

        return ( ( __BYTE_ORDER == __LITTLE_ENDIAN ) ?
                 ror32 ( column, 8 ) : rol32 ( column, 8 ) );
}

/**
 * Apply S-box to key column
 *
 * @v column            Key column
 * @ret column          Updated key column
 */
static uint32_t aes_key_sbox ( uint32_t column ) {
        unsigned int i;
        uint8_t byte;

        for ( i = 0 ; i < 4 ; i++ ) {
                byte = ( column & 0xff );
                byte = aes_mixcolumns.entry[byte].byte[0];
                column = ( ( column & ~0xff ) | byte );
                column = rol32 ( column, 8 );
        }
        return column;
}

/**
 * Apply schedule round constant to key column
 *
 * @v column            Key column
 * @v rcon              Round constant
 * @ret column          Updated key column
 */
static inline __attribute__ (( always_inline )) uint32_t
aes_key_rcon ( uint32_t column, unsigned int rcon ) {

        return ( ( __BYTE_ORDER == __LITTLE_ENDIAN ) ?
                 ( column ^ rcon ) : ( column ^ ( rcon << 24 ) ) );
}

/**
 * Set key
 *
 * @v ctx               Context
 * @v key               Key
 * @v keylen            Key length
 * @ret rc              Return status code
 */
static int aes_setkey ( void *ctx, const void *key, size_t keylen ) {
        struct aes_context *aes = ctx;
        union aes_matrix *enc;
        union aes_matrix *dec;
        union aes_matrix temp;
        union aes_matrix zero;
        unsigned int rcon = 0x01;
        unsigned int rounds;
        size_t offset = 0;
        uint32_t *prev;
        uint32_t *next;
        uint32_t *end;
        uint32_t tmp;

        /* Generate lookup tables, if not already done */
        if ( ! aes_mixcolumns.entry[0].byte[0] )
                aes_generate();

        /* Validate key length and calculate number of intermediate rounds */
        switch ( keylen ) {
        case ( 128 / 8 ) :
                rounds = 11;
                break;
        case ( 192 / 8 ) :
                rounds = 13;
                break;
        case ( 256 / 8 ) :
                rounds = 15;
                break;
        default:
                DBGC ( aes, "AES %p unsupported key length (%zd bits)\n",
                       aes, ( keylen * 8 ) );
                return -EINVAL;
        }
        aes->rounds = rounds;
        enc = aes->encrypt.key;
        end = enc[rounds].column;

        /* Copy raw key */
        memcpy ( enc, key, keylen );
        prev = enc->column;
        next = ( ( ( void * ) prev ) + keylen );
        tmp = next[-1];

        /* Construct expanded key */
        while ( next < end ) {

                /* If this is the first column of an expanded key
                 * block, or the middle column of an AES-256 key
                 * block, then apply the S-box.
                 */
                if ( ( offset == 0 ) || ( ( offset | keylen ) == 48 ) )
                        tmp = aes_key_sbox ( tmp );

                /* If this is the first column of an expanded key
                 * block then rotate and apply the round constant.
                 */
                if ( offset == 0 ) {
                        tmp = aes_key_rotate ( tmp );
                        tmp = aes_key_rcon ( tmp, rcon );
                        rcon = aes_double ( rcon );
                }

                /* XOR with previous key column */
                tmp ^= *prev;

                /* Store column */
                *next = tmp;

                /* Move to next column */
                offset += sizeof ( *next );
                if ( offset == keylen )
                        offset = 0;
                next++;
                prev++;
        }
        DBGC2 ( aes, "AES %p expanded %zd-bit key:\n", aes, ( keylen * 8 ) );
        DBGC2_HDA ( aes, 0, &aes->encrypt, ( rounds * sizeof ( *enc ) ) );

        /* Convert to decryption key */
        memset ( &zero, 0, sizeof ( zero ) );
        dec = &aes->decrypt.key[ rounds - 1 ];
        memcpy ( dec--, enc++, sizeof ( *dec ) );
        while ( dec > aes->decrypt.key ) {
                /* Perform InvMixColumns (by reusing the encryption
                 * final-round code to perform ShiftRows+SubBytes and
                 * reusing the decryption intermediate-round code to
                 * perform InvShiftRows+InvSubBytes+InvMixColumns, all
                 * with a zero encryption key).
                 */
                aes_final ( &aes_mixcolumns, AES_STRIDE_SHIFTROWS,
                            enc++, &temp, &zero );
                aes_decrypt_rounds ( &temp, dec--, &zero, 1 );
        }
        memcpy ( dec--, enc++, sizeof ( *dec ) );
        DBGC2 ( aes, "AES %p inverted %zd-bit key:\n", aes, ( keylen * 8 ) );
        DBGC2_HDA ( aes, 0, &aes->decrypt, ( rounds * sizeof ( *dec ) ) );

        return 0;
}

/**
 * Set initialisation vector
 *
 * @v ctx               Context
 * @v iv                Initialisation vector
 */
static void aes_setiv ( void *ctx __unused, const void *iv __unused ) {
        /* Nothing to do */
}

/** Basic AES algorithm */
struct cipher_algorithm aes_algorithm = {
        .name = "aes",
        .ctxsize = sizeof ( struct aes_context ),
        .blocksize = AES_BLOCKSIZE,
        .setkey = aes_setkey,
        .setiv = aes_setiv,
        .encrypt = aes_encrypt,
        .decrypt = aes_decrypt,
};

/* AES in Electronic Codebook mode */
ECB_CIPHER ( aes_ecb, aes_ecb_algorithm,
             aes_algorithm, struct aes_context, AES_BLOCKSIZE );

/* AES in Cipher Block Chaining mode */
CBC_CIPHER ( aes_cbc, aes_cbc_algorithm,
             aes_algorithm, struct aes_context, AES_BLOCKSIZE );
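
Illustration (not part of the commit): the eight-byte table entry trick from
aes_mixcolumns_entry() above, shown standalone.  gf_double() mirrors
aes_double() without the iPXE headers; for the scalar 0x80 the entry works
out to { 0x80, 0x80, 0x80, 0x9b, 0x1b, 0x80, 0x80, 0x9b }, and each
MixColumns column product is simply a four-byte window into that entry,
exactly as described in the union aes_table_entry comment.

    #include <stdint.h>
    #include <stdio.h>

    /* GF(2^8) doubling modulo x^8 + x^4 + x^3 + x + 1 */
    static uint8_t gf_double ( uint8_t s ) {
            return ( ( s << 1 ) ^ ( ( s & 0x80 ) ? 0x1b : 0x00 ) );
    }

    int main ( void ) {
            uint8_t s = 0x80;
            uint8_t s2 = gf_double ( s );   /* scalar multiplied by 2 */
            uint8_t s3 = ( s2 ^ s );        /* scalar multiplied by 3 */
            /* Vector multiplier {1,1,1,3,2,1,1,3} applied to s */
            uint8_t entry[8] = { s, s, s, s3, s2, s, s, s3 };
            unsigned int column;

            /* Product of MixColumns column[c] with s is entry[4-c..7-c] */
            for ( column = 0 ; column < 4 ; column++ ) {
                    printf ( "column[%u] * 0x%02x = "
                             "{ 0x%02x, 0x%02x, 0x%02x, 0x%02x }\n",
                             column, s,
                             entry[ 4 - column ], entry[ 5 - column ],
                             entry[ 6 - column ], entry[ 7 - column ] );
            }
            return 0;
    }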
@@ -1,457 +0,0 @@
/*
 * Copyright (c) 2007, Cameron Rich
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 * * Neither the name of the axTLS project nor the names of its contributors
 *   may be used to endorse or promote products derived from this software
 *   without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * AES implementation - this is a small code version. There are much faster
 * versions around but they are much larger in size (i.e. they use large
 * submix tables).
 */

#include <string.h>
#include "os_port.h"
#include "crypto.h"

/* all commented out in skeleton mode */
#ifndef CONFIG_SSL_SKELETON_MODE

#define rot1(x) (((x) << 24) | ((x) >> 8))
#define rot2(x) (((x) << 16) | ((x) >> 16))
#define rot3(x) (((x) << 8) | ((x) >> 24))

/*
 * This cute trick does 4 'mul by two' at once.  Stolen from
 * Dr B. R. Gladman <brg@gladman.uk.net> but I'm sure the u-(u>>7) is
 * a standard graphics trick
 * The key to this is that we need to xor with 0x1b if the top bit is set.
 * a 1xxx xxxx   0xxx 0xxx First we mask the 7bit,
 * b 1000 0000   0000 0000 then we shift right by 7 putting the 7bit in 0bit,
 * c 0000 0001   0000 0000 we then subtract (c) from (b)
 * d 0111 1111   0000 0000 and now we and with our mask
 * e 0001 1011   0000 0000
 */
#define mt 0x80808080
#define ml 0x7f7f7f7f
#define mh 0xfefefefe
#define mm 0x1b1b1b1b
#define mul2(x,t)       ((t)=((x)&mt), \
                        ((((x)+(x))&mh)^(((t)-((t)>>7))&mm)))

#define inv_mix_col(x,f2,f4,f8,f9) (\
                        (f2)=mul2(x,f2), \
                        (f4)=mul2(f2,f4), \
                        (f8)=mul2(f4,f8), \
                        (f9)=(x)^(f8), \
                        (f8)=((f2)^(f4)^(f8)), \
                        (f2)^=(f9), \
                        (f4)^=(f9), \
                        (f8)^=rot3(f2), \
                        (f8)^=rot2(f4), \
                        (f8)^rot1(f9))

/*
 * AES S-box
 */
static const uint8_t aes_sbox[256] =
{
        0x63,0x7C,0x77,0x7B,0xF2,0x6B,0x6F,0xC5,
        0x30,0x01,0x67,0x2B,0xFE,0xD7,0xAB,0x76,
        0xCA,0x82,0xC9,0x7D,0xFA,0x59,0x47,0xF0,
        0xAD,0xD4,0xA2,0xAF,0x9C,0xA4,0x72,0xC0,
        0xB7,0xFD,0x93,0x26,0x36,0x3F,0xF7,0xCC,
        0x34,0xA5,0xE5,0xF1,0x71,0xD8,0x31,0x15,
        0x04,0xC7,0x23,0xC3,0x18,0x96,0x05,0x9A,
        0x07,0x12,0x80,0xE2,0xEB,0x27,0xB2,0x75,
        0x09,0x83,0x2C,0x1A,0x1B,0x6E,0x5A,0xA0,
        0x52,0x3B,0xD6,0xB3,0x29,0xE3,0x2F,0x84,
        0x53,0xD1,0x00,0xED,0x20,0xFC,0xB1,0x5B,
        0x6A,0xCB,0xBE,0x39,0x4A,0x4C,0x58,0xCF,
        0xD0,0xEF,0xAA,0xFB,0x43,0x4D,0x33,0x85,
        0x45,0xF9,0x02,0x7F,0x50,0x3C,0x9F,0xA8,
        0x51,0xA3,0x40,0x8F,0x92,0x9D,0x38,0xF5,
        0xBC,0xB6,0xDA,0x21,0x10,0xFF,0xF3,0xD2,
        0xCD,0x0C,0x13,0xEC,0x5F,0x97,0x44,0x17,
        0xC4,0xA7,0x7E,0x3D,0x64,0x5D,0x19,0x73,
        0x60,0x81,0x4F,0xDC,0x22,0x2A,0x90,0x88,
        0x46,0xEE,0xB8,0x14,0xDE,0x5E,0x0B,0xDB,
        0xE0,0x32,0x3A,0x0A,0x49,0x06,0x24,0x5C,
        0xC2,0xD3,0xAC,0x62,0x91,0x95,0xE4,0x79,
        0xE7,0xC8,0x37,0x6D,0x8D,0xD5,0x4E,0xA9,
        0x6C,0x56,0xF4,0xEA,0x65,0x7A,0xAE,0x08,
        0xBA,0x78,0x25,0x2E,0x1C,0xA6,0xB4,0xC6,
        0xE8,0xDD,0x74,0x1F,0x4B,0xBD,0x8B,0x8A,
        0x70,0x3E,0xB5,0x66,0x48,0x03,0xF6,0x0E,
        0x61,0x35,0x57,0xB9,0x86,0xC1,0x1D,0x9E,
        0xE1,0xF8,0x98,0x11,0x69,0xD9,0x8E,0x94,
        0x9B,0x1E,0x87,0xE9,0xCE,0x55,0x28,0xDF,
        0x8C,0xA1,0x89,0x0D,0xBF,0xE6,0x42,0x68,
        0x41,0x99,0x2D,0x0F,0xB0,0x54,0xBB,0x16,
};

/*
 * AES is-box
 */
static const uint8_t aes_isbox[256] =
{
        0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,
        0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
        0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,
        0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
        0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,
        0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
        0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,
        0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
        0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,
        0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
        0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,
        0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
        0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,
        0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
        0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,
        0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
        0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,
        0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
        0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,
        0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
        0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,
        0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
        0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,
        0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
        0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,
        0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
        0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,
        0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
        0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,
        0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
        0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,
        0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
};

static const unsigned char Rcon[30]=
{
        0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,
        0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,
        0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,
        0xb3,0x7d,0xfa,0xef,0xc5,0x91,
};

/* ----- static functions ----- */
static void AES_encrypt(const AES_CTX *ctx, uint32_t *data);
static void AES_decrypt(const AES_CTX *ctx, uint32_t *data);

/* Perform doubling in Galois Field GF(2^8) using the irreducible polynomial
   x^8+x^4+x^3+x+1 */
static unsigned char AES_xtime(uint32_t x)
{
        return (x&0x80) ? (x<<1)^0x1b : x<<1;
}

/**
 * Set up AES with the key/iv and cipher size.
 */
void AES_set_key(AES_CTX *ctx, const uint8_t *key,
                const uint8_t *iv, AES_MODE mode)
{
        int i, ii;
        uint32_t *W, tmp, tmp2;
        const unsigned char *ip;
        int words;

        switch (mode)
        {
        case AES_MODE_128:
                i = 10;
                words = 4;
                break;

        case AES_MODE_256:
                i = 14;
                words = 8;
                break;

        default:        /* fail silently */
                return;
        }

        ctx->rounds = i;
        ctx->key_size = words;
        W = ctx->ks;
        for (i = 0; i < words; i+=2)
        {
                W[i+0]= ((uint32_t)key[ 0]<<24)|
                        ((uint32_t)key[ 1]<<16)|
                        ((uint32_t)key[ 2]<< 8)|
                        ((uint32_t)key[ 3]    );
                W[i+1]= ((uint32_t)key[ 4]<<24)|
                        ((uint32_t)key[ 5]<<16)|
                        ((uint32_t)key[ 6]<< 8)|
                        ((uint32_t)key[ 7]    );
                key += 8;
        }

        ip = Rcon;
        ii = 4 * (ctx->rounds+1);
        for (i = words; i<ii; i++)
        {
                tmp = W[i-1];

                if ((i % words) == 0)
                {
                        tmp2 =(uint32_t)aes_sbox[(tmp    )&0xff]<< 8;
                        tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<<16;
                        tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<24;
                        tmp2|=(uint32_t)aes_sbox[(tmp>>24)      ];
                        tmp=tmp2^(((unsigned int)*ip)<<24);
                        ip++;
                }

                if ((words == 8) && ((i % words) == 4))
                {
                        tmp2 =(uint32_t)aes_sbox[(tmp    )&0xff]    ;
                        tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<< 8;
                        tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<16;
                        tmp2|=(uint32_t)aes_sbox[(tmp>>24)      ]<<24;
                        tmp=tmp2;
                }

                W[i]=W[i-words]^tmp;
        }

        /* copy the iv across */
        memcpy(ctx->iv, iv, 16);
}

/**
 * Change a key for decryption.
 */
void AES_convert_key(AES_CTX *ctx)
{
        int i;
        uint32_t *k,w,t1,t2,t3,t4;

        k = ctx->ks;
        k += 4;

        for (i= ctx->rounds*4; i > 4; i--)
        {
                w= *k;
                w = inv_mix_col(w,t1,t2,t3,t4);
                *k++ =w;
        }
}

/**
 * Encrypt a byte sequence (with a block size 16) using the AES cipher.
 */
void AES_cbc_encrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
{
        int i;
        uint32_t tin[4], tout[4], iv[4];

        memcpy(iv, ctx->iv, AES_IV_SIZE);
        for (i = 0; i < 4; i++)
                tout[i] = ntohl(iv[i]);

        for (length -= AES_BLOCKSIZE; length >= 0; length -= AES_BLOCKSIZE)
        {
                uint32_t msg_32[4];
                uint32_t out_32[4];
                memcpy(msg_32, msg, AES_BLOCKSIZE);
                msg += AES_BLOCKSIZE;

                for (i = 0; i < 4; i++)
                        tin[i] = ntohl(msg_32[i])^tout[i];

                AES_encrypt(ctx, tin);

                for (i = 0; i < 4; i++)
                {
                        tout[i] = tin[i];
                        out_32[i] = htonl(tout[i]);
                }

                memcpy(out, out_32, AES_BLOCKSIZE);
                out += AES_BLOCKSIZE;
        }

        for (i = 0; i < 4; i++)
                iv[i] = htonl(tout[i]);
        memcpy(ctx->iv, iv, AES_IV_SIZE);
}

/**
 * Decrypt a byte sequence (with a block size 16) using the AES cipher.
 */
void AES_cbc_decrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
{
        int i;
        uint32_t tin[4], xor[4], tout[4], data[4], iv[4];

        memcpy(iv, ctx->iv, AES_IV_SIZE);
        for (i = 0; i < 4; i++)
                xor[i] = ntohl(iv[i]);

        for (length -= 16; length >= 0; length -= 16)
        {
                uint32_t msg_32[4];
                uint32_t out_32[4];
                memcpy(msg_32, msg, AES_BLOCKSIZE);
                msg += AES_BLOCKSIZE;

                for (i = 0; i < 4; i++)
                {
                        tin[i] = ntohl(msg_32[i]);
                        data[i] = tin[i];
                }

                AES_decrypt(ctx, data);

                for (i = 0; i < 4; i++)
                {
                        tout[i] = data[i]^xor[i];
                        xor[i] = tin[i];
                        out_32[i] = htonl(tout[i]);
                }

                memcpy(out, out_32, AES_BLOCKSIZE);
                out += AES_BLOCKSIZE;
        }

        for (i = 0; i < 4; i++)
                iv[i] = htonl(xor[i]);
        memcpy(ctx->iv, iv, AES_IV_SIZE);
}

/**
 * Encrypt a single block (16 bytes) of data
 */
static void AES_encrypt(const AES_CTX *ctx, uint32_t *data)
{
        /* To make this code smaller, generate the sbox entries on the fly.
         * This will have a really heavy effect upon performance.
         */
        uint32_t tmp[4];
        uint32_t tmp1, old_a0, a0, a1, a2, a3, row;
        int curr_rnd;
        int rounds = ctx->rounds;
        const uint32_t *k = ctx->ks;

        /* Pre-round key addition */
        for (row = 0; row < 4; row++)
                data[row] ^= *(k++);

        /* Encrypt one block. */
        for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
        {
                /* Perform ByteSub and ShiftRow operations together */
                for (row = 0; row < 4; row++)
                {
                        a0 = (uint32_t)aes_sbox[(data[row%4]>>24)&0xFF];
                        a1 = (uint32_t)aes_sbox[(data[(row+1)%4]>>16)&0xFF];
                        a2 = (uint32_t)aes_sbox[(data[(row+2)%4]>>8)&0xFF];
                        a3 = (uint32_t)aes_sbox[(data[(row+3)%4])&0xFF];

                        /* Perform MixColumn iff not last round */
                        if (curr_rnd < (rounds - 1))
                        {
                                tmp1 = a0 ^ a1 ^ a2 ^ a3;
                                old_a0 = a0;
                                a0 ^= tmp1 ^ AES_xtime(a0 ^ a1);
                                a1 ^= tmp1 ^ AES_xtime(a1 ^ a2);
                                a2 ^= tmp1 ^ AES_xtime(a2 ^ a3);
                                a3 ^= tmp1 ^ AES_xtime(a3 ^ old_a0);
                        }

                        tmp[row] = ((a0 << 24) | (a1 << 16) | (a2 << 8) | a3);
                }

                /* KeyAddition - note that it is vital that this loop is separate from
                   the MixColumn operation, which must be atomic...*/
                for (row = 0; row < 4; row++)
                        data[row] = tmp[row] ^ *(k++);
        }
}

/**
 * Decrypt a single block (16 bytes) of data
 */
static void AES_decrypt(const AES_CTX *ctx, uint32_t *data)
{
        uint32_t tmp[4];
        uint32_t xt0,xt1,xt2,xt3,xt4,xt5,xt6;
        uint32_t a0, a1, a2, a3, row;
        int curr_rnd;
        int rounds = ctx->rounds;
        const uint32_t *k = ctx->ks + ((rounds+1)*4);

        /* pre-round key addition */
        for (row=4; row > 0;row--)
                data[row-1] ^= *(--k);

        /* Decrypt one block */
        for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
        {
                /* Perform ByteSub and ShiftRow operations together */
                for (row = 4; row > 0; row--)
                {
                        a0 = aes_isbox[(data[(row+3)%4]>>24)&0xFF];
                        a1 = aes_isbox[(data[(row+2)%4]>>16)&0xFF];
                        a2 = aes_isbox[(data[(row+1)%4]>>8)&0xFF];
                        a3 = aes_isbox[(data[row%4])&0xFF];

                        /* Perform MixColumn iff not last round */
                        if (curr_rnd<(rounds-1))
                        {
                                /* The MDS coefficients (0x09, 0x0B, 0x0D, 0x0E)
                                   are quite large compared to encryption; this
                                   operation slows decryption down noticeably. */
                                xt0 = AES_xtime(a0^a1);
                                xt1 = AES_xtime(a1^a2);
                                xt2 = AES_xtime(a2^a3);
                                xt3 = AES_xtime(a3^a0);
                                xt4 = AES_xtime(xt0^xt1);
                                xt5 = AES_xtime(xt1^xt2);
                                xt6 = AES_xtime(xt4^xt5);

                                xt0 ^= a1^a2^a3^xt4^xt6;
                                xt1 ^= a0^a2^a3^xt5^xt6;
                                xt2 ^= a0^a1^a3^xt4^xt6;
                                xt3 ^= a0^a1^a2^xt5^xt6;
                                tmp[row-1] = ((xt0<<24)|(xt1<<16)|(xt2<<8)|xt3);
                        }
                        else
                                tmp[row-1] = ((a0<<24)|(a1<<16)|(a2<<8)|a3);
                }

                for (row = 4; row > 0; row--)
                        data[row-1] = tmp[row-1] ^ *(--k);
        }
}

#endif
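
A quick equivalence sketch (illustration, not part of the commit): the
removed AES_xtime() above and the new implementation's aes_double() compute
the same GF(2^8) doubling; aes_double() merely expresses the conditional
reduction via a rotate-left so that the x^8 coefficient lands in the LSB.

    #include <assert.h>
    #include <stdint.h>

    static uint8_t xtime_axtls ( uint8_t x ) {
            /* axTLS style: shift, reduce when the top bit was set */
            return ( ( x & 0x80 ) ? ( ( x << 1 ) ^ 0x1b ) : ( x << 1 ) );
    }

    static uint8_t double_ipxe ( uint8_t x ) {
            /* iPXE style: rotate left, reduce when x^8 appears in LSB */
            uint8_t poly = ( ( x << 1 ) | ( x >> 7 ) );     /* rol8 ( x, 1 ) */
            if ( poly & 0x01 ) {
                    poly ^= 0x01;   /* subtract x^8 (rotated into LSB) */
                    poly ^= 0x1b;   /* subtract x^4 + x^3 + x + 1 */
            }
            return poly;
    }

    int main ( void ) {
            unsigned int i;

            /* Both reductions agree on every byte value */
            for ( i = 0 ; i < 256 ; i++ )
                    assert ( xtime_axtls ( i ) == double_ipxe ( i ) );
            return 0;
    }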
@@ -1,165 +0,0 @@
/*
 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

FILE_LICENCE ( GPL2_OR_LATER );

#include <string.h>
#include <errno.h>
#include <assert.h>
#include <byteswap.h>
#include <ipxe/crypto.h>
#include <ipxe/ecb.h>
#include <ipxe/cbc.h>
#include <ipxe/aes.h>
#include "crypto/axtls/crypto.h"

/** @file
 *
 * AES algorithm
 *
 */

/**
 * Set key
 *
 * @v ctx               Context
 * @v key               Key
 * @v keylen            Key length
 * @ret rc              Return status code
 */
static int aes_setkey ( void *ctx, const void *key, size_t keylen ) {
        struct aes_context *aes_ctx = ctx;
        AES_MODE mode;
        void *iv;

        switch ( keylen ) {
        case ( 128 / 8 ):
                mode = AES_MODE_128;
                break;
        case ( 256 / 8 ):
                mode = AES_MODE_256;
                break;
        default:
                return -EINVAL;
        }

        /* IV is not a relevant concept at this stage; use a dummy
         * value that will have no side-effects.
         */
        iv = &aes_ctx->axtls_ctx.iv;

        AES_set_key ( &aes_ctx->axtls_ctx, key, iv, mode );

        aes_ctx->decrypting = 0;

        return 0;
}

/**
 * Set initialisation vector
 *
 * @v ctx               Context
 * @v iv                Initialisation vector
 */
static void aes_setiv ( void *ctx __unused, const void *iv __unused ) {
        /* Nothing to do */
}

/**
 * Call AXTLS' AES_encrypt() or AES_decrypt() functions
 *
 * @v axtls_ctx         AXTLS AES context
 * @v src               Data to process
 * @v dst               Buffer for output
 * @v func              AXTLS AES function to call
 */
static void aes_call_axtls ( AES_CTX *axtls_ctx, const void *src, void *dst,
                             void ( * func ) ( const AES_CTX *axtls_ctx,
                                               uint32_t *data ) ) {
        const uint32_t *srcl = src;
        uint32_t *dstl = dst;
        unsigned int i;

        /* AXTLS' AES_encrypt() and AES_decrypt() functions both
         * expect to deal with an array of four dwords in host-endian
         * order.
         */
        for ( i = 0 ; i < 4 ; i++ )
                dstl[i] = ntohl ( srcl[i] );
        func ( axtls_ctx, dstl );
        for ( i = 0 ; i < 4 ; i++ )
                dstl[i] = htonl ( dstl[i] );
}

/**
 * Encrypt data
 *
 * @v ctx               Context
 * @v src               Data to encrypt
 * @v dst               Buffer for encrypted data
 * @v len               Length of data
 */
static void aes_encrypt ( void *ctx, const void *src, void *dst,
                          size_t len ) {
        struct aes_context *aes_ctx = ctx;

        assert ( len == AES_BLOCKSIZE );
        if ( aes_ctx->decrypting )
                assert ( 0 );
        aes_call_axtls ( &aes_ctx->axtls_ctx, src, dst, axtls_aes_encrypt );
}

/**
 * Decrypt data
 *
 * @v ctx               Context
 * @v src               Data to decrypt
 * @v dst               Buffer for decrypted data
 * @v len               Length of data
 */
static void aes_decrypt ( void *ctx, const void *src, void *dst,
                          size_t len ) {
        struct aes_context *aes_ctx = ctx;

        assert ( len == AES_BLOCKSIZE );
        if ( ! aes_ctx->decrypting ) {
                AES_convert_key ( &aes_ctx->axtls_ctx );
                aes_ctx->decrypting = 1;
        }
        aes_call_axtls ( &aes_ctx->axtls_ctx, src, dst, axtls_aes_decrypt );
}

/** Basic AES algorithm */
struct cipher_algorithm aes_algorithm = {
        .name = "aes",
        .ctxsize = sizeof ( struct aes_context ),
        .blocksize = AES_BLOCKSIZE,
        .setkey = aes_setkey,
        .setiv = aes_setiv,
        .encrypt = aes_encrypt,
        .decrypt = aes_decrypt,
};

/* AES in Electronic Codebook mode */
ECB_CIPHER ( aes_ecb, aes_ecb_algorithm,
             aes_algorithm, struct aes_context, AES_BLOCKSIZE );

/* AES in Cipher Block Chaining mode */
CBC_CIPHER ( aes_cbc, aes_cbc_algorithm,
             aes_algorithm, struct aes_context, AES_BLOCKSIZE );
@@ -1,30 +1,49 @@
 #ifndef _IPXE_AES_H
 #define _IPXE_AES_H
 
-FILE_LICENCE ( GPL2_OR_LATER );
+/** @file
+ *
+ * AES algorithm
+ *
+ */
 
-struct cipher_algorithm;
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
 
-/** Basic AES blocksize */
+#include <ipxe/crypto.h>
+
+/** AES blocksize */
 #define AES_BLOCKSIZE 16
 
-#include "crypto/axtls/crypto.h"
+/** Maximum number of AES rounds */
+#define AES_MAX_ROUNDS 15
+
+/** AES matrix */
+union aes_matrix {
+        /** Viewed as an array of bytes */
+        uint8_t byte[16];
+        /** Viewed as an array of four-byte columns */
+        uint32_t column[4];
+} __attribute__ (( packed ));
+
+/** AES round keys */
+struct aes_round_keys {
+        /** Round keys */
+        union aes_matrix key[AES_MAX_ROUNDS];
+};
 
 /** AES context */
 struct aes_context {
-        /** AES context for AXTLS */
-        AES_CTX axtls_ctx;
-        /** Cipher is being used for decrypting */
-        int decrypting;
+        /** Encryption keys */
+        struct aes_round_keys encrypt;
+        /** Decryption keys */
+        struct aes_round_keys decrypt;
+        /** Number of rounds */
+        unsigned int rounds;
 };
 
 /** AES context size */
 #define AES_CTX_SIZE sizeof ( struct aes_context )
 
-/* AXTLS functions */
-extern void axtls_aes_encrypt ( const AES_CTX *ctx, uint32_t *data );
-extern void axtls_aes_decrypt ( const AES_CTX *ctx, uint32_t *data );
-
 extern struct cipher_algorithm aes_algorithm;
 extern struct cipher_algorithm aes_ecb_algorithm;
 extern struct cipher_algorithm aes_cbc_algorithm;
 
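Layout sketch (illustration, not part of the commit): union aes_matrix lets
the same sixteen state bytes be addressed per byte (for the [Inv]ShiftRows
walks and S-box lookups) or per 32-bit column (for the XORs in AddRoundKey),
and AES_MAX_ROUNDS 15 covers the largest schedule, AES-256's fourteen rounds
plus the initial AddRoundKey.  On a little-endian machine byte[4] is the
least significant byte of column[1], which is why aes_key_rotate() in the
new aes.c selects ror32() rather than rol32() for little-endian builds.

    #include <assert.h>
    #include <stdint.h>

    union aes_matrix {
            /* Viewed as an array of bytes */
            uint8_t byte[16];
            /* Viewed as an array of four-byte columns */
            uint32_t column[4];
    } __attribute__ (( packed ));

    int main ( void ) {
            union aes_matrix m;
            unsigned int i;

            for ( i = 0 ; i < 16 ; i++ )
                    m.byte[i] = i;
            /* Little-endian: 0x07060504; big-endian: 0x04050607 */
            assert ( ( m.column[1] == 0x07060504 ) ||
                     ( m.column[1] == 0x04050607 ) );
            return 0;
    }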
@@ -264,7 +264,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
 #define ERRFILE_imgmgmt                ( ERRFILE_OTHER | 0x00050000 )
 #define ERRFILE_pxe_tftp               ( ERRFILE_OTHER | 0x00060000 )
 #define ERRFILE_pxe_udp                ( ERRFILE_OTHER | 0x00070000 )
-#define ERRFILE_axtls_aes              ( ERRFILE_OTHER | 0x00080000 )
+#define ERRFILE_aes                    ( ERRFILE_OTHER | 0x00080000 )
 #define ERRFILE_cipher                 ( ERRFILE_OTHER | 0x00090000 )
 #define ERRFILE_image_cmd              ( ERRFILE_OTHER | 0x000a0000 )
 #define ERRFILE_uri_test               ( ERRFILE_OTHER | 0x000b0000 )