;
; Copyright @ F. Durvaux, L. Lerman, F. Koeune, 2014-2016.
; e-mail: <francois.durvaux@uclouvain.be>, <francois.koeune@uclouvain.be>, <llerman@ulb.ac.be>
;
;
; To the extent possible under law, the author(s) have dedicated all
; copyright and related and neighboring rights to this software to the
; public domain worldwide. This software is distributed without any
; warranty.
;
; You should have received a copy of the CC0 Public Domain Dedication
; along with this software. If not, see
; <http://creativecommons.org/publicdomain/zero/1.0/>



.cseg

 ; ==================================================================================================
 ; Copy r18 bytes from SRAM location pointed by ZH:ZL to SRAM location pointed by XH:XL
 ; 
 ;	r18 must contain the number of bytes to copy (1..255; the count is tested only after being decremented, so 0 copies 256 bytes)
 ;	r15 is used for internal computation
 ; ==================================================================================================

copy_word_RAM_to_RAM:
	; Byte-wise copy: every pass moves one byte from [Z] to [X] through r15 and
	; post-increments both pointers; r18 holds the number of bytes still to go.
	; The counter is only tested after the decrement, so r18 = 0 on entry
	; results in 256 bytes being copied.
	; On return: r18 = 0, r15 = last byte copied, X and Z point just past the
	; copied data.
	ld		r15, Z+
	st		X+, r15
	dec		r18
	brne	copy_word_RAM_to_RAM		; Z flag clear -> bytes remain
	ret
	
 ; ==================================================================================================
 ; TAE mode : initialisation of key and nonce
 ; IN : 
 ;		- ZH:ZL : address of Nonce, of length NONCE_NUM_BYTE
 ;		- XH:XL : address of key
 ;
 ; EFFECT : 
 ;		- Copy nonce to SRAM_TWEAK
 ;		- Copy key to SRAM_KEY
 ;
 ; REMARK : not needed if key and nonce already in place
 ; ==================================================================================================

 TAE_Init :
	; The key address arrives in X, but X is also the destination pointer of
	; copy_word_RAM_to_RAM, so it is parked in Y while the nonce is copied.
	; Clobbers r15, r18, X, Y and Z.
	mov		yl, xl
	mov		yh, xh

	; nonce : [Z] -> SRAM_TWEAK, NONCE_NUM_BYTE bytes
	ldi		xl, low(SRAM_TWEAK)
	ldi		xh, high(SRAM_TWEAK)
	ldi		r18, NONCE_NUM_BYTE
	rcall	copy_word_RAM_to_RAM

	; key : [saved X] -> SRAM_KEY, BLOCK_NUM_BYTE bytes
	mov		zl, yl
	mov		zh, yh
	ldi		xl, low(SRAM_KEY)
	ldi		xh, high(SRAM_KEY)
	ldi		r18, BLOCK_NUM_BYTE
	rcall	copy_word_RAM_to_RAM
	ret

 ; ==================================================================================================
 ; TAE mode : initialisation of associated data processing
 ; 
 ; EFFECT :
 ;		- Initialize SRAM_TAG (corresponding to auth in specs) to 00000000
 ;		- Initialize SRAM_TWEAK to first tweak value
 ; 
 ; REMARK : MUST be called, even if no associated data present
 ;
 ;	r16, r18, Z are used for internal computation
 ; ==================================================================================================

TAE_Init_AData:
	; ---- step 1 : clear the BLOCK_NUM_BYTE bytes of SRAM_TAG ----
	ldi		zl, low(SRAM_TAG)
	ldi		zh, high(SRAM_TAG)
	ldi		r16, 0
	ldi		r18, BLOCK_NUM_BYTE
TAE_IAData_loop1:
	st		Z+, r16
	dec		r18
	brne	TAE_IAData_loop1

	; ---- step 2 : build the first tweak behind the nonce ----
	; The nonce already sits at the start of SRAM_TWEAK and is left untouched.
	; It is followed by NONCE_PAD_NUM_BYTE + 2 zero bytes; the last two of those
	; are the 16-bit block counter that TAE_Next_AData increments.
	ldi		zl, low(SRAM_TWEAK)
	ldi		zh, high(SRAM_TWEAK)
	adiw	zh:zl, NONCE_NUM_BYTE		; step over the nonce
	ldi		r16, 0
	ldi		r18, NONCE_PAD_NUM_BYTE
	subi	r18, $FE					; r18 += 2 (subtracting $FE adds 2 modulo 256)

TAE_IADATA_loop2:
	st		Z+, r16
	dec		r18
	brne	TAE_IADATA_loop2

	ldi		r16, $02					; byte right after the counter is set to $02
	st		Z, r16
	ret

 ; ==================================================================================================
 ; TAE mode : processing of next (non-last) associated data block
 ; 
 ; IN :
 ;		- ZH:ZL : address of associated data block
 ; EFFECT :
 ;		- Update SRAM_TAG (corresponding to auth in specs)
 ;		- update SRAM_TWEAK for next associated data block
 ;	
 ; All registers are potentially used
 ; ==================================================================================================
  
TAE_Next_AData:
	; The helper routines called below (preparing_round_keys, encrypt,
	; key_addition, store_state) are defined outside this file; the comments on
	; them are assumptions to be confirmed against their definitions.

	push	zh							; keep the data block address across preparing_round_keys
	push	zl
	ldi		xl, low(SRAM_KEY)			; X -> key, Y -> current tweak
	ldi		xh, high(SRAM_KEY)
	ldi		yl, low(SRAM_TWEAK)
	ldi		yh, high(SRAM_TWEAK)
	rcall	preparing_round_keys

	pop		zl							; Z -> associated data block again
	pop		zh
	rcall	encrypt

	ldi		zl, low(SRAM_TAG)			; presumably : state ^= SRAM_TAG ...
	ldi		zh, high(SRAM_TAG)
	rcall	key_addition
	ldi		zl, low(SRAM_TAG)			; ... and the result is written back to SRAM_TAG
	ldi		zh, high(SRAM_TAG)
	rcall	store_state

	; Increment the 16-bit counter stored high byte first at
	; SRAM_TWEAK + NONCE_NUM_BYTE + NONCE_PAD_NUM_BYTE.
	ldi		zl, low(SRAM_TWEAK)
	ldi		zh, high(SRAM_TWEAK)
	adiw	zh:zl, NONCE_NUM_BYTE + NONCE_PAD_NUM_BYTE
	ld		xh, Z+						; load counter into X
	ld		xl, Z+
	adiw	xh:xl, 1
	st		-Z, xl						; store back in reverse order (pre-decrement)
	st		-Z, xh
	ret

 ; ==================================================================================================
 ; TAE mode : processing of last (full or not) associated data block
 ; 
 ; IN :
 ;		- ZH:ZL : address of last associated data block
 ;		- r16 : size of last block
 ;
 ; EFFECT :
 ;		- Update SRAM_TAG (corresponding to auth in specs)
 ;
 ; REMARK :
 ;		- MUST be called for the last associated data block, even if it is a full block
 ;		- MAY NOT be called if no associated data at all
 ;	
 ; All registers are potentially used
 ; ==================================================================================================
  
TAE_Last_AData:
	; Stage the r16 input bytes in SRAM_BUF. The copy is done even for a full
	; block : skipping it would save very little.
	; r16 = 0 is not supported here : copy_word_RAM_to_RAM would copy 256 bytes.
	ldi		xl, low(SRAM_BUF)
	ldi		xh, high(SRAM_BUF)
	mov		r18, r16
	rcall	copy_word_RAM_to_RAM		; leaves X = SRAM_BUF + r16

	cpi		r16, 16
	brne	TAE_LAdata_notfull

	; ---- full block : last tweak byte = $04 ----
	ldi		zl, low(SRAM_TWEAK)
	ldi		zh, high(SRAM_TWEAK)
	adiw	zh:zl, NONCE_NUM_BYTE + NONCE_PAD_NUM_BYTE + 2
	ldi		r17, $04
	st		Z, r17
	rjmp	TAE_LAdata_BC

TAE_LAdata_notfull:
	; ---- partial block : pad in SRAM_BUF with $80 followed by zeros ----
	ldi		r17, $80					; a single '1' bit right after the data
	st		X+, r17
	ldi		r17, 15						; r17 = 15 - r16 = number of zero bytes still needed
	sub		r17, r16
	breq	TAE_LAdata_loop1done		; r16 = 15 : the $80 byte already completes the block
	ldi		r16, 0						; r16 is reused as the zero source from here on
TAE_LAdata_loop1:
	st		X+, r16
	dec		r17
	brne	TAE_LAdata_loop1

TAE_LAdata_loop1done:
	; partial block : last tweak byte = $06
	ldi		zl, low(SRAM_TWEAK)
	ldi		zh, high(SRAM_TWEAK)
	adiw	zh:zl, NONCE_NUM_BYTE + NONCE_PAD_NUM_BYTE + 2
	ldi		r17, $06
	st		Z, r17

TAE_LAdata_BC:
	; ---- block cipher call on SRAM_BUF, result folded into SRAM_TAG ----
	; Same call sequence as in TAE_Next_AData, with SRAM_BUF as the input block.
	ldi		xl, low(SRAM_KEY)
	ldi		xh, high(SRAM_KEY)
	ldi		yl, low(SRAM_TWEAK)
	ldi		yh, high(SRAM_TWEAK)
	rcall	preparing_round_keys
	ldi		zl, low(SRAM_BUF)
	ldi		zh, high(SRAM_BUF)
	rcall	encrypt
	ldi		zl, low(SRAM_TAG)
	ldi		zh, high(SRAM_TAG)
	rcall	key_addition
	ldi		zl, low(SRAM_TAG)
	ldi		zh, high(SRAM_TAG)
	rcall	store_state

	ret

 ; ==================================================================================================
 ; TAE mode : initialisation of plaintext processing (both before encryption and decryption)
 ; 
 ; EFFECT :
 ;		- Initialize SRAM_SIGMA (corresponding to auth in specs) to 00000000
 ;		- Initialize SRAM_TWEAK to first tweak value
 ; 
 ; REMARKS : 
 ;		- MUST be called, even if no data to encrypt/decrypt
 ;		- TAE_Init_Encrypt and TAE_Init_Decrypt are in fact the same function : both names are given
 ;		  for clarity
 ;
 ;	r16, r18, Z are used for internal computation
 ; ==================================================================================================

TAE_Init_Encrypt:
TAE_Init_Decrypt:
	; Both labels designate the same entry point.

	; ---- step 1 : clear the BLOCK_NUM_BYTE bytes of SRAM_SIGMA ----
	ldi		zl, low(SRAM_SIGMA)
	ldi		zh, high(SRAM_SIGMA)
	ldi		r16, 0
	ldi		r18, BLOCK_NUM_BYTE
TAE_IE_loop1:
	st		Z+, r16
	dec		r18
	brne	TAE_IE_loop1

	; ---- step 2 : reset everything in SRAM_TWEAK that follows the nonce ----
	; NONCE_PAD_NUM_BYTE + 3 zero bytes are written : the padding, the two
	; counter bytes and the final byte of the tweak.
	ldi		zl, low(SRAM_TWEAK)
	ldi		zh, high(SRAM_TWEAK)
	adiw	zh:zl, NONCE_NUM_BYTE		; step over the nonce, which is kept
	ldi		r16, 0
	ldi		r18, NONCE_PAD_NUM_BYTE
	subi	r18, $FD					; r18 += 3 (subtracting $FD adds 3 modulo 256)

TAE_IE_loop2:
	st		Z+, r16
	dec		r18
	brne	TAE_IE_loop2

	ret

 ; ==================================================================================================
 ; TAE mode : processing of next (non-last) plaintext block
 ; 
 ; IN :
 ;		- ZH:ZL : address of plaintext block
 ;
 ; EFFECT :
 ;		- Update SRAM_SIGMA
 ;		- encrypts plaintext and overwrites it with corresponding ciphertext
 ;		- update SRAM_TWEAK for next plaintext block
 ;	
 ; All registers are potentially used
 ; ==================================================================================================
  
TAE_Next_Encrypt:
	push	zh							; plaintext address is needed again for encrypt
	push	zl

	; ---- SRAM_SIGMA ^= plaintext block (BLOCK_NUM_BYTE bytes) ----
	ldi		xl, low(SRAM_SIGMA)
	ldi		xh, high(SRAM_SIGMA)
	ldi		r18, BLOCK_NUM_BYTE
TAE_N_loop1:
	ld		r16, Z+						; plaintext byte
	ld		r17, X						; running checksum byte
	eor		r16, r17
	st		X+, r16
	dec		r18
	brne	TAE_N_loop1

	; ---- encrypt the block under the current tweak ----
	; preparing_round_keys and encrypt are defined outside this file; per the
	; routine header, encrypt overwrites the block at Z with the ciphertext.
	ldi		xl, low(SRAM_KEY)
	ldi		xh, high(SRAM_KEY)
	ldi		yl, low(SRAM_TWEAK)
	ldi		yh, high(SRAM_TWEAK)
	rcall	preparing_round_keys

	pop		zl
	pop		zh
	rcall	encrypt

	; ---- increment the 16-bit counter (high byte first) inside the tweak ----
	ldi		zl, low(SRAM_TWEAK)
	ldi		zh, high(SRAM_TWEAK)
	adiw	zh:zl, NONCE_NUM_BYTE + NONCE_PAD_NUM_BYTE
	ld		xh, Z+
	ld		xl, Z+
	adiw	xh:xl, 1
	st		-Z, xl
	st		-Z, xh

	ret

 ; ==================================================================================================
 ; TAE mode : processing of last (full or not) plaintext block
 ; 
 ; IN :
 ;		- ZH:ZL : address of last plaintext block
 ;		- r16 : size of last block
 ;
 ; EFFECT :
 ;		- encrypt plaintext and overwrite it with corresponding ciphertext
 ;		- compute final tag and store it in SRAM_TAG
 ;
 ; All registers are potentially used
 ; ==================================================================================================

TAE_Last_Encrypt:
	; Stack layout while the block is processed : r16 (length), ZH, ZL.
	; preparing_round_keys and encrypt are defined outside this file.
	push	r16
	push	zh
	push	zl
	cpi		r16, 16
	brne	TAE_LE_notfull

	rcall	TAE_Next_Encrypt			; last block full : process as all other ones
	rjmp	TAE_LE_pregentag

TAE_LE_notfull:
	; Zero-length guard. Every loop below decrements before testing, so a count
	; of 0 would wrap to 256 iterations and XOR far outside SRAM_SIGMA and the
	; caller's buffer. With nothing to encrypt, go straight to tag generation
	; (the last tweak byte then comes out as 1).
	; NOTE(review): confirm against the mode specification that this is the
	; intended tag for an empty message.
	tst		r16
	breq	TAE_LE_pregentag

	; ---- SRAM_SIGMA ^= plaintext, over the r16 bytes present ----
	ldi		xh, high(SRAM_SIGMA)
	ldi		xl, low(SRAM_SIGMA)
	mov		r18, r16
TAE_LE_loop1:
	ld		r19, Z+
	ld		r17, X
	eor		r19, r17
	st		X+, r19
	dec		r18
	brne	TAE_LE_loop1

	; ---- SRAM_BUF = 15 zero bytes followed by (r16 << 3), the length in bits ----
	ldi		zh, high(SRAM_BUF)
	ldi		zl, low(SRAM_BUF)
	ldi		r17, 0
	ldi		r18, 15
TEA_LE_loop2:
	st		Z+, r17
	dec		r18
	brne	TEA_LE_loop2
	mov		r17, r16
	lsl		r17
	lsl		r17
	lsl		r17
	st		Z+, r17

	; ---- encrypt that length block in place in SRAM_BUF ----
	ldi		xh, high(SRAM_KEY)
	ldi		xl, low(SRAM_KEY)
	ldi		yh, high(SRAM_TWEAK)
	ldi		yl, low(SRAM_TWEAK)
	rcall	preparing_round_keys
	ldi		zh, high(SRAM_BUF)
	ldi		zl, low(SRAM_BUF)
	rcall	encrypt

	; ---- plaintext ^= first r16 bytes of SRAM_BUF, written over the plaintext ----
	ldi		xh, high(SRAM_BUF)
	ldi		xl, low(SRAM_BUF)
	pop		zl							; Z -> caller's block, r16 = its length
	pop		zh
	pop		r16
	mov		r19, r16
TAE_LE_loop3:
	ld		r17, X+
	ld		r18, Z
	eor		r17, r18
	st		Z+, r17
	dec		r19
	brne	TAE_LE_loop3
	rjmp	TAE_LE_gentag

TAE_LE_pregentag:
	; Full-block and zero-length paths : rebalance the stack, recover r16.
	pop		zl
	pop		zh
	pop		r16

TAE_LE_gentag:
	; ---- last tweak byte : 1 for a full block, else 1 ^ (r16 << 4) ----
	ldi		zh, high(SRAM_TWEAK)
	ldi		zl, low(SRAM_TWEAK)
	adiw	zh:zl, NONCE_NUM_BYTE + NONCE_PAD_NUM_BYTE + 2
	ldi		r19, 1
	cpi		r16, 16
	breq	TAE_LE_tweakdone			;		full block : byte stays 1
	lsl		r16							;		else fold in the length shifted left by 4
	lsl		r16
	lsl		r16
	lsl		r16
	eor		r19, r16
TAE_LE_tweakdone:
	st		Z, r19

	; ---- encrypt SRAM_SIGMA in place under the final tweak ----
	ldi		xh, high(SRAM_KEY)
	ldi		xl, low(SRAM_KEY)
	ldi		yh, high(SRAM_TWEAK)
	ldi		yl, low(SRAM_TWEAK)
	rcall	preparing_round_keys
	ldi		zh, high(SRAM_SIGMA)
	ldi		zl, low(SRAM_SIGMA)
	rcall	encrypt

	; ---- SRAM_TAG ^= SRAM_SIGMA (16 bytes) : final tag ----
	ldi		xh, high(SRAM_SIGMA)
	ldi		xl, low(SRAM_SIGMA)
	ldi		zh, high(SRAM_TAG)
	ldi		zl, low(SRAM_TAG)
	ldi		r19, 16
TAE_LE_loop4:
	ld		r17, X+
	ld		r18, Z
	eor		r17, r18
	st		Z+, r17
	dec		r19
	brne	TAE_LE_loop4
	ret




 ; ==================================================================================================
 ; TAE mode : processing of next (non-last) ciphertext block
 ; 
 ; IN :
 ;		- ZH:ZL : address of ciphertext block
 ;
 ; EFFECT :
 ;		- decrypts ciphertext and overwrites it with corresponding plaintext
 ;		- Update SRAM_SIGMA
 ;		- update SRAM_TWEAK for next plaintext block
 ;	
 ; All registers are potentially used
 ; ==================================================================================================
  
TAE_Next_Decrypt:
	push	zh							; ciphertext address is needed again for decrypt
	push	zl

	; ---- decrypt the block under the current tweak ----
	; preparing_round_keys and decrypt are defined outside this file; per the
	; routine header, decrypt overwrites the block at Z with the plaintext.
	ldi		xl, low(SRAM_KEY)
	ldi		xh, high(SRAM_KEY)
	ldi		yl, low(SRAM_TWEAK)
	ldi		yh, high(SRAM_TWEAK)
	rcall	preparing_round_keys
	pop		zl
	pop		zh
	rcall	decrypt

	; Step Z back by one block. This presumes decrypt returns with Z advanced
	; 16 bytes past the start of the block - confirm against decrypt.
	sbiw	zh:zl, 16

	; ---- SRAM_SIGMA ^= recovered plaintext (BLOCK_NUM_BYTE bytes) ----
	ldi		xl, low(SRAM_SIGMA)
	ldi		xh, high(SRAM_SIGMA)
	ldi		r18, BLOCK_NUM_BYTE
TAE_ND_loop1:
	ld		r16, Z+
	ld		r17, X
	eor		r16, r17
	st		X+, r16
	dec		r18
	brne	TAE_ND_loop1

	; ---- increment the 16-bit counter (high byte first) inside the tweak ----
	ldi		zl, low(SRAM_TWEAK)
	ldi		zh, high(SRAM_TWEAK)
	adiw	zh:zl, NONCE_NUM_BYTE + NONCE_PAD_NUM_BYTE
	ld		xh, Z+
	ld		xl, Z+
	adiw	xh:xl, 1
	st		-Z, xl
	st		-Z, xh

	ret

 ; ==================================================================================================
 ; TAE mode : processing of last (full or not) ciphertext block
 ; 
 ; IN :
 ;		- ZH:ZL : address of last ciphertext block
 ;		- r16 : size of last block
 ;		- XH:XL : address of tag
 ;
 ; EFFECT :
 ;		- decrypt ciphertext and overwrites it with corresponding plaintext
 ;		- compute final tag and compare it with tag passed at XH:XL 
 ;			--> set r16 to 1 if tags match, and to 0 otherwise
 ;
 ; REMARK :
 ;		- MUST be called for the last ciphertext block, even if it is a full block
 ;		- MAY NOT be called if no associated data at all
 ;	
 ; All registers are potentially used
 ; ==================================================================================================

TAE_Last_Decrypt:
	; Stack layout while the block is processed (pushed in this order) :
	; XH, XL (address of the tag to check), r16 (length), ZH, ZL.
	; preparing_round_keys and encrypt are defined outside this file.
	push	xh
	push	xl
	push	r16
	push	zh
	push	zl
	cpi		r16, 16
	brne	TAE_LD_notfull

	rcall	TAE_Next_Decrypt			; last block full : process as all other ones
	rjmp	TAE_LD_pregentag

TAE_LD_notfull:
	; Zero-length guard. Every loop below decrements before testing, so a count
	; of 0 would wrap to 256 iterations and XOR far outside the caller's buffer
	; and SRAM_SIGMA. With nothing to decrypt, go straight to tag generation
	; (the last tweak byte then comes out as 1).
	; NOTE(review): confirm against the mode specification that this is the
	; intended tag for an empty message.
	tst		r16
	breq	TAE_LD_pregentag

	; ---- SRAM_BUF = 15 zero bytes followed by (r16 << 3), the length in bits ----
	ldi		zh, high(SRAM_BUF)
	ldi		zl, low(SRAM_BUF)
	ldi		r17, 0
	ldi		r18, 15
TEA_LD_loop1:
	st		Z+, r17
	dec		r18
	brne	TEA_LD_loop1
	mov		r17, r16
	lsl		r17
	lsl		r17
	lsl		r17
	st		Z+, r17

	; ---- encrypt that length block in place in SRAM_BUF ----
	; encrypt (not decrypt) is used here, exactly as TAE_Last_Encrypt does.
	ldi		xh, high(SRAM_KEY)
	ldi		xl, low(SRAM_KEY)
	ldi		yh, high(SRAM_TWEAK)
	ldi		yl, low(SRAM_TWEAK)
	rcall	preparing_round_keys
	ldi		zh, high(SRAM_BUF)
	ldi		zl, low(SRAM_BUF)
	rcall	encrypt

	; ---- ciphertext ^= first r16 bytes of SRAM_BUF, giving plaintext in place ----
	ldi		xh, high(SRAM_BUF)
	ldi		xl, low(SRAM_BUF)
	pop		zl							; Z -> caller's block, r16 = its length
	pop		zh
	pop		r16
	mov		r19, r16
TAE_LD_loop2:
	ld		r17, X+
	ld		r18, Z
	eor		r17, r18
	st		Z+, r17
	dec		r19
	brne	TAE_LD_loop2

	; ---- SRAM_SIGMA ^= recovered plaintext ----
	ldi		xh, high(SRAM_SIGMA)
	ldi		xl, low(SRAM_SIGMA)
	sub		zl, r16						; 16-bit Z -= r16 : back to the start of the block
	sbci	zh, 0
	mov		r18, r16
TAE_LD_loop3:
	ld		r19, Z+
	ld		r17, X
	eor		r19, r17
	st		X+, r19
	dec		r18
	brne	TAE_LD_loop3
	rjmp	TAE_LD_gentag

TAE_LD_pregentag:
	; Full-block and zero-length paths : drop the saved Z, recover r16.
	pop		zl
	pop		zh
	pop		r16

TAE_LD_gentag:
	; ---- last tweak byte : 1 for a full block, else 1 ^ (r16 << 4) ----
	ldi		zh, high(SRAM_TWEAK)
	ldi		zl, low(SRAM_TWEAK)
	adiw	zh:zl, NONCE_NUM_BYTE + NONCE_PAD_NUM_BYTE + 2
	ldi		r19, 1
	cpi		r16, 16
	breq	TAE_LD_tweakdone			;		full block : byte stays 1
	lsl		r16							;		else fold in the length shifted left by 4
	lsl		r16
	lsl		r16
	lsl		r16
	eor		r19, r16
TAE_LD_tweakdone:
	st		Z, r19

	; ---- encrypt SRAM_SIGMA in place under the final tweak ----
	ldi		xh, high(SRAM_KEY)
	ldi		xl, low(SRAM_KEY)
	ldi		yh, high(SRAM_TWEAK)
	ldi		yl, low(SRAM_TWEAK)
	rcall	preparing_round_keys
	ldi		zh, high(SRAM_SIGMA)
	ldi		zl, low(SRAM_SIGMA)
	rcall	encrypt

	; ---- SRAM_TAG ^= SRAM_SIGMA (16 bytes) : computed tag ----
	ldi		xh, high(SRAM_SIGMA)
	ldi		xl, low(SRAM_SIGMA)
	ldi		zh, high(SRAM_TAG)
	ldi		zl, low(SRAM_TAG)
	ldi		r19, 16
TAE_LD_loop4:
	ld		r17, X+
	ld		r18, Z
	eor		r17, r18
	st		Z+, r17
	dec		r19
	brne	TAE_LD_loop4

	; ---- compare SRAM_TAG with the tag supplied by the caller ----
	; Constant-time comparison : all 16 bytes are always read and their
	; differences OR-ed into r16, so the running time does not reveal the
	; position of the first mismatching byte.
	pop		xl							; X -> tag passed by the caller
	pop		xh
	sbiw	zh:zl, 16					; Z -> start of SRAM_TAG again
	ldi		r18, 16
	ldi		r16, 0						; r16 = OR of all byte differences so far
TAE_LD_loop5:
	ld		r14, X+
	ld		r15, Z+
	eor		r14, r15					; zero iff this pair of bytes is equal
	or		r16, r14
	dec		r18
	brne	TAE_LD_loop5

	; Branch-free conversion : r16 = 1 if no difference was seen, 0 otherwise.
	subi	r16, 1						; carry set iff r16 was 0 (tags match)
	ldi		r16, 0						; ldi leaves SREG untouched
	rol		r16							; r16 = carry

TAE_done:
	ret

