cosmopolitan/third_party/mbedtls/bigmul4.c
2022-09-13 04:14:55 -07:00

76 lines
3.3 KiB
C

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:4;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright The Mbed TLS Contributors │
│ │
│ Licensed under the Apache License, Version 2.0 (the "License"); │
│ you may not use this file except in compliance with the License. │
│ You may obtain a copy of the License at │
│ │
│ http://www.apache.org/licenses/LICENSE-2.0 │
│ │
│ Unless required by applicable law or agreed to in writing, software │
│ distributed under the License is distributed on an "AS IS" BASIS, │
│ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. │
│ See the License for the specific language governing permissions and │
│ limitations under the License. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/nexgen32e/x86feature.h"
#include "third_party/mbedtls/bignum_internal.h"
#include "third_party/mbedtls/math.h"
/* clang-format off */
/**
 * Computes 512-bit product of two 256-bit numbers.
 *
 * This is a function pointer, not a function: Mul4x4Init() in this file
 * assigns it either Mul4x4Adx or Mul4x4Pure.
 *
 * @param C receives 8 quadword result
 * @param A is left hand side which must have 4 quadwords
 * @param B is right hand side which must have 4 quadwords
 * @note words are host endian while array is little endian
 * @note the array bounds spelled in the prototype (16, 8, 8) are larger
 *     than the quadword counts documented above; Mul4x4Pure() in this
 *     file accesses only C[0..7], A[0..3] and B[0..3]
 * @mayalias
 */
void (*Mul4x4)(uint64_t C[16], const uint64_t A[8], const uint64_t B[8]);
/**
 * Picks the implementation that the Mul4x4 pointer dispatches to.
 *
 * Mul4x4Adx is selected only when both the ADX and BMI2 feature checks
 * succeed; in every other case the plain C Mul4x4Pure is used.
 *
 * The address of this function is stored in Mul4x4Ctor, which is how it
 * is expected to get invoked; nothing in this file calls it directly.
 */
static textstartup void Mul4x4Init(void)
{
    Mul4x4 = X86_HAVE(ADX) && X86_HAVE(BMI2) ? Mul4x4Adx : Mul4x4Pure;
}
/* Table whose single entry is the address of Mul4x4Init. The `initarray`
   macro is defined outside this file; presumably it places the table where
   the startup code finds and calls each entry before main() -- confirm
   against its definition. */
const void *const Mul4x4Ctor[] initarray = {Mul4x4Init};
/**
 * Plain C implementation of Mul4x4: C[0..7] = A[0..3] * B[0..3].
 *
 * The product is built one output column at a time: column k gathers
 * every partial product A[i] * B[j] with i + j == k. Three accumulator
 * words are passed to MADD in rotating order; once a column is complete
 * its lowest accumulator word becomes result word k and is cleared, and
 * the remaining two words carry over into the next column.
 *
 * NOTE(review): MADD is defined outside this file. From its use here it
 * presumably adds the 128-bit product of its first two arguments into
 * the three-word accumulator named by its last three arguments (least
 * significant first) -- confirm against its definition.
 *
 * The four low result words are parked in locals, and each C[k] for
 * k < 4 is written only after the final read of A[k] and B[k]. That
 * ordering keeps the result correct when C is the same array as A or B,
 * so the statement order below must not be changed.
 *
 * @param C receives the 8 quadword product
 * @param A is the left hand side; A[0..3] are read
 * @param B is the right hand side; B[0..3] are read
 */
void Mul4x4Pure(uint64_t C[16], const uint64_t A[8], const uint64_t B[8])
{
    /* `t` and `h` are never referenced directly in this function; they
       appear to be scratch variables that MADD uses by name, so their
       names must stay as they are. */
    uint128_t t;
    uint64_t h;
    uint64_t acc0 = 0, acc1 = 0, acc2 = 0;
    uint64_t out0, out1, out2, out3;

    /* column 0 */
    MADD(A[0], B[0], acc0, acc1, acc2);
    out0 = acc0;
    acc0 = 0;

    /* column 1 */
    MADD(A[0], B[1], acc1, acc2, acc0);
    MADD(A[1], B[0], acc1, acc2, acc0);
    out1 = acc1;
    acc1 = 0;

    /* column 2 */
    MADD(A[2], B[0], acc2, acc0, acc1);
    MADD(A[1], B[1], acc2, acc0, acc1);
    MADD(A[0], B[2], acc2, acc0, acc1);
    out2 = acc2;
    acc2 = 0;

    /* column 3; A[0] and B[0] are not read again after this column */
    MADD(A[0], B[3], acc0, acc1, acc2);
    MADD(A[1], B[2], acc0, acc1, acc2);
    MADD(A[2], B[1], acc0, acc1, acc2);
    MADD(A[3], B[0], acc0, acc1, acc2);
    C[0] = out0;
    out3 = acc0;
    acc0 = 0;

    /* column 4; A[1] and B[1] are not read again after this column */
    MADD(A[3], B[1], acc1, acc2, acc0);
    MADD(A[2], B[2], acc1, acc2, acc0);
    MADD(A[1], B[3], acc1, acc2, acc0);
    C[1] = out1;
    C[4] = acc1;
    acc1 = 0;

    /* column 5; A[2] and B[2] are not read again after this column */
    MADD(A[2], B[3], acc2, acc0, acc1);
    MADD(A[3], B[2], acc2, acc0, acc1);
    C[2] = out2;
    C[5] = acc2;
    acc2 = 0;

    /* column 6; its carry word becomes the top result word */
    MADD(A[3], B[3], acc0, acc1, acc2);
    C[3] = out3;
    C[6] = acc0;
    C[7] = acc1;
}