/*
This file is part of mfaktc.
Copyright (C) 2009, 2010, 2012  Oliver Weihe (o.weihe@t-online.de)

mfaktc is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

mfaktc is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
                                
You should have received a copy of the GNU General Public License
along with mfaktc.  If not, see <http://www.gnu.org/licenses/>.
*/

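// Welcome to the "dumb num" routines: simple multi-word unsigned integers.
// A dumb num is an array of unsigned ints in which a[0] is the number of
// data words and a[1]..a[a[0]] are 32-bit words, least significant first.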

// Strip leading (most significant) zero words from a by lowering the word
// count kept in a[0].  A value of zero ends up with a[0] == 0.
void dn_normalize (unsigned int *a)
{
  unsigned int top;

  // Walk down from the current top word; every zero word found there
  // shortens the number by one.
  for (top = a[0]; top != 0 && a[top] == 0; top--)
    a[0] = top - 1;
}

// Copy a into b.  a is normalized in place first, so only the length word
// and the significant data words are transferred.
void dn_copy (unsigned int *a, unsigned int *b)
{
  unsigned int k = 0;

  dn_normalize(a);
  // Index 0 (the length word) is always copied, then the a[0] data words.
  do {
    b[k] = a[k];
    k++;
  } while (k <= a[0]);
}

// Return 1 when a and b hold the same value, 0 otherwise.
// Both operands are normalized in place as a side effect.
int dn_equal (unsigned int *a, unsigned int *b)
{
  unsigned int k;

  dn_normalize(a);
  dn_normalize(b);

  // After normalization, equal values must have equal word counts.
  if (a[0] != b[0]) return 0;

  k = a[0];
  while (k != 0) {
    if (a[k] != b[k]) return 0;
    k--;
  }
  return 1;
}

// Return 1 when a >= b, 0 otherwise.
// Both operands are normalized in place as a side effect.
int dn_greater_or_equal (unsigned int *a, unsigned int *b)
{
  unsigned int k;

  dn_normalize(a);
  dn_normalize(b);

  // With no leading zero words, the longer number is the larger one.
  if (a[0] != b[0]) return a[0] > b[0];

  // Same length: the first differing word, scanning from the top, decides.
  for (k = a[0]; k != 0; k--) {
    if (a[k] != b[k]) return a[k] > b[k];
  }
  return 1;
}

// return c = a + b
//
// a and b are normalized in place first.  c may be the same array as a
// and/or b: word i of the sum is stored only after word i of both inputs
// has been read, and the length word c[0] is written last, so no scratch
// copies are needed.
//
// c must have room for max(a[0], b[0]) + 1 data words, because a carry out
// of the top word extends the result by one word.
void dn_add (unsigned int *a, unsigned int *b, unsigned int *c)
{
  unsigned int alen, blen, n, i;
  unsigned int carry = 0;

  dn_normalize(a);
  dn_normalize(b);
  alen = a[0];
  blen = b[0];
  n = (alen > blen) ? alen : blen;

  for (i = 1; i <= n; i++) {
    unsigned long long sum = (unsigned long long) carry;
    if (i <= alen) sum += (unsigned long long) a[i];
    if (i <= blen) sum += (unsigned long long) b[i];
    carry = (unsigned int) (sum >> 32);
    c[i] = (unsigned int) (sum & 0xFFFFFFFF);
  }

  // A carry out of the top word is part of the sum; dropping it would
  // return the result modulo 2^(32*n).
  if (carry != 0) c[++n] = carry;
  c[0] = n;
}

// return c = a - b
//
// a and b are normalized in place first.  The caller must ensure a >= b:
// only a's words are processed and a borrow out of the top word is
// discarded, so a smaller a yields a wrapped-around result.
//
// c[0] is set to a's word count, so the result may carry leading zero
// words.
//
// c may be the same array as a and/or b: word i of the difference is stored
// only after word i of both inputs has been read, and c[0] is written last,
// so no scratch copies are needed.
void dn_sub (unsigned int *a, unsigned int *b, unsigned int *c)
{
  unsigned int alen, blen, i;
  unsigned int borrow = 0;

  dn_normalize(a);
  dn_normalize(b);
  alen = a[0];
  blen = b[0];

  for (i = 1; i <= alen; i++) {
    // 64-bit arithmetic: a negative intermediate sets bit 32, which is the borrow
    unsigned long long diff = (unsigned long long) a[i] - (unsigned long long) borrow;
    if (i <= blen) diff -= (unsigned long long) b[i];
    borrow = (unsigned int) (diff >> 32) & 1;
    c[i] = (unsigned int) (diff & 0xFFFFFFFF);
  }
  c[0] = alen;
}

// decrement the most significant word of a -- specialized routine used in division
//
// When the top word is 1 and there is a lower word, the number is shortened
// by one word and the new top word is set to 0xFFFFFFFF (the largest value
// that is one word shorter).  Otherwise the top word is simply decremented.
//
// Edge cases: a zero-length number is left untouched, and the single-word
// value 1 becomes zero (a[0] == 0).  Without these guards the length word
// a[0] itself would be overwritten or wrap around.
void dn_dec_top_word (unsigned int *a)
{
  unsigned int top = a[0];

  if (top == 0) return;            // nothing to decrement

  if (top > 1 && a[top] == 1) {
    a[0] = top - 1;
    a[top - 1] = 0xFFFFFFFF;
    return;
  }

  a[top]--;
  if (a[top] == 0) a[0] = top - 1; // only reachable for the single-word value 1
}

// Compute c = a * b with schoolbook (word by word) multiplication.
//
// a and b are normalized in place first.  c[0] is set to a[0] + b[0], so the
// product may carry one leading zero word.  c may be the same array as a
// and/or b; such an input is moved to a 64-word scratch array because c is
// cleared before the inputs are consumed.
void dn_mul (unsigned int *a, unsigned int *b, unsigned int *c)
{
  unsigned int a_scratch[64], b_scratch[64];
  unsigned int alen, blen, row, col, k;

  dn_normalize(a);
  dn_normalize(b);
  if (a == c) { dn_copy(a, a_scratch); a = a_scratch; }
  if (b == c) { dn_copy(b, b_scratch); b = b_scratch; }
  alen = a[0];
  blen = b[0];

  for (k = 1; k <= alen + blen; k++) c[k] = 0;

  for (row = 1; row <= alen; row++) {
    unsigned long long acc = 0;      // upper half carries into the next column

    for (col = 1; col <= blen; col++) {
      // product + carry + previous partial sum never exceeds 2^64 - 1
      acc += (unsigned long long) a[row] * (unsigned long long) b[col]
           + (unsigned long long) c[row + col - 1];
      c[row + col - 1] = (unsigned int) (acc & 0xFFFFFFFF);
      acc >>= 32;
    }
    // the word just above this row has not been touched yet, so store the carry
    c[row + blen] = (unsigned int) acc;
  }
  c[0] = alen + blen;
}

// return b = a^2
//
// Thin wrapper that passes a as both factors to dn_mul.  b may be the same
// array as a, since dn_mul copies any input that aliases its output.
void dn_square (unsigned int *a, unsigned int *b)
{
  dn_mul (a, a, b);
}

// return c = a % b
//
// a and b are normalized in place.  c may be the same array as a (the
// initial copy is then a self-copy) or as b (b is moved to scratch first).
// Intermediate values are held in 64-word scratch arrays.
//
// Method: c starts as a copy of a.  While c >= b, the leading word of the
// quotient is estimated in double precision from the top two words of c
// and b, the estimate is lowered until b * quotient <= c, and that product
// is subtracted from c.
//
// A zero modulus has no defined remainder; c is then left equal to a.
void dn_mod (unsigned int *a, unsigned int *b, unsigned int *c)
{
  unsigned int tmp2[64];
  double top_c, top_b;

  dn_normalize(a);
  dn_normalize(b);
  if (b == c) { dn_copy(b, tmp2); b = tmp2; }

  dn_copy (a, c);

  // With b == 0 the loop below would never end: c >= 0 always holds and
  // subtracting 0 * quotient never shrinks c.
  if (b[0] == 0) return;

  top_b = (double) b[b[0]] * 4294967296.0;
  if (b[0] > 1) top_b += (double) b[b[0] - 1];
  while (dn_greater_or_equal (c, b)) {
    unsigned int quotient[64], tmp[64];
    unsigned int i;
    double estimate;

    top_c = (double) c[c[0]] * 4294967296.0;
    if (c[0] > 1) top_c += (double) c[c[0] - 1];

    // Choose the quotient length and estimate its leading word.  When c's
    // leading words are smaller than b's, the quotient is one word shorter
    // and the estimate is scaled up by one word.
    if (top_c >= top_b || c[0] == b[0]) {
      quotient[0] = c[0] - b[0] + 1;
      estimate = floor (top_c / top_b);
    } else {
      quotient[0] = c[0] - b[0];
      estimate = floor (top_c * 4294967296.0 / top_b);
    }
    for (i = 1; i < quotient[0]; i++) quotient[i] = 0;

    // top_c can round up to 2^64 while top_b is exactly 2^32, giving an
    // estimate of 2^32.  Converting that to unsigned int is undefined, so
    // clamp to the largest word; the correction loop below handles overshoot.
    if (estimate >= 4294967296.0)
      quotient[i] = 0xFFFFFFFF;
    else
      quotient[i] = (unsigned int) estimate;

    // c >= b here, so a quotient of at least 1 always fits
    if (quotient[i] == 0) quotient[i] = 1;

    // Lower the estimate until the product no longer exceeds c
    for ( ; ; ) {
      dn_mul (b, quotient, tmp);
      if (dn_greater_or_equal (c, tmp)) break;
      dn_dec_top_word (quotient);
    }
    dn_sub (c, tmp, c);
  }
}

// Return a mod b for a single-word modulus b.  b must be nonzero.
// a is normalized in place as a side effect.
unsigned int dn_mod_int (unsigned int *a, unsigned int b)
{
  unsigned long long r = 0;
  unsigned int k;

  dn_normalize(a);

  // Fold in one word at a time from the most significant end; r stays
  // below b, so (r << 32) + word always fits in 64 bits.
  k = a[0];
  while (k != 0) {
    r = ((r << 32) + a[k]) % b;
    k--;
  }
  return (unsigned int) r;
}

// Load the low 16 words of n into an int512 (d0 = least significant word,
// missing words filled with zero) and hand it to print_dez512 together
// with buf.  Words beyond the 16th are ignored.
// NOTE(review): print_dez512 is defined elsewhere; presumably it writes the
// decimal representation into buf -- confirm, including the required size.
void dn_print(unsigned int *n, char *buf)
{
  unsigned int w[16];
  unsigned int k;
  int512 val;

  // w[k] is data word k+1 of n, or 0 when n is shorter than that
  for (k = 0; k < 16; k++) {
    if (n[0] >= k + 1)
      w[k] = n[k + 1];
    else
      w[k] = 0;
  }

  val.d0 = w[0];
  val.d1 = w[1];
  val.d2 = w[2];
  val.d3 = w[3];
  val.d4 = w[4];
  val.d5 = w[5];
  val.d6 = w[6];
  val.d7 = w[7];
  val.d8 = w[8];
  val.d9 = w[9];
  val.d10 = w[10];
  val.d11 = w[11];
  val.d12 = w[12];
  val.d13 = w[13];
  val.d14 = w[14];
  val.d15 = w[15];
  print_dez512 (val, buf);
}

// Validate a Fermat or double-Mersenne exponentiation
//
// Copies the validation region of the results array from the device to the
// host and reads from it a word count followed by that many factor words and
// that many remainder words.  The factor is checked for small divisors, then
// the remainder is recomputed on the host by repeated squaring and compared
// with the one read back.  A failed check prints an error and exits.
//
// mystuff     - program state; h_RES, d_RES, verbosity, exponent and
//               fermat_factoring are read
// k_remaining - when this is below 16 and the remainder is zero, validation
//               is skipped (see the comment at that test)

void validate_exponentiation(mystuff_t *mystuff, unsigned long long k_remaining)
{
  unsigned int fac[32], rem[32], tmp[64], *hptr;
  unsigned int i, datalen;

  // get the data
  // NOTE(review): the status returned by cudaMemcpy is not checked here.

  cudaMemcpy(mystuff->h_RES+RESULTS_ARRAY_VALIDATION_OFFSET,
             mystuff->d_RES+RESULTS_ARRAY_VALIDATION_OFFSET,
             (RESULTS_ARRAY_SIZE - RESULTS_ARRAY_VALIDATION_OFFSET)*sizeof(int),
             cudaMemcpyDeviceToHost);

  hptr = mystuff->h_RES+RESULTS_ARRAY_VALIDATION_OFFSET;
  datalen = *hptr++;
  if (datalen == 0) return;			// Skip validation if no factors survived the sieve

  // fac[] and rem[] each hold a length word plus datalen data words.  The
  // length comes from device memory, so refuse anything that would overrun
  // the local arrays.
  if (datalen > sizeof(fac) / sizeof(fac[0]) - 1) {
    printf ("ERROR: Validation data length %u does not fit the host buffers\n", datalen);
    exit(1);
  }

  fac[0] = datalen;
  for (i = 1; i <= datalen; i++) fac[i] = *hptr++;

  rem[0] = datalen;
  for (i = 1; i <= datalen; i++) rem[i] = *hptr++;

  // Kernels return a remainder of 0 when they test no factors.  This is just a happenstance
  // as it seems kernels zero out the uninitialized remainder held in registers.  To guard against
  // a kernel erroneously always returning zero, only bypass the validation if it is plausible that
  // the sieve zeroed all the k values.
  dn_normalize (rem);
  if (rem [0] == 0 && k_remaining < 16) return;  // Assume the sieve cleared all the k values

  // output the data

  if (mystuff->verbosity >= 3) {
    char factor_string[180], remainder_string[180];

    dn_print(fac,factor_string);
    dn_print(rem,remainder_string);

    printf("Verifying (2^(2^%d)) %% %s = %s\n", (int) mystuff->exponent, factor_string, remainder_string);
  }

  // make sure the factor has no really small factors - this would indicate calculating the factor or GPU sieving or class_needed is broken

  if (dn_mod_int (fac, 2) == 0 ||
      dn_mod_int (fac, 3) == 0 ||
      dn_mod_int (fac, 5) == 0 ||
      dn_mod_int (fac, 7) == 0 ||
      dn_mod_int (fac, 11) == 0) {
    printf ("ERROR: Class problems.  Factor divisible by 2, 3, 5, 7, or 11\n");
    exit(1);
  }
  // Every i in the range is tried, composites included
  for (i = 13; i <= 251; i++) {
    if (dn_mod_int (fac, i) == 0) {
      printf ("ERROR: GPU sieve problems.  Factor divisible by %u\n", i);
      exit(1);
    }
  }
  if (mystuff->verbosity >= 3) {
    for (i = 257; i <= 1999; i++) {
      if (dn_mod_int (fac, i) == 0)
        printf ("WARNING: Factor divisible by %u.  Only occasionally should GPU sieve let small factors slip through\n", i);
    }
  }

  // validate the exponentiation
  //
  // tmp starts at 65536 = 2^(2^4) and is squared modulo fac once for each i
  // from 4 up to (but not including) the limit, leaving 2^(2^limit) mod fac.
  // The limit is exponent - 2 when fermat_factoring is set, else exponent.

  tmp[0] = 1; tmp[1] = 65536;
  for (i = 4; i < (mystuff->fermat_factoring ? mystuff->exponent - 2 : mystuff->exponent); i++) {
    dn_square (tmp, tmp);
    dn_mod (tmp, fac, tmp);
  }
  if (! dn_equal (tmp, rem)) {
    printf ("ERROR: Exponentiation failure\n");
    exit(1);
  }
}


// Figure out which Fermat number a factor divides
//
// Reads the factor selected by factor_index from the host results array
// (word count in h_RES[1], factors stored back to back from h_RES[2]) and
// returns the largest exp in 6 .. exponent-2 for which 2^(2^exp) mod factor
// is not 1.  Returns 5 when every such power is 1, and exponent-2 unchanged
// when exponent-2 is 5 or less.
//
// For a factor of a Fermat number this is the exponentiation that returns
// fac-1 rather than 1.

int which_fermat_number(mystuff_t *mystuff, int factor_index)
{
  unsigned int fac[32], tmp[64], *hptr;
  unsigned int i, datalen, exp, top;

  // get the data

  datalen = mystuff->h_RES[1];

  // fac[] holds a length word plus datalen data words; an oversized length
  // would overrun it, and a zero-length factor would hang dn_mod.
  if (datalen == 0 || datalen > sizeof(fac) / sizeof(fac[0]) - 1) {
    printf ("ERROR: Factor data length %u is out of range\n", datalen);
    exit(1);
  }

  fac[0] = datalen;
  hptr = mystuff->h_RES + factor_index * datalen + 2;
  for (i = 1; i <= datalen; i++) fac[i] = *hptr++;

  top = mystuff->exponent - 2;
  if (top <= 5) return (int) top;    // nothing to search

  // Look for the exponentiation that returns fac-1 rather than 1
  //
  // Once a power equals 1, every later squaring is 1 too.  So the largest
  // exp whose power is not 1 sits just below the first exp whose power is 1,
  // and one ascending pass over the squaring chain finds it.

  tmp[0] = 1; tmp[1] = 65536;        // 2^(2^4)
  dn_square (tmp, tmp);
  dn_mod (tmp, fac, tmp);            // 2^(2^5) mod fac
  for (exp = 6; exp <= top; exp++) {
    dn_square (tmp, tmp);
    dn_mod (tmp, fac, tmp);          // tmp = 2^(2^exp) mod fac
    if (tmp[0] == 1 && tmp[1] == 1) return (int) (exp - 1);
  }
  return (int) top;
}
