Bringing a JavaScript Function into BigQuery Standard SQL

1

I have a JavaScript file that assigns a bucket to a customer according to the algorithm number and test number (function getBuckets). However, I am having trouble bringing this function over from the JavaScript and defining it in BigQuery Standard SQL so that I can use it on specific databases.

Here is the Input Schema from Big Query that I am trying to get a bucket number for:

  • 7354430 AS customerId,

  • 4 AS algorithmIndex,

  • 5947 AS testId

Here is what the output should be after running my sql:

  • customerId,

  • bucketNumber

Does anyone know how to bring the getBuckets function from the JavaScript into BigQuery Standard SQL in order to get the bucketNumber?

Javascript is below:

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  */
/*  SHA-1 implementation in JavaScript                  (c) Chris Veness 2002-2014 / MIT Licence  */
/*                                                                                                */
/*  - see http://csrc.nist.gov/groups/ST/toolkit/secure_hashing.html                              */
/*        http://csrc.nist.gov/groups/ST/toolkit/examples.html                                    */
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  */

/* jshint node:true *//* global define, escape, unescape */
'use strict';


/**
 * SHA-1 hash function reference implementation.
 *
 * @namespace
 */
var Sha1 = {};


/**
 * Generates SHA-1 hash of string.
 *
 * The input is converted to a UTF-8 byte string by calling msg.utf8Encode(), so
 * String.prototype.utf8Encode must be installed before this function is called.
 * The byte string is then padded, packed into 512-bit blocks of sixteen 32-bit
 * words, and each block is run through the 80-round compression loop.
 *
 * @param   {string} msg - (Unicode) string to be hashed.
 * @returns {string} Hash of msg as hex character string (five 32-bit words, H0..H4, in order).
 */
Sha1.hash = function(msg) {
    // convert string to UTF-8, as SHA only deals with byte-streams
    msg = msg.utf8Encode();

    // constants [§4.2.1] - one per group of 20 rounds, indexed by 's' in the main loop
    var K = [ 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 ];

    // PREPROCESSING

    msg += String.fromCharCode(0x80);  // add trailing '1' bit (+ 0's padding) to string [§5.1.1]

    // convert string msg into 512-bit/16-integer blocks arrays of ints [§5.2.1]
    var l = msg.length/4 + 2; // length (in 32-bit integers) of msg + '1' + appended length
    var N = Math.ceil(l/16);  // number of 16-integer-blocks required to hold 'l' ints
    var M = new Array(N);

    for (var i=0; i<N; i++) {
        M[i] = new Array(16);
        for (var j=0; j<16; j++) {  // encode 4 chars per integer, big-endian encoding
            M[i][j] = (msg.charCodeAt(i*64+j*4)<<24) | (msg.charCodeAt(i*64+j*4+1)<<16) |
                (msg.charCodeAt(i*64+j*4+2)<<8) | (msg.charCodeAt(i*64+j*4+3));
        } // note running off the end of msg is ok 'cos bitwise ops on NaN return 0
    }
    // add length (in bits) into final pair of 32-bit integers (big-endian) [§5.1.1]
    // note: most significant word would be (len-1)*8 >>> 32, but since JS converts
    // bitwise-op args to 32 bits, we need to simulate this by arithmetic operators
    // (msg.length-1 excludes the 0x80 marker character appended above)
    M[N-1][14] = ((msg.length-1)*8) / Math.pow(2, 32); M[N-1][14] = Math.floor(M[N-1][14]);
    M[N-1][15] = ((msg.length-1)*8) & 0xffffffff;

    // set initial hash value [§5.3.1]
    var H0 = 0x67452301;
    var H1 = 0xefcdab89;
    var H2 = 0x98badcfe;
    var H3 = 0x10325476;
    var H4 = 0xc3d2e1f0;

    // HASH COMPUTATION [§6.1.2]

    // W is the 80-word message schedule; it is refilled for every block
    var W = new Array(80); var a, b, c, d, e;
    for (var i=0; i<N; i++) {

        // 1 - prepare message schedule 'W'
        for (var t=0;  t<16; t++) W[t] = M[i][t];
        for (var t=16; t<80; t++) W[t] = Sha1.ROTL(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1);

        // 2 - initialise five working variables a, b, c, d, e with previous hash value
        a = H0; b = H1; c = H2; d = H3; e = H4;

        // 3 - main loop
        for (var t=0; t<80; t++) {
            var s = Math.floor(t/20); // seq for blocks of 'f' functions and 'K' constants
            // '& 0xffffffff' truncates the sum to 32 bits (addition modulo 2^32)
            var T = (Sha1.ROTL(a,5) + Sha1.f(s,b,c,d) + e + K[s] + W[t]) & 0xffffffff;
            e = d;
            d = c;
            c = Sha1.ROTL(b, 30);
            b = a;
            a = T;
        }

        // 4 - compute the new intermediate hash value (note 'addition modulo 2^32')
        H0 = (H0+a) & 0xffffffff;
        H1 = (H1+b) & 0xffffffff;
        H2 = (H2+c) & 0xffffffff;
        H3 = (H3+d) & 0xffffffff;
        H4 = (H4+e) & 0xffffffff;
    }

    // concatenate the five state words, each rendered by Sha1.toHexStr
    return Sha1.toHexStr(H0) + Sha1.toHexStr(H1) + Sha1.toHexStr(H2) +
           Sha1.toHexStr(H3) + Sha1.toHexStr(H4);
};


/**
 * SHA-1 logical function 'f' [§4.1.1], selected by round group.
 *
 * @private
 * @param   {number} s - Round group selector (0, 1, 2 or 3).
 * @param   {number} x - First word.
 * @param   {number} y - Second word.
 * @param   {number} z - Third word.
 * @returns {number|undefined} Ch(x,y,z) for s=0, Maj(x,y,z) for s=2, Parity(x,y,z)
 *                             for s=1 or s=3; undefined for any other s.
 */
Sha1.f = function(s, x, y, z)  {
    if (s === 0) {
        return (x & y) ^ (~x & z);           // Ch()
    }
    if (s === 2) {
        return (x & y) ^ (x & z) ^ (y & z);  // Maj()
    }
    if (s === 1 || s === 3) {
        return x ^ y ^ z;                    // Parity()
    }
};

/**
 * Circular left shift of a 32-bit word [§3.2.5].
 *
 * @private
 * @param   {number} x - Word to rotate.
 * @param   {number} n - Number of bit positions to rotate by.
 * @returns {number} x rotated left by n bits, as a signed 32-bit integer.
 */
Sha1.ROTL = function(x, n) {
    var shiftedLeft = x << n;          // low bits move up, top n bits fall off
    var wrappedBits = x >>> (32 - n);  // the top n bits, moved down to the bottom
    return shiftedLeft | wrappedBits;
};


/**
 * Renders a 32-bit word as exactly eight lowercase hexadecimal digits.
 *
 * @private
 * @param   {number} n - Word to render; only its low 32 bits are used.
 * @returns {string} Eight hex digits, most significant nibble first.
 */
Sha1.toHexStr = function(n) {
    // Digits are produced one nibble at a time instead of calling n.toString(16),
    // which would print a minus sign for words whose top bit is set.
    var HEX_DIGITS = '0123456789abcdef';
    var digits = [];
    for (var shift = 28; shift >= 0; shift -= 4) {
        digits.push(HEX_DIGITS.charAt((n >>> shift) & 0xf));
    }
    return digits.join('');
};


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  */


/**
 * Adds String.prototype.utf8Encode unless something has already defined it.
 * The method returns the string's UTF-8 encoding as a string holding one
 * character (code 0-255) per byte.
 *  - monsur.hossa.in/2012/07/20/utf-8-in-javascript.html
 */
if (String.prototype.utf8Encode === undefined) {
    String.prototype.utf8Encode = function() {
        // percent-encode as UTF-8, then turn every %XX back into a single character
        var percentEncoded = encodeURIComponent(this);
        return unescape(percentEncoded);
    };
}

/**
 * Adds String.prototype.utf8Decode unless something has already defined it.
 * The method treats the string as one character per UTF-8 byte and returns the
 * decoded multi-byte string; input that is not valid UTF-8 is returned unchanged.
 */
if (String.prototype.utf8Decode === undefined) {
    String.prototype.utf8Decode = function() {
        var decoded;
        try {
            decoded = decodeURIComponent(escape(this));
        } catch (e) {
            decoded = this; // invalid UTF-8? hand the input back as-is
        }
        return decoded;
    };
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  */
// CommonJS export - only when a module object with an exports slot is present
if (typeof module != 'undefined' && module.exports) {
    module.exports = Sha1;
}
// AMD export - only when an AMD-style define() is present
if (typeof define == 'function' && define.amd) {
    define([], function() {
        return Sha1;
    });
}



/**
 * Deterministically assigns a customer to a test bucket.
 *
 * customerId and testNumber are combined with '+' (a numeric sum for numbers, a
 * concatenation for strings), the result is SHA-1 hashed, and the last four hex
 * digits of the digest are scaled to a fraction c in [0, 1]. c is then compared
 * against the cumulative upper bounds of the distribution chosen by algorithmIndex.
 *
 * Behaviour fixed relative to the earlier if/else ladder:
 *   - any null/undefined argument now always yields -1 (previously a stranded range
 *     check inside the null branch turned a null algorithmIndex into 0);
 *   - an unknown algorithmIndex now yields the integer 0 instead of the string ''.
 *
 * @param   {?(number|string)} customerId - Customer identifier.
 * @param   {?(number|string)} algorithmIndex - Distribution to use: 1-27 (bound tables)
 *                                              or 28-29 (descriptor strings).
 * @param   {?(number|string)} testNumber - Test identifier, combined with customerId to seed the hash.
 * @returns {number} Bucket number; -1 for null/undefined input; 0 for the remainder
 *                   bucket and for an unknown algorithmIndex.
 */
Sha1.getDeterministicBuckets = (function() {

    // Cumulative upper bounds per algorithm. The first bound that c falls below wins.
    // Buckets are numbered downwards (first bound -> highest bucket number) unless the
    // algorithm is listed in ASCENDING_ALGORITHMS; when c is below no bound the bucket is 0.
    // The literals are the validated production thresholds - do not "tidy" them.
    var BUCKET_UPPER_BOUNDS = {
        // 25/25/50
        1:  [0.25, 0.50],
        // 1/99
        2:  [0.01],
        // 10/10/10/...
        3:  [0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90],
        // 25/25/25/25
        4:  [0.25, 0.50, 0.75],
        // 50/50
        5:  [0.50],
        // 10/90
        6:  [0.10],
        // 10/10/10/10/10/50
        7:  [0.10, 0.20, 0.30, 0.40, 0.50],
        // 20/20/20/20/20
        8:  [0.20, 0.40, 0.60, 0.80],
        // 96/2/2
        9:  [0.02, 0.04],
        // 80/20
        10: [0.20],
        // 12.5 x 8
        11: [0.125, 0.250, 0.375, 0.500, 0.625, 0.750, 0.875],
        // 50/10/20/20
        12: [0.20, 0.40, 0.50],
        // 100
        13: [],
        // 80/2/2/2/2/2/2/2/2/2/2
        14: [0.02, 0.04, 0.06, 0.08, 0.10, 0.12, 0.14, 0.16, 0.18, 0.20],
        // 11/11/3/3/3/3/....
        15: [0.03, 0.06, 0.09, 0.12, 0.15, 0.18, 0.21, 0.24, 0.27,
             0.30, 0.33, 0.36, 0.39, 0.42, 0.45, 0.48, 0.51, 0.54,
             0.57, 0.60, 0.63, 0.66, 0.69, 0.72, 0.75, 0.78, 0.89],
        // 23/23/23/23/8
        16: [0.08, 0.31, 0.54, 0.77],
        // 97 + 0.5 x 6
        17: [0.005, 0.010, 0.015, 0.020, 0.025, 0.030],
        // 80/10/10
        18: [0.10, 0.20],
        // 70/10/10/10
        19: [0.10, 0.20, 0.30],
        // 90/5/5
        20: [0.05, 0.10],
        // 80/5/5/5/5
        21: [0.05, 0.10, 0.15, 0.20],
        // 45/45/10
        22: [0.10, 0.55],
        // 5.88 x 17
        23: [0.0588, 0.1176, 0.1764, 0.2352, 0.2940, 0.3528, 0.4116, 0.4704,
             0.5292, 0.5880, 0.6468, 0.7056, 0.7644, 0.8232, 0.8820, 0.9401],
        // 97.5/2.5
        24: [0.025],
        // 92.5/7.5
        25: [0.075],
        // 50 + 2.5 x 20 (numbered ascending, see ASCENDING_ALGORITHMS)
        26: [0.025, 0.050, 0.075, 0.100, 0.125, 0.150, 0.175, 0.200, 0.225, 0.250,
             0.275, 0.300, 0.325, 0.350, 0.375, 0.400, 0.425, 0.450, 0.475, 0.500],
        // 33.3/33.3/33.3
        27: [0.333, 0.666]
    };

    // Algorithms whose buckets count upwards (first bound -> bucket 1).
    // 26 was set up this way historically; its results were validated against the webstore.
    var ASCENDING_ALGORITHMS = { 26: true };

    // Distributions described as '/'-separated percentages. Entry 0 is bucket 0's share
    // and is never read: bucket 0 simply receives whatever the other buckets leave over.
    var testDescriptors = {
        28:{s:100, d:'1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1'},
        29:{s:21, d:'40/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3'}
    };

    var hasOwn = Object.prototype.hasOwnProperty;

    return function(customerId,algorithmIndex,testNumber)  {

        /* (on 6/3/13) Bad inputs (and/or logged out users) yield a -1. */
        if ( customerId == null || algorithmIndex == null || testNumber == null ) {
            return -1;
        }

        var seed = (customerId+testNumber).toString();
        var sha1 = Sha1.hash(seed);
        var end = sha1.substr(36);            // last 4 hex digits of the 40-digit digest
        var c = parseInt(end, 16)/65535.0;    // 0x0000..0xffff scaled to [0, 1]

        // Number() mirrors the loose '==' comparisons of the old ladder ("4" selects 4).
        var index = Number(algorithmIndex);
        var i;

        if (hasOwn.call(BUCKET_UPPER_BOUNDS, index)) {
            var bounds = BUCKET_UPPER_BOUNDS[index];
            for (i = 0; i < bounds.length; i++) {
                if (c < bounds[i]) {
                    return ASCENDING_ALGORITHMS[index] ? i + 1 : bounds.length - i;
                }
            }
            return 0;
        }

        if (hasOwn.call(testDescriptors, index)) {
            var distributions = testDescriptors[index].d.split('/');
            var threshold = 0.0;
            // starts at 1: bucket i owns the slice that ends at the running threshold
            for (i = 1; i < distributions.length; i++) {
                threshold += parseFloat(distributions[i])/100.0;
                if (c < threshold) {
                    return i;
                }
            }
            return 0;
        }

        // unknown algorithm: everyone lands in the default bucket
        return 0;
    };
})();


/**
 * Row-level UDF entry point: computes the bucket for one input row and emits it.
 *
 * @param {Object} row - Input row; inCustomerId, inAlgorithmIndex and inTestNumber are read.
 * @param {function(Object)} emit - Callback that receives {bucketNumber, CustomerId}.
 */
function getBuckets(row, emit) {
  var customerId = row.inCustomerId;
  var bucket = Sha1.getDeterministicBuckets(customerId, row.inAlgorithmIndex, row.inTestNumber);
  emit({bucketNumber: bucket, CustomerId: customerId});
}


// Register getBuckets with BigQuery so it can be called from SQL.
bigquery.defineFunction(
  // Name of the function exported to SQL
  'getBuckets',
  // Names of input columns (read from each row by getBuckets)
  [
    'inCustomerId',
    'inAlgorithmIndex',
    'inTestNumber'
  ],
  // Output schema (fields of each emitted record)
  [
    {'name': 'bucketNumber', 'type': 'integer'},
    {'name': 'CustomerId', 'type': 'integer'}
  ],
  // Reference to JavaScript UDF
  getBuckets
);
javascript
google-bigquery
user-defined-functions
asked on Stack Overflow Jun 8, 2020 by Josh Simpson

3 Answers

1

I fixed it for #standardSQL.

First, add these 3 lines at the beginning, to define a JS UDF:

CREATE TEMP FUNCTION getDeterministicBuckets(customerId INT64, algorithmIndex INT64, testId INT64)
RETURNS STRUCT<bucketNumber INT64, CustomerId INT64>
LANGUAGE js AS """
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  */
/*  SHA-1 implementation in JavaScript                  (c) Chris Veness 2002-2014 / MIT Licence  */
/*                                                                                                */
/*  - see http://csrc.nist.gov/groups/ST/toolkit/secure_hashing.html                              */
/*        http://csrc.nist.gov/groups/ST/toolkit/examples.html                                    */
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  */

At the end, instead of a signature for a function:

  1. Don't use emit()
  2. return instead

Like in:


return {bucketNumber: Sha1.getDeterministicBuckets(customerId, algorithmIndex, testId) , CustomerId: customerId};

""";

That will allow you to call the function like this:

SELECT getDeterministicBuckets(7354430,4,5947) x 

Complete working code:

CREATE TEMP FUNCTION getDeterministicBuckets(customerId INT64, algorithmIndex INT64, testId INT64)
RETURNS STRUCT<bucketNumber INT64, CustomerId INT64>
LANGUAGE js AS """
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  */
/*  SHA-1 implementation in JavaScript                  (c) Chris Veness 2002-2014 / MIT Licence  */
/*                                                                                                */
/*  - see http://csrc.nist.gov/groups/ST/toolkit/secure_hashing.html                              */
/*        http://csrc.nist.gov/groups/ST/toolkit/examples.html                                    */
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  */

/* jshint node:true *//* global define, escape, unescape */
'use strict';


/**
 * SHA-1 hash function reference implementation.
 *
 * @namespace
 */
var Sha1 = {};


/**
 * Generates SHA-1 hash of string.
 *
 * The input is converted to a UTF-8 byte string by calling msg.utf8Encode(), so
 * String.prototype.utf8Encode must be installed before this function is called.
 * The byte string is then padded, packed into 512-bit blocks of sixteen 32-bit
 * words, and each block is run through the 80-round compression loop.
 *
 * @param   {string} msg - (Unicode) string to be hashed.
 * @returns {string} Hash of msg as hex character string (five 32-bit words, H0..H4, in order).
 */
Sha1.hash = function(msg) {
    // convert string to UTF-8, as SHA only deals with byte-streams
    msg = msg.utf8Encode();

    // constants [§4.2.1] - one per group of 20 rounds, indexed by 's' in the main loop
    var K = [ 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 ];

    // PREPROCESSING

    msg += String.fromCharCode(0x80);  // add trailing '1' bit (+ 0's padding) to string [§5.1.1]

    // convert string msg into 512-bit/16-integer blocks arrays of ints [§5.2.1]
    var l = msg.length/4 + 2; // length (in 32-bit integers) of msg + '1' + appended length
    var N = Math.ceil(l/16);  // number of 16-integer-blocks required to hold 'l' ints
    var M = new Array(N);

    for (var i=0; i<N; i++) {
        M[i] = new Array(16);
        for (var j=0; j<16; j++) {  // encode 4 chars per integer, big-endian encoding
            M[i][j] = (msg.charCodeAt(i*64+j*4)<<24) | (msg.charCodeAt(i*64+j*4+1)<<16) |
                (msg.charCodeAt(i*64+j*4+2)<<8) | (msg.charCodeAt(i*64+j*4+3));
        } // note running off the end of msg is ok 'cos bitwise ops on NaN return 0
    }
    // add length (in bits) into final pair of 32-bit integers (big-endian) [§5.1.1]
    // note: most significant word would be (len-1)*8 >>> 32, but since JS converts
    // bitwise-op args to 32 bits, we need to simulate this by arithmetic operators
    // (msg.length-1 excludes the 0x80 marker character appended above)
    M[N-1][14] = ((msg.length-1)*8) / Math.pow(2, 32); M[N-1][14] = Math.floor(M[N-1][14]);
    M[N-1][15] = ((msg.length-1)*8) & 0xffffffff;

    // set initial hash value [§5.3.1]
    var H0 = 0x67452301;
    var H1 = 0xefcdab89;
    var H2 = 0x98badcfe;
    var H3 = 0x10325476;
    var H4 = 0xc3d2e1f0;

    // HASH COMPUTATION [§6.1.2]

    // W is the 80-word message schedule; it is refilled for every block
    var W = new Array(80); var a, b, c, d, e;
    for (var i=0; i<N; i++) {

        // 1 - prepare message schedule 'W'
        for (var t=0;  t<16; t++) W[t] = M[i][t];
        for (var t=16; t<80; t++) W[t] = Sha1.ROTL(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1);

        // 2 - initialise five working variables a, b, c, d, e with previous hash value
        a = H0; b = H1; c = H2; d = H3; e = H4;

        // 3 - main loop
        for (var t=0; t<80; t++) {
            var s = Math.floor(t/20); // seq for blocks of 'f' functions and 'K' constants
            // '& 0xffffffff' truncates the sum to 32 bits (addition modulo 2^32)
            var T = (Sha1.ROTL(a,5) + Sha1.f(s,b,c,d) + e + K[s] + W[t]) & 0xffffffff;
            e = d;
            d = c;
            c = Sha1.ROTL(b, 30);
            b = a;
            a = T;
        }

        // 4 - compute the new intermediate hash value (note 'addition modulo 2^32')
        H0 = (H0+a) & 0xffffffff;
        H1 = (H1+b) & 0xffffffff;
        H2 = (H2+c) & 0xffffffff;
        H3 = (H3+d) & 0xffffffff;
        H4 = (H4+e) & 0xffffffff;
    }

    // concatenate the five state words, each rendered by Sha1.toHexStr
    return Sha1.toHexStr(H0) + Sha1.toHexStr(H1) + Sha1.toHexStr(H2) +
           Sha1.toHexStr(H3) + Sha1.toHexStr(H4);
};


/**
 * SHA-1 logical function 'f' [§4.1.1], selected by round group.
 *
 * @private
 * @param   {number} s - Round group selector (0, 1, 2 or 3).
 * @param   {number} x - First word.
 * @param   {number} y - Second word.
 * @param   {number} z - Third word.
 * @returns {number|undefined} Ch(x,y,z) for s=0, Maj(x,y,z) for s=2, Parity(x,y,z)
 *                             for s=1 or s=3; undefined for any other s.
 */
Sha1.f = function(s, x, y, z)  {
    if (s === 0) {
        return (x & y) ^ (~x & z);           // Ch()
    }
    if (s === 2) {
        return (x & y) ^ (x & z) ^ (y & z);  // Maj()
    }
    if (s === 1 || s === 3) {
        return x ^ y ^ z;                    // Parity()
    }
};

/**
 * Circular left shift of a 32-bit word [§3.2.5].
 *
 * @private
 * @param   {number} x - Word to rotate.
 * @param   {number} n - Number of bit positions to rotate by.
 * @returns {number} x rotated left by n bits, as a signed 32-bit integer.
 */
Sha1.ROTL = function(x, n) {
    var shiftedLeft = x << n;          // low bits move up, top n bits fall off
    var wrappedBits = x >>> (32 - n);  // the top n bits, moved down to the bottom
    return shiftedLeft | wrappedBits;
};


/**
 * Renders a 32-bit word as exactly eight lowercase hexadecimal digits.
 *
 * @private
 * @param   {number} n - Word to render; only its low 32 bits are used.
 * @returns {string} Eight hex digits, most significant nibble first.
 */
Sha1.toHexStr = function(n) {
    // Digits are produced one nibble at a time instead of calling n.toString(16),
    // which would print a minus sign for words whose top bit is set.
    var HEX_DIGITS = '0123456789abcdef';
    var digits = [];
    for (var shift = 28; shift >= 0; shift -= 4) {
        digits.push(HEX_DIGITS.charAt((n >>> shift) & 0xf));
    }
    return digits.join('');
};


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  */


/**
 * Adds String.prototype.utf8Encode unless something has already defined it.
 * The method returns the string's UTF-8 encoding as a string holding one
 * character (code 0-255) per byte.
 *  - monsur.hossa.in/2012/07/20/utf-8-in-javascript.html
 */
if (String.prototype.utf8Encode === undefined) {
    String.prototype.utf8Encode = function() {
        // percent-encode as UTF-8, then turn every %XX back into a single character
        var percentEncoded = encodeURIComponent(this);
        return unescape(percentEncoded);
    };
}

/**
 * Adds String.prototype.utf8Decode unless something has already defined it.
 * The method treats the string as one character per UTF-8 byte and returns the
 * decoded multi-byte string; input that is not valid UTF-8 is returned unchanged.
 */
if (String.prototype.utf8Decode === undefined) {
    String.prototype.utf8Decode = function() {
        var decoded;
        try {
            decoded = decodeURIComponent(escape(this));
        } catch (e) {
            decoded = this; // invalid UTF-8? hand the input back as-is
        }
        return decoded;
    };
}


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  */
// CommonJS export - only when a module object with an exports slot is present
if (typeof module != 'undefined' && module.exports) {
    module.exports = Sha1;
}
// AMD export - only when an AMD-style define() is present
if (typeof define == 'function' && define.amd) {
    define([], function() {
        return Sha1;
    });
}



Sha1.getDeterministicBuckets = function(customerId,algorithmIndex,testNumber)  {

  var testDescriptors = {
  28:{s:100, d:'1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1/1'},
  29:{s:21, d:'40/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3/3'}
  };


  var seed = (customerId+testNumber).toString();
  var sha1 = Sha1.hash(seed);
  var end = sha1.substr(36)
  var c = parseInt(end, 16)/65535.0;

  var result = '';
  /*
  if ( algorithmIndex == 3 )
      if ( c < 0.10 ) result =  9;
      else if ( c < 0.20 ) result =  8;
      else if ( c < 0.30 ) result =  7;
      else if ( c < 0.40 ) result =  6;
      else if ( c < 0.50 ) result =  5;
      else if ( c < 0.60 ) result =  4;
      else if ( c < 0.70 ) result =  3;
      else if ( c < 0.80 ) result =  2;
      else if ( c < 0.90 ) result =  1;
      else result =  0;
  */

  /* (on 6/3/13) Bad inputs (and/or logged out users) yield a -1. */
        if ( customerId == null || algorithmIndex == null || testNumber == null ) {

                   result =  -1;
             if( algorithmIndex < 1 ||  algorithmIndex > 27)
            result =  0;

        /*  25/25/50 */
        }else if ( algorithmIndex == 1 ) {

                     if ( c < 0.25 ) result =  2;
                else if ( c < 0.50 ) result =  1;
                else result =  0;


        /*  1/99 */
        }else if ( algorithmIndex == 2 ) {

                if ( c < 0.01 ) result =  1
                else result =  0;


        /*  10/10/10/... */
        }else if ( algorithmIndex == 3 ){

                     if ( c < 0.10 ) result =  9;
                else if ( c < 0.20 ) result =  8;
                else if ( c < 0.30 ) result =  7;
                else if ( c < 0.40 ) result =  6;
                else if ( c < 0.50 ) result =  5;
                else if ( c < 0.60 ) result =  4;
                else if ( c < 0.70 ) result =  3;
                else if ( c < 0.80 ) result =  2;
                else if ( c < 0.90 ) result =  1;
                else result =  0;


        /*  25/25/25/25 */
        }else if ( algorithmIndex == 4 ) {

                 if ( c < 0.25 ) result =  3;
            else if ( c < 0.50 ) result =  2;
            else if ( c < 0.75 ) result =  1;
            else result =  0;


        /*  50/50 */
        }else if ( algorithmIndex == 5 ) {

                if ( c < 0.50 ) result =  1;
                else result =  0;


        /*  10/90 */
        }else if ( algorithmIndex == 6 ) {

                if ( c < 0.10 ) result =  1;
                else result =  0;


        /*  10/10/10/10/10/50 */
        }else if ( algorithmIndex == 7 ) {

                     if ( c < 0.10 ) result =  5;
                else if ( c < 0.20 ) result =  4;
                else if ( c < 0.30 ) result =  3;
                else if ( c < 0.40 ) result =  2;
                else if ( c < 0.50 ) result =  1;
                else result =  0;


        /*  20/20/20/20/20 */
        }else if ( algorithmIndex == 8 ){

                     if ( c < 0.20 ) result =  4;
                else if ( c < 0.40 ) result =  3;
                else if ( c < 0.60 ) result =  2;
                else if ( c < 0.80 ) result =  1;
                else result =  0;


        /*  96/2/2 */
        }else if ( algorithmIndex == 9 ) {

                     if ( c < 0.02 ) result =  2;
                else if ( c < 0.04 ) result =  1;
                else result =  0;


        /*  80/20 */
        }else if ( algorithmIndex == 10 ) {

                if ( c < 0.20 ) result =  1;
                else result =  0;


        /*  12.5/12.5/12.5/etc */
        }else if ( algorithmIndex == 11 ) {

                     if ( c < 0.125 ) result =  7;
                else if ( c < 0.250 ) result =  6;
                else if ( c < 0.375 ) result =  5;
                else if ( c < 0.500 ) result =  4;
                else if ( c < 0.625 ) result =  3;
                else if ( c < 0.750 ) result =  2;
                else if ( c < 0.875 ) result =  1;
                else result =  0;


        /*  50/10/20/20 */
        }else if ( algorithmIndex == 12 ) {

                     if ( c < 0.20 ) result =  3;
                else if ( c < 0.40 ) result =  2;
                else if ( c < 0.50 ) result =  1;
                else result =  0;


        /*  100 */
        }else if ( algorithmIndex == 13 ) {

            result =  0;


        /*  80/2/2/2/2/2/2/2/2/2/2 */
        }else if ( algorithmIndex == 14 ) {

                     if ( c < 0.02 ) result =  10;
                else if ( c < 0.04 ) result =  9;
                else if ( c < 0.06 ) result =  8;
                else if ( c < 0.08 ) result =  7;
                else if ( c < 0.10 ) result =  6;
                else if ( c < 0.12 ) result =  5;
                else if ( c < 0.14 ) result =  4;
                else if ( c < 0.16 ) result =  3;
                else if ( c < 0.18 ) result =  2;
                else if ( c < 0.20 ) result =  1;
                else result =  0;


        /*  11/11/3/3/3/3/.... */
        }else if ( algorithmIndex == 15 ) {

                     if ( c < 0.03 ) result =  27;
                  else if ( c < 0.06 ) result =  26;
                  else if ( c < 0.09 ) result =  25;
                  else if ( c < 0.12 ) result =  24;
                  else if ( c < 0.15 ) result =  23;
                  else if ( c < 0.18 ) result =  22;
                  else if ( c < 0.21 ) result =  21;
                  else if ( c < 0.24 ) result =  20;
                  else if ( c < 0.27 ) result =  19;
                  else if ( c < 0.30 ) result =  18;
                  else if ( c < 0.33 ) result =  17;
                  else if ( c < 0.36 ) result =  16;
                  else if ( c < 0.39 ) result =  15;
                  else if ( c < 0.42 ) result =  14;
                  else if ( c < 0.45 ) result =  13;
                  else if ( c < 0.48 ) result =  12;
                  else if ( c < 0.51 ) result =  11;
                  else if ( c < 0.54 ) result =  10;
                  else if ( c < 0.57 ) result =  9;
                  else if ( c < 0.60 ) result =  8;
                  else if ( c < 0.63 ) result =  7;
                  else if ( c < 0.66 ) result =  6;
                  else if ( c < 0.69 ) result =  5;
                  else if ( c < 0.72 ) result =  4;
                  else if ( c < 0.75 ) result =  3;
                  else if ( c < 0.78 ) result =  2;
                  else if ( c < 0.89 ) result =  1;
                  else result =  0;


        /*  23/23/23/23/8 */
         } else if ( algorithmIndex == 16 ) {

                       if ( c < 0.08 ) result =  4;
                  else if ( c < 0.31 ) result =  3;
                  else if ( c < 0.54 ) result =  2;
                  else if ( c < 0.77 ) result =  1;
                  else result =  0;


          /*  97/0.5/0.5/0.5/0.5/0.5/0.5 */
         }else  if ( algorithmIndex == 17 ) {

                     if ( c < 0.005 ) result =  6;
                else if ( c < 0.010 ) result =  5;
                  else if ( c < 0.015 ) result =  4;
                  else if ( c < 0.020 ) result =  3;
                  else if ( c < 0.025 ) result =  2;
                  else if ( c < 0.030 ) result =  1;
                  else result =  0;


          /*  80/10/10 */
            } else  if ( algorithmIndex == 18 ){

                           if ( c < 0.10 ) result =  2;
                      else if ( c < 0.20 ) result =  1;
                      else result =  0;


              /*  70/10/10/10 */
            } else  if ( algorithmIndex == 19 ){

                           if ( c < 0.10 ) result =  3;
                      else if ( c < 0.20 ) result =  2;
                      else if ( c < 0.30 ) result =  1;
                      else result =  0;


              /*  90/5/5 */
              }else if ( algorithmIndex == 20 ){

                           if ( c < 0.05 ) result =  2;
                      else if ( c < 0.10 ) result =  1;
                      else result =  0;


              /*  80/5/5/5/5 */
              }else if ( algorithmIndex == 21 ){

                           if ( c < 0.05 ) result =  4;
                      else if ( c < 0.10 ) result =  3;
                      else if ( c < 0.15 ) result =  2;
                      else if ( c < 0.20 ) result =  1;
                      else result =  0;


              /*  45/45/10 */
            }else if ( algorithmIndex == 22 ) {

                         if ( c < 0.10 ) result =  2;
                    else if ( c < 0.55 ) result =  1;
                    else result =  0;


            /*  5.88 x 17 ... seriously? */
             } else if ( algorithmIndex == 23 ){

                           if ( c < 0.0588 ) result =  16;
                      else if ( c < 0.1176 ) result =  15
                      else if ( c < 0.1764 ) result =  14;
                      else if ( c < 0.2352 ) result =  13;
                      else if ( c < 0.2940 ) result =  12;
                      else if ( c < 0.3528 ) result =  11;
                      else if ( c < 0.4116 ) result =  10;
                      else if ( c < 0.4704 ) result =  9;
                      else if ( c < 0.5292 ) result =  8;
                      else if ( c < 0.5880 ) result =  7;
                      else if ( c < 0.6468 ) result =  6;
                      else if ( c < 0.7056 ) result =  5;
                      else if ( c < 0.7644 ) result =  4;
                      else if ( c < 0.8232 ) result =  3;
                      else if ( c < 0.8820 ) result =  2;
                      else if ( c < 0.9401 ) result =  1;
                      else result =  0;

            /*  97.5/2.5 */
            }else if ( algorithmIndex == 24 ) {

                         if ( c < 0.025 ) result =  1;
                    else result =  0;

            /*  92.5/7.5 */
            }else if ( algorithmIndex == 25 ) {

                         if ( c < 0.075 ) result =  1;
                    else result =  0;

            /*  50/2.5/2.5/2.5/2.5/2.5/2.5/2.5/2.5/2.5/2.5/2.5/2.5/2.5/2.5/2.5/2.5/2.5/2.5/2.5/2.5 */
            /* not sure why this one was setup with results asc vs desc; results were validated and match webstore */
            }else if ( algorithmIndex == 26 ) {

                         if ( c < 0.025 ) result =  1;
                    else if ( c < 0.050 ) result =  2;
                    else if ( c < 0.075 ) result =  3;
                    else if ( c < 0.100 ) result =  4;
                    else if ( c < 0.125 ) result =  5;
                    else if ( c < 0.150 ) result =  6;
                    else if ( c < 0.175 ) result =  7;
                    else if ( c < 0.200 ) result =  8;
                    else if ( c < 0.225 ) result =  9;
                    else if ( c < 0.250 ) result =  10;
                    else if ( c < 0.275 ) result =  11;
                    else if ( c < 0.300 ) result =  12;
                    else if ( c < 0.325 ) result =  13;
                    else if ( c < 0.350 ) result =  14;
                    else if ( c < 0.375 ) result =  15;
                    else if ( c < 0.400 ) result =  16;
                    else if ( c < 0.425 ) result =  17;
                    else if ( c < 0.450 ) result =  18;
                    else if ( c < 0.475 ) result =  19;
                    else if ( c < 0.500 ) result =  20;
                    else result =  0;

                    /*  33.3/33.3/33.3 */
                    } else if ( algorithmIndex == 27 ) {

                         if ( c < 0.333 ) result =  2;
                    else if ( c < 0.666 ) result =  1;
                    else result =  0;

           } else {
                if (algorithmIndex in testDescriptors) {
                        var distributions = testDescriptors[algorithmIndex].d.split('/');
                        var threshold = 0.0;
                        result = 0;
                        for (var i = 1; i < distributions.length; i++) {
                                threshold += parseFloat(distributions[i])/100.0;
                                if (c < threshold) {
                                        result = i;
                                        break;
                                }
                        }
                }
            }


 return result;
};



return {bucketNumber: Sha1.getDeterministicBuckets(customerId, algorithmIndex, testId) , CustomerId: customerId};

""";

-- Call the UDF with the sample inputs from the question; the result column is aliased as x.
SELECT getDeterministicBuckets(7354430, 4, 5947) AS x
answered on Stack Overflow Jun 8, 2020 by Felipe Hoffa • edited Jun 8, 2020 by Felipe Hoffa
0

If your question is how to call your JS function in the example, you can follow https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions#including-javascript-libraries to:

  1. Upload the JS file to a GCS bucket.
  2. Create a function that calls the JS function, which will look something like this:
-- Persistent JavaScript UDF that wraps a function from an external JS library.
-- Parameter types are as given in this example; BigQuery JavaScript UDFs do not
-- accept INT64 inputs, so integer arguments are declared as FLOAT64 (or STRING).
CREATE OR REPLACE FUNCTION yourDataset.getBuckets(customerId STRING, algorithmIndex FLOAT64, testId STRING)
RETURNS STRING
LANGUAGE js
OPTIONS (
    -- Cloud Storage URIs of the uploaded JS file(s) from step 1; replace with your own paths.
    library=["gs://my-bucket/path/to/lib1.js", "gs://my-bucket/path/to/lib2.js"]
)
-- The body below is a template: replace `yourJS` and the `...` placeholder with the
-- library's actual object name and the arguments to forward.
AS """
return yourJS.getBuckets(...);
"""
answered on Stack Overflow Jun 8, 2020 by Yun Zhang
0

It looks like your JavaScript is a SHA-1 implementation.

So, how about simply using the built-in SHA1 function?

answered on Stack Overflow Jun 8, 2020 by Mikhail Berlyant

User contributions licensed under CC BY-SA 3.0