Bitcoin & AI. Victory is inevitable

    About some properties of the secp256k1 curve and an attempt to predict its behavior.

    As you know, the problem of discrete logarithms is very complex and people don’t know how to calculate it quickly. Moreover, knowing a point on the curve P = n * G, it is very difficult to make a judgment about the value of n. Even about the approximate value. Let's try even easier: try to make judgments about the sequence$ P (i) = i * G $rather about the values $ i $ knowing the meanings $ P (i) $.

    Let's try to determine how this sequence differs from a random sequence. If the sequence$ P (i) $complex and difficult to predict, it will not differ from a random sequence. And if it is different, it means that the sequence of points on the secp256k1 curve is not so complicated.
    We will build a neural network, which we will teach in the training sequence to distinguish sequences.

    If we can distinguish between a random sequence and a sequence of points, this will mean that there is a certain rather quickly calculated algorithm that allows us to make judgments about the logarithm.

    Let me remind you that calculating the discrete logarithm on an elliptic curve is a very difficult task.
    Take a pre-computed random sequence for the repeatability of the experiment. The quality of this sequence can be checked.

    dieharder -f PM_rand_600.bin -g 201 -a

    You can check nist, but the result will be almost the same.

    We compose a program that will mix the y coordinate of the sequence of curve points and a random sequence in the ratio 1: 8 and write to the x600_btc_32_LH.bin file and at the same time write a pointer to the source in the y600_btc_32_LH.bin curve or random.

    data_preparation.cpp
    #include 
    #include 
    #include 
    #include 
    #include 
    #include 
    #include 
    using namespace std;
    int main() {
    	int bits = 256;
    	unsigned char buf[32];
    	char *pr;
    	EC_GROUP *group;
    	EC_KEY *eckey = EC_KEY_new();
    	EC_POINT *P;
    	BN_CTX *ctx = BN_CTX_new();
    	BIGNUM *x = BN_new();
    	BIGNUM *n = BN_new();  // начало последовательности точек кривой
    	BIGNUM *y = BN_new();
    	char e_buf[256];
    	FILE * xFile;
    	FILE * yFile;
    	FILE * rFile;
    	xFile = fopen("x600_btc_32_LH.bin", "wb");
    	yFile = fopen("y600_btc_32_LH.bin", "wb");
    	rFile = fopen("PM_rand_600_t.bin", "rb");
    	if (rFile==NULL)
    	{ cout<<" PM_rand.bin NOT FOUND"; return -1; }
    	srand(time(NULL));
    // nid 714. curve secp256k1
    	int nid = 714;
    	if ((group = EC_GROUP_new_by_curve_name(nid)) == NULL) {
    		fprintf(stdout, "\nEC_GROUP_new_curve_name() failed with"
    				" curve %s\n  nid %x", nid);
    	}
    	if (eckey == NULL) {
    		cout << "ABORT2 ";
    		ERR_error_string(ERR_get_error(), e_buf);
    		cout << "E_BUF " << e_buf << endl;
    	}
    	if (!EC_KEY_set_group(eckey, group)) {
    		cout << "ABORT3 ";
    		ERR_error_string(ERR_get_error(), e_buf);
    		cout << "E_BUF " << e_buf << endl;
    	}
    	EC_GROUP_precompute_mult(group, ctx);
    	P = EC_POINT_new(group);
    	BN_rand(n, bits, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY);
    	// n - начало выборки
    	int NN = 60000;
    	for (int i = 0; i < NN; i++) {
    		if ((rand() % 128) < 16) {
    			pr = (char *) "1";
    			if (!EC_POINT_mul(group, P, n, NULL, NULL, ctx)) {
    				cout << "ABORT_10 ";
    				ERR_error_string(ERR_get_error(), e_buf);
    				cout << "E_BUF " << e_buf << endl;
    			}
    			if (!EC_POINT_get_affine_coordinates_GFp(group, P, x, y, ctx)) {
    				cout << "ABORT_11 ";
    				ERR_error_string(ERR_get_error(), e_buf);
    				cout << "E_BUF " << e_buf << endl;
    			}
    			BN_bn2bin(y, buf);
    		} else {
    			pr = (char *) "0";
    			int cind = fread(buf, 32, 1, rFile); // read 32 byte = 256 bit
    		}
    		fwrite(buf, 32, 1, xFile);
    		BN_add_word(n, 1L);  // in P new point n+i
    		fwrite(pr, 1, 1, yFile);
    	}
    	fclose(xFile);
    	fclose (yFile);
    	BN_CTX_free(ctx);
    	EC_GROUP_free(group);
    	BN_free(x);
    	BN_free(y);
    }
    


    And the two received files x600_btc_32_LH.bin and y600_btc_32_LH.bin we feed the network.

    from keras.models import Model
    from keras.utils import np_utils
    from keras.layers import Dense, Input
    from keras.layers import Bidirectional, GRU
    from keras.models import Model
    from keras.optimizers import RMSprop
    import numpy as np
    import keras as ks
    num_classes = 2 
    length = 32
    length_8 = length<<3
    num_train = 50000
    num_test = 10000
    X_train = np.zeros(shape=(num_train, length_8), dtype='uint8')
    y_train = np.zeros(shape=(num_train), dtype='uint8')
    X_test = np.zeros(shape=(num_test, length_8), dtype='uint8')
    y_test = np.zeros(shape=(num_test), dtype='uint8')
    bx_train = np.zeros(shape=(num_train, length), dtype='uint8')
    bx_test = np.zeros(shape=(num_test, length), dtype='uint8')
    f_x = open("./input/x600_btc_32_LH.bin", 'rb')
    for k in xrange(num_train):
        for i in xrange(32):
            bx_train[k, i] = ord(f_x.read(1))
    for k in xrange(num_test):
        for i in xrange(32):
            bx_test[k, i] = ord(f_x.read(1))
    f_x.close()
    f_y = open("./input/y600_btc_32_LH.bin", 'rb')
    for i in xrange(num_train):
        y_train[i] = ord(f_y.read(1))
    for i in xrange(num_test):
        y_test[i] = ord(f_y.read(1))
    f_y.close()
    y_train -= 48
    y_test -= 48
    

    Let's convert bits to bytes in format. Those. one bit of the original sequence is transferred in bytes.

    tab = np.zeros((256,8),dtype='int8')
    for i in xrange(256):
        mask = 1
        for j in xrange(8):
            if i & mask == 0:
                tab[i,j] = 0
            else:
                tab[i,j] = 1
            mask<<1
    for k in xrange(num_train):
        for i in xrange(length):
            for j in xrange(8):
                X_train[k,i*8+j] = tab[bx_train[k,i],j]
    for k in xrange(num_test):
        for i in xrange(length):
            for j in xrange(8):
                X_test[k,i*8+j] = tab[bx_test[k,i],j]
    

    We will translate it into float format and scale it to 0.004 and prepare Y.

    X_train = X_train.astype('float32') 
    X_test = X_test.astype('float32')
    X_train /= 255.
    X_test /= 255.
    Y_train = np_utils.to_categorical(y_train, num_classes)
    Y_test = np_utils.to_categorical(y_test, num_classes)
    

    We take the network quite simple, just change the activation function a little.

    import math
    from keras import backend as K
    from keras.utils.generic_utils import get_custom_objects
    from keras.layers import Activation
    def gaussian(x):
        mu = 64.
        sigma = 10.
        xx = -0.5*((x-mu)/sigma)**2 / sigma / math.sqrt(2*math.pi)
        return K.exp(xx)
    get_custom_objects().update({'gaussian': Activation(gaussian)})
    batch_size = 32
    num_epochs = 16
    hidden_size_1 = 1024
    hidden_size_2 = 1024
    X_train = X_train.reshape(num_train,16,16)
    X_test = X_test.reshape(num_test,16,16)
    inp = Input(shape=(16,16,))
    x = Bidirectional(GRU(1024, return_sequences=True))(inp)
    x = Bidirectional(GRU(1024, return_sequences=False))(x)
    x = Dense(hidden_size_1, activation='sigmoid')(x)
    x = Dense(hidden_size_2, activation='gaussian')(x)
    out = Dense(num_classes, activation='gaussian')(x) 
    model = Model(inputs=inp, outputs=out)
    model.compile(loss='binary_crossentropy',
    #              optimizer='adam',
                  optimizer=RMSprop(lr=0.0001,clipvalue=1, clipnorm=1),
                  metrics=['accuracy'])
    

    The result is quite acceptable, the network distinguishes a sequence of curve points from a random sequence, not as exactly as we would like, but the judgment on the logarithm does.

    mod = model.fit(X_train, Y_train, # Train the model using the training set...
              batch_size=batch_size, epochs=2,
              verbose=1, validation_data=(X_test, Y_test))
    Train on 50000 samples, validate on 10000 samples
    Epoch 1/2
    val_loss: 0.3706 - val_acc: 0.8783
    Epoch 2/2
    val_loss: 0.3703 - val_acc: 0.8783
    

    We have found that a simple ordinary neural network can distinguish between a random sequence and a sequence of points on the secp256k1 curve. This suggests that the network detects patterns in a sequence that must be very complex.

    Today, 04/01/18, this is the most serious vulnerability of the secp256k1 curve, and sooner or later the victory will be for AI.

    All texts and files are posted on GitHub and you can check everything, make sure and improve it.

    Also popular now: