{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Setting Everything up to get started" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import tensorflow as tf\n", "import tensorflow_datasets as tfd\n", "import matplotlib.pyplot as plt\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def visualize_images(data):\n", " num_images = 5\n", " im = np.zeros((num_images * 28, num_images*28))\n", " k = 0\n", " for i in range(num_images):\n", " for j in range(num_images):\n", " im[i*28:(i+1)*28, j*28:(j+1)*28] = data[k]\n", " k += 1\n", " plt.imshow(im, cmap='gray')\n", " plt.show" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## MLP" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mnist = tf.keras.datasets.mnist\n", "\n", "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", "visualize_images(x_train)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Creating a Data set" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "training_data = tf.data.Dataset.from_tensor_slices((x_train,y_train.reshape(-1,))).shuffle(10000).batch(32)\n", "test_data = tf.data.Dataset.from_tensors((x_test, y_test))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Building the first simple model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "linear_model = tf.keras.models.Sequential([\n", " tf.keras.Input([28, 28]),\n", " tf.keras.layers.Flatten(),\n", " tf.keras.layers.Dense(10, activation=tf.nn.softmax)\n", "])\n", "\n", "opt = tf.keras.optimizers.SGD(learning_rate=0.001)\n", "loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()\n", "\n", "\n", "@tf.function\n", "def training_step(batch, labels):\n", " \n", " with tf.GradientTape() as tape:\n", " output=linear_model(batch)\n", " loss = loss_fn(labels, output)\n", " \n", " gradients = tape.gradient(loss, linear_model.trainable_weights)\n", " opt.apply_gradients(zip(gradients, linear_model.trainable_weights))\n", " \n", " return loss" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for _ in range(4):\n", " for i,(images, labels) in enumerate(training_data): \n", " loss = training_step(images, labels)\n", "\n", " if not i % 500:\n", " print(i, \" : \", loss.numpy())\n", " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Evaluate Model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def calculate_accuracy(model, dataset):\n", " correct_predictions = 0\n", " predictions = 0\n", " for images, labels in dataset:\n", " output = model(images)\n", " predictions += output.shape[0]\n", " correct_predictions += tf.reduce_sum(tf.cast(tf.equal(labels, tf.cast(tf.argmax(output, axis=-1), dtype=tf.uint8)), dtype=tf.float32))\n", " return correct_predictions / predictions" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(calculate_accuracy(linear_model, training_data))\n", "print(calculate_accuracy(linear_model, test_data))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Pretty good but not good enough" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# First practice task let them create some fully connected layers" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [ "MLP = tf.keras.models.Sequential([\n", " tf.keras.Input([28, 28]),\n", " tf.keras.layers.Flatten(),\n", " tf.keras.layers.Dense(100, activation=tf.nn.relu),\n", " tf.keras.layers.Dense(10, activation=tf.nn.softmax)\n", "])\n", "\n", "opt = tf.keras.optimizers.SGD(learning_rate=0.001)\n", "loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()\n", "\n", "\n", "@tf.function\n", "def training_step(batch, labels):\n", " \n", " with tf.GradientTape() as tape:\n", " output=MLP(batch)\n", " loss = loss_fn(labels, output)\n", " \n", " gradients = tape.gradient(loss, MLP.trainable_weights)\n", " opt.apply_gradients(zip(gradients, MLP.trainable_weights))\n", " \n", " return loss\n", "\n", "\n", "for _ in range(4):\n", " for i,(images, labels) in enumerate(training_data): \n", " loss = training_step(images, labels)\n", "\n", " if not i % 500:\n", " print(i, \" : \", loss.numpy())\n", " \n", "print(calculate_accuracy(MLP, training_data).numpy())\n", "print(calculate_accuracy(MLP, test_data).numpy())\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Going to fashion" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mnist = tf.keras.datasets.fashion_mnist\n", "\n", "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", "x_train = tf.cast(x_train, tf.float32)\n", "x_test = tf.cast(x_test, tf.float32)\n", "visualize_images(x_train)\n", "\n", "training_data = tf.data.Dataset.from_tensor_slices((x_train,y_train.reshape(-1,))).shuffle(10000).batch(32)\n", "test_data = tf.data.Dataset.from_tensors((x_test, y_test))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for _ in range(4):\n", " for i,(images, labels) in enumerate(training_data): \n", " loss = training_step(images, labels)\n", "\n", " if not i % 500:\n", " print(i, \" : \", loss.numpy())\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(calculate_accuracy(MLP, training_data).numpy())\n", "print(calculate_accuracy(MLP, test_data).numpy())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "ooooh" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# CNN time" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cnn = tf.keras.models.Sequential([\n", " tf.keras.Input([28, 28]),\n", " tf.keras.layers.Reshape([28,28,1]),\n", " tf.keras.layers.Conv2D(8, kernel_size=3,padding='valid', activation=tf.nn.relu),\n", " tf.keras.layers.Conv2D(16, kernel_size=3,padding='valid', activation=tf.nn.relu),\n", " tf.keras.layers.Flatten(),\n", " tf.keras.layers.Dense(10, activation=tf.nn.softmax)\n", "])\n", "\n", "@tf.function\n", "def training_step(batch, labels):\n", " \n", " with tf.GradientTape() as tape:\n", " output=cnn(batch)\n", " loss = loss_fn(labels, output)\n", " \n", " gradients = tape.gradient(loss, cnn.trainable_weights)\n", " opt.apply_gradients(zip(gradients, cnn.trainable_weights))\n", " \n", " return loss" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for _ in range(4):\n", " for i,(images, labels) in enumerate(training_data): \n", " loss = training_step(tf.cast(images,tf.float32), labels)\n", "\n", " if not i % 500:\n", " print(i, \" : \", loss.numpy())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(calculate_accuracy(cnn, 
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cnn = tf.keras.models.Sequential([\n", "    tf.keras.Input([28, 28]),\n", "    tf.keras.layers.Reshape([28, 28, 1]),\n", "    tf.keras.layers.Conv2D(64, kernel_size=3, padding='valid', activation=tf.nn.relu),\n", "    tf.keras.layers.Conv2D(32, kernel_size=3, padding='valid', activation=tf.nn.relu),\n", "    tf.keras.layers.Conv2D(16, kernel_size=3, padding='valid', activation=tf.nn.relu),\n", "    tf.keras.layers.Conv2D(8, kernel_size=3, padding='valid', activation=tf.nn.relu),\n", "    tf.keras.layers.Conv2D(4, kernel_size=3, padding='valid', activation=tf.nn.relu),\n", "    tf.keras.layers.Flatten(),\n", "    tf.keras.layers.Dense(10, activation=tf.nn.softmax)\n", "])\n", "\n", "\n", "@tf.function\n", "def training_step(batch, labels):\n", "    with tf.GradientTape() as tape:\n", "        output = cnn(batch)\n", "        loss = loss_fn(labels, output)\n", "\n", "    gradients = tape.gradient(loss, cnn.trainable_weights)\n", "    opt.apply_gradients(zip(gradients, cnn.trainable_weights))\n", "\n", "    return loss" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for _ in range(4):\n", "    for i, (images, labels) in enumerate(training_data):\n", "        loss = training_step(tf.cast(images, tf.float32), labels)\n", "\n", "        if i % 500 == 0:\n", "            print(i, \" : \", loss.numpy())" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(calculate_accuracy(cnn, training_data).numpy())\n", "print(calculate_accuracy(cnn, test_data).numpy())" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# RNNs" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "path_to_file = tf.keras.utils.get_file(\n", "    'shakespeare.txt',\n", "    'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')\n", "text = open(path_to_file, 'rb').read().decode(encoding='utf-8')\n", "print('Length of text: {} characters'.format(len(text)))\n", "print(text[:250])" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "characters = sorted(set(text))" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(len(characters))" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "char_to_index = {u: i for i, u in enumerate(characters)}\n", "index_to_char = np.array(characters)\n", "vocab_size = len(characters)\n", "text_int = np.array([char_to_index[k] for k in text])\n", "\n", "# Cut the text into sequences of 100 characters, then group 16 sequences per batch.\n", "data = (tf.data.Dataset.from_tensor_slices(text_int)\n", "        .batch(100, drop_remainder=True)\n", "        .shuffle(1000)\n", "        .repeat()\n", "        .batch(16, drop_remainder=True))" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Peek at one batch to check the pipeline; each batch should have shape (16, 100).\n", "for seq in data:\n", "    print(seq.shape)\n", "    break" ] },
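{ "cell_type": "markdown", "metadata": {}, "source": [ "Before training, it is worth verifying that inputs and targets line up the way next-character prediction expects: the target at position t is the input character at t + 1. A minimal added check, reusing `index_to_char` from above; the target line should be the input line shifted left by one character:" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Added check: decode the first 40 characters of one batch.\n", "sample = next(iter(data))  # integer ids, shape (16, 100)\n", "input_ids, target_ids = sample[:, :-1], sample[:, 1:]\n", "print('input : ' + ''.join(index_to_char[j] for j in input_ids[0, :40].numpy()))\n", "print('target: ' + ''.join(index_to_char[j] for j in target_ids[0, :40].numpy()))" ] },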
\n", " with tf.GradientTape() as tape:\n", " output = rnn(input_seq)\n", " loss = tf.reduce_mean(loss_fn(output_seq,output))\n", " \n", " gradients = tape.gradient(loss, rnn.trainable_weights)\n", " opt.apply_gradients(zip(gradients, rnn.trainable_weights))\n", " \n", " rnn.reset_states() \n", " \n", " if not i % 100 :\n", " print(i, loss)\n", " \n", " if i >= 2000:\n", " break" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## look at generation" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "start_letter = 'l'\n", "start_letter_index = np.array([char_to_index[start_letter]])\n", "print(start_letter_index)\n", "\n", "reshaped = tf.reshape(start_letter_index, [1,1])\n", "\n", "print(reshaped)\n", "next_input = tf.one_hot(reshaped, axis=-1, depth=65)\n", "\n", "gen_text = str(start_letter)\n", "\n", "rnn.reset_states()\n", "\n", "for i in range(500):\n", " n = rnn(next_input)\n", " # tmp = np.asarray([np.argmax(n[0,0].numpy())])\n", " tmp = np.random.choice(65, 1, p=tf.nn.softmax(n[0,0]).numpy())\n", " gen_text += index_to_char[tmp[0]]\n", " next_input = tf.one_hot(tf.reshape(tmp, [1,1]), axis=-1, depth=65)\n", "\n", "print(gen_text)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 2 }