Initial commit

2026-02-20 13:50:41 +00:00 · 2021-01-30 21:49:15 +01:00
commit 65b8c80495
432 changed files with 1290844 additions and 0 deletions
--- a/.github/FUNDING.yml
+++ b/.github/FUNDING.yml
@@ -0,0 +1,12 @@
+# These are supported funding model platforms
+
+github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
+patreon: aladdinpersson # Replace with a single Patreon username
+open_collective: # Replace with a single Open Collective username
+ko_fi: # Replace with a single Ko-fi username
+tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
+community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
+liberapay: # Replace with a single Liberapay username
+issuehunt: # Replace with a single IssueHunt username
+otechie: # Replace with a single Otechie username
+custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.idea/
+ML/Pytorch/more_advanced/image_captioning/flickr8k/
+ML/algorithms/svm/__pycache__/utils.cpython-38.pyc
--- a/.travis.yml
+++ b/.travis.yml
@@ -0,0 +1,33 @@
+language: python
+
+# Default Python version
+python: 3.8
+
+# Install ruby to get gem command
+before_install:
+  - sudo apt-add-repository -y ppa:brightbox/ruby-ng
+  - sudo apt-get -y update
+  - sudo apt-get -y install ruby-full
+
+install:
+    - pip install torch
+    - pip install codecov==2.0.15
+    - pip install pytest-cov==2.7.1
+
+#before_install:
+#    - cd Algorithm_tests/sorting_tests
+# Install awesome_bot for README.md broken link checking
+
+before_script:
+  - gem install awesome_bot
+
+script:
+     # Check the links in README.md (duplicate links and redirects are tolerated)
+     - awesome_bot README.md --allow-dupe --allow-redirect
+     #- flake8 --max-line-length=88
+     # Run the tests under ML_tests/ with coverage measurement.
+     # NOTE(review): --cov=investpy names a package that is not referenced
+     # anywhere else in the visible files -- presumably copied from another
+     # project; confirm the intended coverage target.
+     - pytest --cov=investpy ML_tests/
+     #- python ML_tests/LinearRegression_tests/LinearRegression_GD.py
+     #- python ML_tests/LinearRegression_tests/LinearRegression_normal.py
+
+#after_success:
+#  pass
+#- codecov
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2020 Aladdin Persson
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/ML/Kaggles/SantanderTransaction/dataset.py
+++ b/ML/Kaggles/SantanderTransaction/dataset.py
@@ -0,0 +1,23 @@
+import pandas as pd
+import torch
+from torch.utils.data import TensorDataset
+from torch.utils.data.dataset import random_split
+from math import ceil
+
+def get_data():
+    train_data = pd.read_csv("new_shiny_train.csv")
+    y = train_data["target"]
+    X = train_data.drop(["ID_code", "target"], axis=1)
+    X_tensor = torch.tensor(X.values, dtype=torch.float32)
+    y_tensor = torch.tensor(y.values, dtype=torch.float32)
+    ds = TensorDataset(X_tensor, y_tensor)
+    train_ds, val_ds = random_split(ds, [int(0.999*len(ds)), ceil(0.001*len(ds))])
+
+    test_data = pd.read_csv("new_shiny_test.csv")
+    test_ids = test_data["ID_code"]
+    X = test_data.drop(["ID_code"], axis=1)
+    X_tensor = torch.tensor(X.values, dtype=torch.float32)
+    y_tensor = torch.tensor(y.values, dtype=torch.float32)
+    test_ds = TensorDataset(X_tensor, y_tensor)
+
+    return train_ds, val_ds, test_ds, test_ids
--- a/ML/Kaggles/SantanderTransaction/get_data.ipynb
+++ b/ML/Kaggles/SantanderTransaction/get_data.ipynb
--- a/ML/Kaggles/SantanderTransaction/train.py
+++ b/ML/Kaggles/SantanderTransaction/train.py
@@ -0,0 +1,55 @@
+import torch
+from sklearn import metrics
+from tqdm import tqdm
+import torch.nn as nn
+import torch.optim as optim
+from utils import get_predictions
+from dataset import get_data
+from torch.utils.data import DataLoader
+import torch.nn.functional as F
+
+class NN(nn.Module):
+    def __init__(self, input_size, hidden_dim):
+        super(NN, self).__init__()
+        self.bn = nn.BatchNorm1d(input_size)
+        self.fc1 = nn.Linear(2, hidden_dim)
+        self.fc2 = nn.Linear(input_size//2*hidden_dim, 1)
+
+    def forward(self, x):
+        N = x.shape[0]
+        x = self.bn(x)
+        orig_features = x[:, :200].unsqueeze(2) # (N, 200, 1)
+        new_features = x[:, 200:].unsqueeze(2) # (N, 200, 1)
+        x = torch.cat([orig_features, new_features], dim=2) # (N, 200, 2)
+        x = F.relu(self.fc1(x)).reshape(N, -1) # (N, 200*hidden_dim)
+        return torch.sigmoid(self.fc2(x)).view(-1)
+
+
+# Train on the GPU when one is available, otherwise on the CPU.
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+model = NN(input_size=400, hidden_dim=100).to(DEVICE)
+optimizer = optim.Adam(model.parameters(), lr=2e-3, weight_decay=1e-4)
+# The model's forward pass already ends in a sigmoid, so plain BCELoss is used.
+loss_fn = nn.BCELoss()
+train_ds, val_ds, test_ds, test_ids = get_data()
+train_loader = DataLoader(train_ds, batch_size=1024, shuffle=True)
+val_loader = DataLoader(val_ds, batch_size=1024)
+test_loader = DataLoader(test_ds, batch_size=1024)
+
+for epoch in range(20):
+    # Report the validation ROC AUC before this epoch's updates, so the first
+    # value printed belongs to the untrained model.
+    probabilities, true = get_predictions(val_loader, model, device=DEVICE)
+    print(f"VALIDATION ROC: {metrics.roc_auc_score(true, probabilities)}")
+
+    for batch_idx, (data, targets) in enumerate(train_loader):
+        data = data.to(DEVICE)
+        targets = targets.to(DEVICE)
+
+        # forward
+        scores = model(data)
+        loss = loss_fn(scores, targets)
+        # backward: clear the old gradients, backpropagate, take one Adam step
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+
+# After training, write the test-set predictions to the submission file.
+from utils import get_submission
+get_submission(model, test_loader, test_ids, DEVICE)
+
+
--- a/ML/Kaggles/SantanderTransaction/utils.py
+++ b/ML/Kaggles/SantanderTransaction/utils.py
@@ -0,0 +1,39 @@
+import pandas as pd
+import numpy as np
+import torch
+
+def get_predictions(loader, model, device):
+    model.eval()
+    saved_preds = []
+    true_labels = []
+
+    with torch.no_grad():
+        for x,y in loader:
+            x = x.to(device)
+            y = y.to(device)
+            scores = model(x)
+            saved_preds += scores.tolist()
+            true_labels += y.tolist()
+
+    model.train()
+    return saved_preds, true_labels
+
+def get_submission(model, loader, test_ids, device):
+    all_preds = []
+    model.eval()
+    with torch.no_grad():
+        for x,y in loader:
+            print(x.shape)
+            x = x.to(device)
+            score = model(x)
+            prediction = score.float()
+            all_preds += prediction.tolist()
+
+    model.train()
+
+    df = pd.DataFrame({
+        "ID_code" : test_ids.values,
+        "target" : np.array(all_preds)
+    })
+
+    df.to_csv("sub.csv", index=False)
--- a/ML/Kaggles/Titanic/FirstKaggle_Titanic.ipynb
+++ b/ML/Kaggles/Titanic/FirstKaggle_Titanic.ipynb
@@ -0,0 +1,364 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "electoral-scientist",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "surrounded-albert",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = pd.read_csv(\"train.csv\")\n",
+    "test = pd.read_csv(\"test.csv\")\n",
+    "test_ids = test[\"PassengerId\"]\n",
+    "\n",
+    "def clean(data):\n",
+    "    data = data.drop([\"Ticket\", \"PassengerId\", \"Name\", \"Cabin\"], axis=1)\n",
+    "    \n",
+    "    cols = [\"SibSp\", \"Parch\", \"Fare\", \"Age\"]\n",
+    "    for col in cols:\n",
+    "        data[col].fillna(data[col].median(), inplace=True)\n",
+    "        \n",
+    "    data.Embarked.fillna(\"U\", inplace=True)\n",
+    "    return data\n",
+    "\n",
+    "data = clean(data)\n",
+    "test = clean(test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "electronic-wyoming",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Survived</th>\n",
+       "      <th>Pclass</th>\n",
+       "      <th>Sex</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>SibSp</th>\n",
+       "      <th>Parch</th>\n",
+       "      <th>Fare</th>\n",
+       "      <th>Embarked</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>male</td>\n",
+       "      <td>22.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>7.2500</td>\n",
+       "      <td>S</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>female</td>\n",
+       "      <td>38.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>71.2833</td>\n",
+       "      <td>C</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>female</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>7.9250</td>\n",
+       "      <td>S</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Survived  Pclass     Sex   Age  SibSp  Parch     Fare Embarked\n",
+       "0         0       3    male  22.0      1      0   7.2500        S\n",
+       "1         1       1  female  38.0      1      0  71.2833        C\n",
+       "2         1       3  female  26.0      0      0   7.9250        S"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.head(3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "legendary-conditions",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['female' 'male']\n",
+      "['C' 'Q' 'S' 'U']\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Survived</th>\n",
+       "      <th>Pclass</th>\n",
+       "      <th>Sex</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>SibSp</th>\n",
+       "      <th>Parch</th>\n",
+       "      <th>Fare</th>\n",
+       "      <th>Embarked</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>22.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>7.2500</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>38.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>71.2833</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>7.9250</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>53.1000</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>8.0500</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Survived  Pclass  Sex   Age  SibSp  Parch     Fare  Embarked\n",
+       "0         0       3    1  22.0      1      0   7.2500         2\n",
+       "1         1       1    0  38.0      1      0  71.2833         0\n",
+       "2         1       3    0  26.0      0      0   7.9250         2\n",
+       "3         1       1    0  35.0      1      0  53.1000         2\n",
+       "4         0       3    1  35.0      0      0   8.0500         2"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn import preprocessing\n",
+    "le = preprocessing.LabelEncoder()\n",
+    "columns = [\"Sex\", \"Embarked\"]\n",
+    "\n",
+    "for col in columns:\n",
+    "    data[col] = le.fit_transform(data[col])\n",
+    "    test[col] = le.transform(test[col])\n",
+    "    print(le.classes_)\n",
+    "      \n",
+    "data.head(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "assumed-screening",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "y = data[\"Survived\"]\n",
+    "X = data.drop(\"Survived\", axis=1)\n",
+    "\n",
+    "X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "industrial-internship",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "clf = LogisticRegression(random_state=0, max_iter=1000).fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "fifteen-enemy",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8888888888888888"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "predictions = clf.predict(X_val)\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "accuracy_score(y_val, predictions)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "juvenile-anthropology",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "submission_preds = clf.predict(test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "virgin-settlement",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.DataFrame({\"PassengerId\": test_ids.values,\n",
+    "                   \"Survived\": submission_preds,\n",
+    "                  })"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "tribal-bidding",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.to_csv(\"submission.csv\", index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/ML/PaperReviews/Randaugment.pdf
+++ b/ML/PaperReviews/Randaugment.pdf
--- a/ML/PaperReviews/UNET.pdf
+++ b/ML/PaperReviews/UNET.pdf
--- a/ML/Projects/DeepSort/sort_w_attention.py
+++ b/ML/Projects/DeepSort/sort_w_attention.py
@@ -0,0 +1,216 @@
+"""
+Training a Pointer Network which is a modified
+Seq2Seq with attention network for the task of
+sorting arrays.
+"""
+
+from torch.utils.data import (
+    Dataset,
+    DataLoader,
+)
+import random
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from utils import sort_array, save_checkpoint, load_checkpoint
+from torch.utils.tensorboard import SummaryWriter  # to print to tensorboard
+
+
+class SortArray(Dataset):
+    def __init__(self, batch_size, min_int, max_int, min_size, max_size):
+        self.batch_size = batch_size
+        self.min_int = min_int
+        self.max_int = max_int + 1
+        self.min_size = min_size
+        self.max_size = max_size + 1
+        self.start_tok = torch.tensor([-1]).expand(1, self.batch_size)
+
+    def __len__(self):
+        return 10000 // self.batch_size
+
+    def __getitem__(self, index):
+        size_of_array = torch.randint(
+            low=self.min_size, high=self.max_size, size=(1, 1)
+        )
+
+        unsorted_arr = torch.rand(size=(size_of_array, self.batch_size)) * (
+            self.max_int - self.min_int
+        )
+        # unsorted_arr = torch.randint(
+        #    low=self.min_int, high=self.max_int, size=(size_of_array, self.batch_size)
+        # )
+        sorted_arr, indices = torch.sort(unsorted_arr, dim=0)
+
+        return unsorted_arr.float(), torch.cat((self.start_tok, indices), 0)
+
+
+class Encoder(nn.Module):
+    def __init__(self, hidden_size, num_layers):
+        super(Encoder, self).__init__()
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+
+        self.rnn = nn.LSTM(1, hidden_size, num_layers)
+
+    def forward(self, x):
+        embedding = x.unsqueeze(2)
+        # embedding shape: (seq_length, N, 1)
+
+        encoder_states, (hidden, cell) = self.rnn(embedding)
+        # encoder_states: (seq_length, N, hidden_size)
+
+        return encoder_states, hidden, cell
+
+
+class Decoder(nn.Module):
+    """One decoding step of the pointer network: attend over the encoder states.
+
+    The attention distribution over the input positions is itself the output
+    ("pointer"); the attention-weighted context plus the previous input value
+    is fed to the LSTM to update the decoder state.
+    """
+
+    def __init__(self, hidden_size, num_layers, units=100):
+        super(Decoder, self).__init__()
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+        # LSTM input is the context vector (hidden_size) plus one scalar input
+        self.rnn = nn.LSTM(hidden_size + 1, hidden_size, num_layers)
+        # Scores a (decoder hidden, encoder state) pair: 2 * hidden_size -> units -> 1
+        self.energy = nn.Linear(hidden_size * 2, units)
+        self.fc = nn.Linear(units, 1)
+        # Normalizes over dim 0, i.e. across the sequence positions
+        self.softmax = nn.Softmax(dim=0)
+        self.relu = nn.ReLU()
+
+    def forward(self, x, encoder_states, hidden, cell):
+        """Return ``(attention, energy, hidden, cell)`` for one decoding step.
+
+        ``attention`` and ``energy`` are returned with their trailing singleton
+        dimension squeezed away, i.e. as (seq_length, N).
+        """
+        sequence_length = encoder_states.shape[0]
+        batch_size = encoder_states.shape[1]
+
+        # Tile the hidden state along dim 0 so it can be concatenated with every
+        # encoder state; the shapes only line up when num_layers == 1.
+        h_reshaped = hidden.repeat(sequence_length, 1, 1)
+        energy = self.relu(self.energy(torch.cat((h_reshaped, encoder_states), dim=2)))
+        energy = self.fc(energy)
+
+        # energy: (seq_length, N, 1)
+        attention = self.softmax(energy)
+
+        # attention: (seq_length, N, 1), snk
+        # encoder_states: (seq_length, N, hidden_size), snl
+        # we want context_vector: (1, N, hidden_size), i.e knl
+        context_vector = torch.einsum("snk,snl->knl", attention, encoder_states)
+        rnn_input = torch.cat([context_vector, x.unsqueeze(0).unsqueeze(2)], dim=2)
+
+        # rnn_input: (1, N, hidden_size + 1)
+        _, (hidden, cell) = self.rnn(rnn_input, (hidden, cell))
+        return attention.squeeze(2), energy.squeeze(2), hidden, cell
+
+
+class Seq2Seq(nn.Module):
+    def __init__(self, encoder, decoder):
+        super(Seq2Seq, self).__init__()
+        self.encoder = encoder
+        self.decoder = decoder
+
+    def forward(self, source, target, teacher_force_ratio=0.5):
+        batch_size = source.shape[1]
+        target_len = target.shape[0]
+
+        outputs = torch.zeros(target_len, batch_size, target_len - 1).to(device)
+        encoder_states, hidden, cell = self.encoder(source)
+
+        # First input will be <SOS> token
+        x = target[0]
+        predictions = torch.zeros(target_len, batch_size)
+
+        for t in range(1, target_len):
+            # At every time step use encoder_states and update hidden, cell
+            attention, energy, hidden, cell = self.decoder(
+                x, encoder_states, hidden, cell
+            )
+
+            # Store prediction for current time step
+            outputs[t] = energy.permute(1, 0)
+
+            # Get the best word the Decoder predicted (index in the vocabulary)
+            best_guess = attention.argmax(0)
+            predictions[t, :] = best_guess
+
+            # With probability of teacher_force_ratio we take the actual next word
+            # otherwise we take the word that the Decoder predicted it to be.
+            # Teacher Forcing is used so that the model gets used to seeing
+            # similar inputs at training and testing time, if teacher forcing is 1
+            # then inputs at test time might be completely different than what the
+            # network is used to. This was a long comment.
+            x = target[t] if random.random() < teacher_force_ratio else best_guess
+
+        return outputs, predictions[1:, :]
+
+
+### We're ready to define everything we need for training our Seq2Seq model ###
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+load_model = False
+save_model = True
+
+# Training hyperparameters
+num_epochs = 1000
+learning_rate = 3e-5
+batch_size = 32
+hidden_size = 1024
+num_layers = 1  # Current implementation is only for 1 layered
+min_int = 1
+max_int = 10
+min_size = 2
+max_size = 15
+
+# Tensorboard to get nice plots etc
+writer = SummaryWriter(f"runs/loss_plot2")
+step = 0
+
+encoder_net = Encoder(hidden_size, num_layers).to(device)
+decoder_net = Decoder(hidden_size, num_layers).to(device)
+
+model = Seq2Seq(encoder_net, decoder_net).to(device)
+optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+
+criterion = nn.CrossEntropyLoss()
+
+if load_model:
+    load_checkpoint(torch.load("my_checkpoint.pth.tar"), model, optimizer)
+
+# following is for testing the network, uncomment this if you want
+# to try out a few arrays interactively
+# sort_array(encoder_net, decoder_net, device)
+
+# Every dataset item is already a full batch, so the DataLoader batch size is 1
+# and the extra leading dimension is squeezed away inside the loop below.
+dataset = SortArray(batch_size, min_int, max_int, min_size, max_size)
+train_loader = DataLoader(dataset, batch_size=1, shuffle=False)
+
+for epoch in range(num_epochs):
+    print(f"[Epoch {epoch} / {num_epochs}]")
+
+    # Checkpoint at the start of every epoch, before that epoch's updates
+    if save_model:
+        checkpoint = {
+            "state_dict": model.state_dict(),
+            "optimizer": optimizer.state_dict(),
+            "steps": step,
+        }
+        save_checkpoint(checkpoint)
+
+    for batch_idx, (unsorted_arrs, sorted_arrs) in enumerate(train_loader):
+        inp_data = unsorted_arrs.squeeze(0).to(device)
+        target = sorted_arrs.squeeze(0).to(device)
+
+        # Forward prop
+        output, prediction = model(inp_data, target)
+
+        # Drop the first output row (the loop in Seq2Seq starts at t = 1, so
+        # row 0 is never filled), then reshape so that output is
+        # (seq_len*N, seq_len) and target is (seq_len*N)
+        output = output[1:].reshape(-1, output.shape[2])
+        target = target[1:].reshape(-1)
+
+        optimizer.zero_grad()
+        loss = criterion(output, target)
+
+        # Back prop
+        loss.backward()
+
+        # Clip to avoid exploding gradient issues, makes sure grads are
+        # within a healthy range
+        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
+
+        # Gradient descent step
+        optimizer.step()
+
+        # plot to tensorboard
+        writer.add_scalar("Training loss", loss, global_step=step)
+        step += 1
--- a/ML/Projects/DeepSort/utils.py
+++ b/ML/Projects/DeepSort/utils.py
@@ -0,0 +1,75 @@
+import torch
+
+
+def ask_user():
+    print("Write your array as a list [i,j,k..] with arbitrary positive numbers")
+    array = input("Input q if you want to quit \n")
+    return array
+
+
+def sort_array(encoder, decoder, device, arr=None):
+    """
+    A very simple example of use of the model
+    Input: encoder nn.Module
+           decoder nn.Module
+           device
+           array to sort (optional)
+    """
+
+    if arr is None:
+        arr = ask_user()
+
+    with torch.no_grad():
+        while arr != "q":
+            # Avoid numerical errors by rounding to max_len
+            arr = eval(arr)
+            lengths = [
+                len(str(elem).split(".")[1]) if len(str(elem).split(".")) > 1 else 0
+                for elem in arr
+            ]
+            max_len = max(lengths)
+            source = torch.tensor(arr, dtype=torch.float).to(device).unsqueeze(1)
+            batch_size = source.shape[1]
+            target_len = source.shape[0] + 1
+
+            outputs = torch.zeros(target_len, batch_size, target_len - 1).to(device)
+            encoder_states, hidden, cell = encoder(source)
+
+            # First input will be <SOS> token
+            x = torch.tensor([-1], dtype=torch.float).to(device)
+            predictions = torch.zeros((target_len)).to(device)
+
+            for t in range(1, target_len):
+                # At every time step use encoder_states and update hidden, cell
+                attention, energy, hidden, cell = decoder(
+                    x, encoder_states, hidden, cell
+                )
+
+                # Store prediction for current time step
+                outputs[t] = energy.permute(1, 0)
+
+                # Get the best word the Decoder predicted (index in the vocabulary)
+                best_guess = attention.argmax(0)
+                predictions[t] = best_guess.item()
+                x = torch.tensor([best_guess.item()], dtype=torch.float).to(device)
+
+            output = [
+                round(source[predictions[1:].long()][i, :].item(), max_len)
+                for i in range(source.shape[0])
+            ]
+
+            print(f"Here's the result: {output}")
+            arr = ask_user()
+
+
+def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
+    print("=> Saving checkpoint")
+    torch.save(state, filename)
+
+
+def load_checkpoint(checkpoint, model, optimizer):  # , steps):
+    print("=> Loading checkpoint")
+    model.load_state_dict(checkpoint["state_dict"])
+    optimizer.load_state_dict(checkpoint["optimizer"])
+    # steps = checkpoint['steps']
+    # return steps
--- a/ML/Projects/Exploring_MNIST/README.md
+++ b/ML/Projects/Exploring_MNIST/README.md
@@ -0,0 +1,45 @@
+# Exploring the MNIST dataset with PyTorch
+
+The goal of this small project is to train different models and see what kind of test accuracies we can get on the MNIST dataset. I tried some popular models (LeNet, VGG, Inception net, ResNet) and will likely try more in the future as I learn more network architectures. I used an exponential learning rate decay and data augmentation. In the beginning I was just using every data augmentation other people were using, but I learned that using RandomHorizontalFlip when learning to recognize digits might not be so useful (heh). I also used a lambda/weight decay of a pretty standard 5e-4. During training I first split off a validation set of about 10000 examples and made sure that the model was getting high accuracy on the validation set with the current hyperparameters. After making sure that it wasn't just overfitting the training set, I changed the setup so that the model used all of the training examples (60000), and once it had trained to about ~99.9% training accuracy I tested it on the test set.
+
+## Accuracy
+| Model |  Number of epochs  | Training set acc. | Test set acc. |
+| ----------------- | ----------- | ----------------- | ----------- |
+| [LeNet](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf) | 150 | 99.69%      | 99.12%  |
+| [VGG13](https://arxiv.org/abs/1409.1556)              | 100 |  99.95%      |  99.67%   |
+| [VGG16](https://arxiv.org/abs/1409.1556)              | 100 |  99.92%      |  99.68%   |
+| [GoogLeNet](https://arxiv.org/abs/1409.4842)          | 100 |  99.90%      |  99.71%   |
+| [ResNet101](https://arxiv.org/abs/1512.03385)          | 100 | 99.90%      |  99.68%  |
+
+TODO: MobileNet, ResNext, SqueezeNet, .., ?
+
+### Comments and things to improve
+I believe LeNet has more potential, as it's not really overfitting the training set yet and needs more epochs. I believe the original paper by LeCun et al. (1998) reported about 99.1% test accuracy, which is similar to my results, but we also need to remember the limitations of that time. I do think training it for a bit longer, to get it to ~99.8-99.9% on the training set, would bring it up to perhaps 99.2-99.3% test accuracy if we're lucky. So far I think the other models have performed quite well and are close, at least from my understanding, to the current state of the art. If you would like to really maximize accuracy, you would train an ensemble of models and then average their predictions, but I've not done that here as I don't think it's that interesting. This was mostly to learn different network architectures and then check that they work as intended. If you find anything that I can improve or any mistakes, please tell me what and I'll do my best to fix it!
+
+### How to run
+```bash
+usage: train.py [-h] [--resume PATH] [--lr LR] [--weight-decay R]
+                [--momentum R] [--epochs N] [--batch-size N]
+                [--log-interval N] [--seed S] [--number-workers S]
+                [--init-padding S] [--create-validationset] [--save-model]
+
+PyTorch MNIST
+
+optional arguments:
+  --resume PATH Saved model. (ex: PATH = checkpoint/mnist_LeNet.pth.tar)
+  --batch-size N (ex: --batch-size 64), default is 128.
+  --epochs N  (ex: --epochs 10) default is 100.
+  --lr LR learning rate (ex: --lr 0.01), default is 0.001.
+  --momentum M SGD w momentum (ex: --momentum 0.5), default is 0.9.
+  --seed S random seed (ex: --seed 3), default is 1.
+  --log-interval N print accuracy every N mini-batches (ex: --log-interval 50), default is 240.
+  --init-padding S Initial padding on images (ex: --init-padding 5), default is 2 to make 28x28 into 32x32.
+  --create-validationset to create a validation set
+  --save-model to save weights
+  --weight-decay R What weight decay you want (ex: --weight-decay 1e-4), default 1e-5.
+  --number-workers S How many num workers you want in PyTorch (ex --number-workers 2), default is 0.
+
+
+Example of a run is:
+python train.py --save-model --resume checkpoint/mnist_LeNet.pth.tar --weight-decay 1e-5 --number-workers 2
+```
--- a/ML/Projects/Exploring_MNIST/networks/pycache/googLeNet.cpython-37.pyc
+++ b/ML/Projects/Exploring_MNIST/networks/pycache/googLeNet.cpython-37.pyc
--- a/ML/Projects/Exploring_MNIST/networks/pycache/import_all_networks.cpython-37.pyc
+++ b/ML/Projects/Exploring_MNIST/networks/pycache/import_all_networks.cpython-37.pyc
--- a/ML/Projects/Exploring_MNIST/networks/pycache/lenet.cpython-37.pyc
+++ b/ML/Projects/Exploring_MNIST/networks/pycache/lenet.cpython-37.pyc
--- a/ML/Projects/Exploring_MNIST/networks/pycache/resnet.cpython-37.pyc
+++ b/ML/Projects/Exploring_MNIST/networks/pycache/resnet.cpython-37.pyc
--- a/ML/Projects/Exploring_MNIST/networks/pycache/vgg.cpython-37.pyc
+++ b/ML/Projects/Exploring_MNIST/networks/pycache/vgg.cpython-37.pyc
--- a/ML/Projects/Exploring_MNIST/networks/googLeNet.py
+++ b/ML/Projects/Exploring_MNIST/networks/googLeNet.py
@@ -0,0 +1,109 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class Inception(nn.Module):
+    """Inception block: four parallel branches concatenated along channels.
+
+    The output has ``out1x1 + out3x3 + out5x5 + outpool`` channels. Every
+    branch uses stride 1 with size-preserving padding, so the spatial size
+    of the input is kept.
+    """
+
+    def __init__(
+        self, in_channels, out1x1, out3x3reduced, out3x3, out5x5reduced, out5x5, outpool
+    ):
+        super().__init__()
+
+        # Branch 1: plain 1x1 convolution.
+        self.branch_1 = BasicConv2d(in_channels, out1x1, kernel_size=1, stride=1)
+
+        # Branch 2: 1x1 reduction followed by a 3x3 convolution.
+        reduce_3x3 = BasicConv2d(in_channels, out3x3reduced, kernel_size=1)
+        conv_3x3 = BasicConv2d(out3x3reduced, out3x3, kernel_size=3, padding=1)
+        self.branch_2 = nn.Sequential(reduce_3x3, conv_3x3)
+
+        # Branch 3: the original GoogLeNet paper uses a 5x5 convolution here;
+        # following Inception_v2 it is replaced by two stacked 3x3 convolutions.
+        reduce_5x5 = BasicConv2d(in_channels, out5x5reduced, kernel_size=1)
+        first_3x3 = BasicConv2d(out5x5reduced, out5x5, kernel_size=3, padding=1)
+        second_3x3 = BasicConv2d(out5x5, out5x5, kernel_size=3, padding=1)
+        self.branch_3 = nn.Sequential(reduce_5x5, first_3x3, second_3x3)
+
+        # Branch 4: 3x3 max pooling followed by a 1x1 projection.
+        pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
+        pool_projection = BasicConv2d(in_channels, outpool, kernel_size=1)
+        self.branch_4 = nn.Sequential(pool, pool_projection)
+
+    def forward(self, x):
+        """Run all four branches on ``x`` and concatenate them on dim 1."""
+        branches = (self.branch_1, self.branch_2, self.branch_3, self.branch_4)
+        return torch.cat([branch(x) for branch in branches], 1)
+
+
+class GoogLeNet(nn.Module):
+    """GoogLeNet-style classifier built from ``Inception`` blocks.
+
+    Two stride-2 max pools followed by an 8x8 average pool reduce a 32x32
+    input to a single 1024-dimensional feature vector for the classifier.
+
+    Args:
+        img_channel: number of channels of the input images.
+        num_classes: size of the output layer. Defaults to 10, the value
+            that used to be hard-coded, so existing callers are unaffected.
+    """
+
+    def __init__(self, img_channel, num_classes=10):
+        super().__init__()
+
+        self.first_layers = nn.Sequential(
+            BasicConv2d(img_channel, 192, kernel_size=3, padding=1)
+        )
+
+        # Each block's in_channels equals the summed branch widths of the
+        # block before it (e.g. 64 + 128 + 32 + 32 = 256 for _3b).
+        self._3a = Inception(192, 64, 96, 128, 16, 32, 32)
+        self._3b = Inception(256, 128, 128, 192, 32, 96, 64)
+
+        # Shared by both down-sampling steps; it has no parameters.
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+        self._4a = Inception(480, 192, 96, 208, 16, 48, 64)
+        self._4b = Inception(512, 160, 112, 224, 24, 64, 64)
+        self._4c = Inception(512, 128, 128, 256, 24, 64, 64)
+        self._4d = Inception(512, 112, 144, 288, 32, 64, 64)
+        self._4e = Inception(528, 256, 160, 320, 32, 128, 128)
+
+        self._5a = Inception(832, 256, 160, 320, 32, 128, 128)
+        self._5b = Inception(832, 384, 192, 384, 48, 128, 128)
+
+        self.avgpool = nn.AvgPool2d(kernel_size=8, stride=1)
+        self.linear = nn.Linear(1024, num_classes)
+
+    def forward(self, x):
+        """Return class scores of shape ``(batch, num_classes)``."""
+        out = self.first_layers(x)
+
+        out = self._3a(out)
+        out = self._3b(out)
+        out = self.maxpool(out)
+
+        out = self._4a(out)
+        out = self._4b(out)
+        out = self._4c(out)
+        out = self._4d(out)
+        out = self._4e(out)
+        out = self.maxpool(out)
+
+        out = self._5a(out)
+        out = self._5b(out)
+
+        out = self.avgpool(out)
+        out = out.view(out.size(0), -1)  # flatten to (batch, features)
+        out = self.linear(out)
+
+        return out
+
+
+class BasicConv2d(nn.Module):
+    """Bias-free convolution followed by batch norm and ReLU.
+
+    Extra keyword arguments (kernel_size, stride, padding, ...) are passed
+    through unchanged to ``nn.Conv2d``.
+    """
+
+    def __init__(self, in_channels, out_channels, **kwargs):
+        super().__init__()
+        # No conv bias: the batch norm that follows has its own shift term.
+        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
+        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
+
+    def forward(self, x):
+        """Apply conv -> batch norm -> in-place ReLU."""
+        normalized = self.bn(self.conv(x))
+        return F.relu(normalized, inplace=True)
+
+
+def test():
+    """Smoke test: print the output size for three 1x32x32 inputs."""
+    net = GoogLeNet(1)
+    scores = net(torch.randn(3, 1, 32, 32))
+    print(scores.size())
+
+
+# test()
--- a/ML/Projects/Exploring_MNIST/networks/import_all_networks.py
+++ b/ML/Projects/Exploring_MNIST/networks/import_all_networks.py
@@ -0,0 +1,4 @@
+from networks.vgg import VGG
+from networks.lenet import LeNet
+from networks.resnet import ResNet, residual_template, ResNet50, ResNet101, ResNet152
+from networks.googLeNet import BasicConv2d, Inception, GoogLeNet
--- a/ML/Projects/Exploring_MNIST/networks/lenet.py
+++ b/ML/Projects/Exploring_MNIST/networks/lenet.py
@@ -0,0 +1,60 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class LeNet(nn.Module):
+    """LeNet-style CNN for 32x32 inputs.
+
+    Two 5x5 conv + 2x2 max-pool stages produce a 16x5x5 feature map that is
+    classified by three fully connected layers.
+
+    Args:
+        in_channels: number of channels of the input images.
+        init_weights: if True, apply ``_initialize_weights`` to all layers.
+        num_classes: number of output scores.
+    """
+
+    def __init__(self, in_channels, init_weights=True, num_classes=10):
+        super(LeNet, self).__init__()
+
+        self.num_classes = num_classes
+
+        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=6, kernel_size=5)
+        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
+        self.fc1 = nn.Linear(16 * 5 * 5, 120)
+        self.fc2 = nn.Linear(120, 84)
+        # Honour num_classes instead of a hard-coded 10.
+        self.fc3 = nn.Linear(84, num_classes)
+
+        # Must run after the layers are created: self.modules() only yields
+        # submodules that are already registered, so calling this earlier
+        # initialised nothing.
+        if init_weights:
+            self._initialize_weights()
+
+    def forward(self, x):
+        """Return class scores of shape ``(batch, num_classes)``."""
+        z1 = self.conv1(x)  # 6 x 28 x 28
+        a1 = F.relu(z1)  # 6 x 28 x 28
+        a1 = F.max_pool2d(a1, kernel_size=2, stride=2)  # 6 x 14 x 14
+        z2 = self.conv2(a1)  # 16 x 10 x 10
+        a2 = F.relu(z2)  # 16 x 10 x 10
+        a2 = F.max_pool2d(a2, kernel_size=2, stride=2)  # 16 x 5 x 5
+        flatten_a2 = a2.view(a2.size(0), -1)
+        z3 = self.fc1(flatten_a2)
+        a3 = F.relu(z3)
+        z4 = self.fc2(a3)
+        a4 = F.relu(z4)
+        z5 = self.fc3(a4)
+        return z5
+
+    def _initialize_weights(self):
+        """Kaiming-normal convs, N(0, 0.01) linear weights, zero biases."""
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                nn.init.constant_(m.bias, 0)
+
+
+def test_lenet():
+    """Smoke test: print the output size for a batch of 64 1x32x32 inputs."""
+    net = LeNet(1)
+    x = torch.randn(64, 1, 32, 32)
+    y = net(x)
+    print(y.size())
+
+
+# Only run the smoke test when this file is executed as a script; importing
+# the module (e.g. through networks.import_all_networks) must not trigger a
+# forward pass and a print.
+if __name__ == "__main__":
+    test_lenet()
--- a/ML/Projects/Exploring_MNIST/networks/resnet.py
+++ b/ML/Projects/Exploring_MNIST/networks/resnet.py
@@ -0,0 +1,151 @@
+import torch
+import torch.nn as nn
+
+
+class residual_template(nn.Module):
+    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convs plus a shortcut.
+
+    The last convolution widens the features to ``out_channels * expansion``.
+    ``identity_downsample``, when given, is applied to the block input so the
+    shortcut matches the main path before the two are added.
+    """
+
+    expansion = 4
+
+    def __init__(self, in_channels, out_channels, stride=1, identity_downsample=None):
+        super().__init__()
+        widened = out_channels * self.expansion
+
+        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(out_channels)
+        # Only the 3x3 convolution carries the stride.
+        self.conv2 = nn.Conv2d(
+            out_channels,
+            out_channels,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            bias=False,
+        )
+        self.bn2 = nn.BatchNorm2d(out_channels)
+        self.conv3 = nn.Conv2d(out_channels, widened, kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(widened)
+        self.relu = nn.ReLU(inplace=True)
+        self.identity_downsample = identity_downsample
+        self.stride = stride
+
+    def forward(self, x):
+        """Return ``relu(main_path(x) + shortcut(x))``."""
+        out = self.relu(self.bn1(self.conv1(x)))
+        out = self.relu(self.bn2(self.conv2(out)))
+        out = self.bn3(self.conv3(out))
+
+        if self.identity_downsample is None:
+            shortcut = x
+        else:
+            shortcut = self.identity_downsample(x)
+
+        out += shortcut
+        return self.relu(out)
+
+
+class ResNet(nn.Module):
+    """ResNet backbone assembled from a caller-supplied residual block class.
+
+    Args:
+        residual_template: block class exposing an ``expansion`` attribute and
+            the constructor ``(in_channels, out_channels, stride,
+            identity_downsample)``.
+        layers: number of blocks in each of the four stages.
+        image_channel: number of channels of the input images.
+        num_classes: number of output scores.
+    """
+
+    def __init__(self, residual_template, layers, image_channel, num_classes=10):
+        super().__init__()
+        # Running channel count, updated by _make_layer as stages are built.
+        self.in_channels = 64
+
+        self.conv1 = nn.Conv2d(
+            image_channel, 64, kernel_size=3, stride=1, padding=1, bias=False
+        )
+        self.bn1 = nn.BatchNorm2d(64)
+        self.relu = nn.ReLU(inplace=True)
+
+        # (base channels, stride) of stages layer1 .. layer4.
+        stage_plan = ((64, 1), (128, 2), (256, 2), (512, 2))
+        for position, (width, stride) in enumerate(stage_plan):
+            stage = self._make_layer(
+                residual_template, layers[position], channels=width, stride=stride
+            )
+            setattr(self, "layer%d" % (position + 1), stage)
+
+        self.avgpool = nn.AvgPool2d(kernel_size=4, stride=1)
+        self.fc = nn.Linear(512 * residual_template.expansion, num_classes)
+
+        # initialize weights for conv layers, batch layers
+        for module in self.modules():
+            if isinstance(module, nn.Conv2d):
+                nn.init.kaiming_normal_(
+                    module.weight, mode="fan_out", nonlinearity="relu"
+                )
+            elif isinstance(module, nn.BatchNorm2d):
+                nn.init.constant_(module.weight, 1)
+                nn.init.constant_(module.bias, 0)
+
+    def _make_layer(self, residual_template, num_residuals_blocks, channels, stride):
+        """Build one stage of ``num_residuals_blocks`` blocks.
+
+        Only the first block receives ``stride`` and, when the shape changes,
+        a 1x1 conv + batch norm projection for its shortcut.
+        """
+        expanded = channels * residual_template.expansion
+
+        identity_downsample = None
+        if stride != 1 or self.in_channels != expanded:
+            identity_downsample = nn.Sequential(
+                nn.Conv2d(
+                    self.in_channels,
+                    expanded,
+                    kernel_size=1,
+                    stride=stride,
+                    bias=False,
+                ),
+                nn.BatchNorm2d(expanded),
+            )
+
+        blocks = [
+            residual_template(self.in_channels, channels, stride, identity_downsample)
+        ]
+        self.in_channels = expanded
+
+        blocks.extend(
+            residual_template(self.in_channels, channels)
+            for _ in range(1, num_residuals_blocks)
+        )
+
+        return nn.Sequential(*blocks)
+
+    def forward(self, x):
+        """Return class scores of shape ``(batch, num_classes)``."""
+        x = self.relu(self.bn1(self.conv1(x)))
+
+        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
+            x = stage(x)
+
+        x = self.avgpool(x)
+        x = x.view(x.size(0), -1)  # flatten to (batch, features)
+        return self.fc(x)
+
+
+def ResNet50(img_channel):
+    """Return a ResNet with [3, 4, 6, 3] bottleneck blocks per stage."""
+    return ResNet(residual_template, layers=[3, 4, 6, 3], image_channel=img_channel)
+
+
+def ResNet101(img_channel):
+    """Return a ResNet with [3, 4, 23, 3] bottleneck blocks per stage."""
+    return ResNet(residual_template, layers=[3, 4, 23, 3], image_channel=img_channel)
+
+
+def ResNet152(img_channel):
+    """Return a ResNet with [3, 8, 36, 3] bottleneck blocks per stage."""
+    return ResNet(residual_template, layers=[3, 8, 36, 3], image_channel=img_channel)
+
+
+def test():
+    """Smoke test: print the ResNet152 output size for 64 1x32x32 inputs."""
+    model = ResNet152(img_channel=1)
+    scores = model(torch.randn(64, 1, 32, 32))
+    print(scores.size())
+
+
+# test()
--- a/ML/Projects/Exploring_MNIST/networks/vgg.py
+++ b/ML/Projects/Exploring_MNIST/networks/vgg.py
@@ -0,0 +1,139 @@
+import torch
+import torch.nn as nn
+
+
+# Layer layouts per VGG variant, one row per stage: an int is the number of
+# output channels of a 3x3 convolution, "M" is a 2x2 max-pooling layer.
+# fmt: off
+VGG_types = {
+    "VGG11": [
+        64, "M",
+        128, "M",
+        256, 256, "M",
+        512, 512, "M",
+        512, 512, "M",
+    ],
+    "VGG13": [
+        64, 64, "M",
+        128, 128, "M",
+        256, 256, "M",
+        512, 512, "M",
+        512, 512, "M",
+    ],
+    "VGG16": [
+        64, 64, "M",
+        128, 128, "M",
+        256, 256, 256, "M",
+        512, 512, 512, "M",
+        512, 512, 512, "M",
+    ],
+    "VGG19": [
+        64, 64, "M",
+        128, 128, "M",
+        256, 256, 256, 256, "M",
+        512, 512, 512, 512, "M",
+        512, 512, 512, 512, "M",
+    ],
+}
+# fmt: on
+
+
+class VGG(nn.Module):
+    """VGG-style network with a single linear classifier head.
+
+    The head is ``nn.Linear(512, num_classes)``, so the convolutional part
+    has to end in a 512 x 1 x 1 feature map (e.g. 32x32 inputs through the
+    five max-pooling stages of the layouts in ``VGG_types``).
+
+    Args:
+        vgg_type: key into ``VGG_types`` ("VGG11", "VGG13", "VGG16", "VGG19").
+        in_channels: number of channels of the input images.
+        init_weights: if True, apply ``_initialize_weights`` to all layers.
+        batch_norm: if True, insert batch norm after every convolution.
+        num_classes: number of output scores.
+    """
+
+    def __init__(
+        self, vgg_type, in_channels, init_weights=True, batch_norm=True, num_classes=10
+    ):
+        super().__init__()
+
+        self.batch_norm = batch_norm
+        self.in_channels = in_channels
+
+        self.layout = self.create_architecture(VGG_types[vgg_type])
+        self.fc = nn.Linear(512, num_classes)
+
+        if init_weights:
+            self._initialize_weights()
+
+    def forward(self, x):
+        """Return class scores of shape ``(batch, num_classes)``."""
+        out = self.layout(x)
+        out = out.view(out.size(0), -1)  # flatten to (batch, features)
+        out = self.fc(out)
+
+        return out
+
+    def _initialize_weights(self):
+        """Kaiming-normal convs, unit batch norm, N(0, 0.01) linear weights."""
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                nn.init.constant_(m.bias, 0)
+
+    def create_architecture(self, architecture):
+        """Translate a layout list into an ``nn.Sequential``.
+
+        Args:
+            architecture: sequence of ints (3x3 conv with that many output
+                channels, followed by optional batch norm and ReLU) and
+                ``"M"`` markers (2x2 max pooling).
+
+        Raises:
+            ValueError: if an entry is neither an int nor ``"M"``.
+        """
+        layers = []
+        # Track the running channel count locally so that self.in_channels
+        # keeps describing the network input and the method can be re-run.
+        in_channels = self.in_channels
+
+        for spec in architecture:
+            if isinstance(spec, int):
+                layers.append(nn.Conv2d(in_channels, spec, kernel_size=3, padding=1))
+                if self.batch_norm:
+                    layers.append(nn.BatchNorm2d(spec))
+                layers.append(nn.ReLU(inplace=False))
+                in_channels = spec
+            elif spec == "M":
+                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
+            else:
+                raise ValueError(f"Unknown layer spec in VGG architecture: {spec!r}")
+
+        # A 1x1 average pool with stride 1 leaves the tensor unchanged; it is
+        # kept so the module layout stays the same as before.
+        layers.append(nn.AvgPool2d(kernel_size=1, stride=1))
+
+        return nn.Sequential(*layers)
+
+
+def test():
+    """Smoke test: print the VGG16 output size for 64 1x32x32 inputs."""
+    model = VGG("VGG16", 1)
+    scores = model(torch.randn(64, 1, 32, 32))
+    print(scores.size())
+
+
+# test()
--- a/ML/Projects/Exploring_MNIST/train.py
+++ b/ML/Projects/Exploring_MNIST/train.py
@@ -0,0 +1,264 @@
+import argparse
+import os
+import shutil
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import torch.utils.data
+
+import torchvision.transforms as transforms
+import torchvision.datasets as datasets
+import torch.backends.cudnn as cudnn
+
+from torch.utils.data import DataLoader, SubsetRandomSampler
+from networks.import_all_networks import *
+from utils.import_utils import *
+
+
+class Train_MNIST(object):
+    """Command-line driven training harness for the MNIST networks.
+
+    Construction parses the CLI arguments, builds the image transforms and
+    creates the data loaders. ``main`` then picks the network and optimizer,
+    optionally restores a checkpoint and runs the training loop.
+    """
+
+    def __init__(self):
+        self.best_acc = 0
+        self.in_channels = 1  # 1 because MNIST is grayscale
+        self.dataset = mnist_data  # Class that is imported from utils that imports data
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.dtype = torch.float32
+
+        self.args = self.prepare_args()
+        self.transform_train, self.transform_test = self.prepare_transformations()
+
+        # Accuracy is tracked on the validation split when one is requested,
+        # otherwise on the training data itself.
+        if self.args.create_validationset:
+            (
+                self.loader_train,
+                self.loader_validation,
+                self.loader_test,
+            ) = self.prepare_data()
+            self.data_check_acc = self.loader_validation
+        else:
+            self.loader_train, self.loader_test = self.prepare_data()
+            self.data_check_acc = self.loader_train
+
+    def prepare_args(self):
+        """Parse the command line and return the argparse namespace."""
+        parser = argparse.ArgumentParser(description="PyTorch MNIST")
+        parser.add_argument(
+            "--resume",
+            default="",
+            type=str,
+            metavar="PATH",
+            help="path to latest checkpoint (default: none)",
+        )
+        parser.add_argument(
+            "--lr",
+            default=0.001,
+            type=float,
+            metavar="LR",
+            help="initial learning rate",
+        )
+        parser.add_argument(
+            "--weight-decay",
+            default=1e-5,
+            type=float,
+            metavar="R",
+            help="L2 regularization lambda",
+        )
+        parser.add_argument(
+            "--momentum", default=0.9, type=float, metavar="M", help="SGD with momentum"
+        )
+        parser.add_argument(
+            "--epochs",
+            type=int,
+            default=100,
+            metavar="N",
+            help="number of epochs to train (default: 100)",
+        )
+        parser.add_argument(
+            "--batch-size",
+            type=int,
+            default=128,
+            metavar="N",
+            help="input batch size for training (default: 128)",
+        )
+        parser.add_argument(
+            "--log-interval",
+            type=int,
+            default=240,
+            metavar="N",
+            help="how many batches to wait before logging training status",
+        )
+        parser.add_argument(
+            "--seed", type=int, default=1, metavar="S", help="random seed (default: 1)"
+        )
+        parser.add_argument(
+            "--number-workers",
+            type=int,
+            default=0,
+            metavar="S",
+            help="number of workers (default: 0)",
+        )
+        parser.add_argument(
+            "--init-padding",
+            type=int,
+            default=2,
+            metavar="S",
+            help=" If use initial padding or not. (default: 2 because mnist 28x28 to make 32x32)",
+        )
+        parser.add_argument(
+            "--create-validationset",
+            action="store_true",
+            default=False,
+            # argparse %-formats help strings, so a literal percent sign must
+            # be written as "%%"; a bare "%" made --help raise ValueError.
+            help="If you want to use a validation set (default: False). Default size = 10%%",
+        )
+        parser.add_argument(
+            "--save-model",
+            action="store_true",
+            default=False,
+            help="If you want to save this model(default: False).",
+        )
+        args = parser.parse_args()
+        return args
+
+    def prepare_transformations(self):
+        """Return the (train, test) transform pipelines.
+
+        Both pad the image by ``--init-padding`` pixels, convert it to a
+        tensor and normalize it with mean 0.1307 and std 0.3081.
+        """
+        transform_train = transforms.Compose(
+            [
+                transforms.Pad(self.args.init_padding),
+                transforms.ToTensor(),
+                transforms.Normalize((0.1307,), (0.3081,)),
+            ]
+        )
+
+        transform_test = transforms.Compose(
+            [
+                transforms.Pad(self.args.init_padding),
+                transforms.ToTensor(),
+                transforms.Normalize((0.1307,), (0.3081,)),
+            ]
+        )
+
+        return transform_train, transform_test
+
+    def prepare_data(self, shuffle=True):
+        """Build the data loaders.
+
+        Returns ``(train, validation, test)`` loaders when
+        ``--create-validationset`` is set, otherwise ``(train, test)``.
+        """
+        data = self.dataset(
+            shuffle,
+            self.transform_train,
+            self.transform_test,
+            self.args.number_workers,
+            self.args.create_validationset,
+            self.args.batch_size,
+            validation_size=0.1,
+            random_seed=self.args.seed,
+        )
+
+        if self.args.create_validationset:
+            loader_train, loader_validation, loader_test = data.main()
+
+            return loader_train, loader_validation, loader_test
+
+        else:
+            loader_train, loader_test = data.main()
+
+            return loader_train, loader_test
+
+    def train(self):
+        """Run the training loop for ``--epochs`` epochs.
+
+        Every ``--log-interval`` batches the loss is printed and accuracy is
+        measured on ``self.data_check_acc``; with ``--save-model`` a
+        checkpoint is written whenever that accuracy improves.
+        """
+        criterion = nn.CrossEntropyLoss()
+        step = 0  # global batch counter (was named `iter`, shadowing the builtin)
+
+        # History kept for the (currently disabled) visdom plots.
+        # vis_plotting = visdom_plotting()
+        loss_list, batch_list, epoch_list, validation_acc_list, training_acc_list = (
+            [],
+            [],
+            [0],
+            [0],
+            [0],
+        )
+
+        for epoch in range(self.args.epochs):
+            for batch_idx, (x, y) in enumerate(self.loader_train):
+                self.model.train()
+                x = x.to(device=self.device, dtype=self.dtype)
+                y = y.to(device=self.device, dtype=torch.long)
+
+                scores = self.model(x)
+                loss = criterion(scores, y)
+
+                loss_list.append(loss.item())
+                batch_list.append(step + 1)
+                step += 1
+
+                if batch_idx % self.args.log_interval == 0:
+                    print(f"Batch {batch_idx}, epoch {epoch}, loss = {loss.item()}")
+                    print()
+                    self.model.eval()
+                    train_acc = check_accuracy(self.data_check_acc, self.model)
+                    # validation_acc = self.check_accuracy(self.data_check_acc)
+                    validation_acc = 0
+                    validation_acc_list.append(validation_acc)
+                    training_acc_list.append(train_acc)
+                    epoch_list.append(epoch + 0.5)
+                    print()
+                    print()
+                    # call to plot in visdom
+                    # vis_plotting.create_plot(loss_list, batch_list, validation_acc_list, epoch_list, training_acc_list)
+
+                    # save checkpoint
+                    if train_acc > self.best_acc and self.args.save_model:
+                        self.best_acc = train_acc
+                        save_checkpoint(
+                            self.filename,
+                            self.model,
+                            self.optimizer,
+                            self.best_acc,
+                            epoch,
+                        )
+
+                # Back to training mode after the evaluation above, then take
+                # the optimizer step for the loss computed on this batch.
+                self.model.train()
+                self.optimizer.zero_grad()
+                loss.backward()
+                self.optimizer.step()
+
+    def choose_network(self):
+        """Create the model, set its checkpoint path and move it to the device."""
+        self.model = LeNet(
+            in_channels=self.in_channels, init_weights=True, num_classes=10
+        )
+        self.filename = "checkpoint/mnist_LeNet.pth.tar"
+
+        # self.model = VGG('VGG16', in_channels = self.in_channels)
+        # self.filename =  'checkpoint/mnist_VGG16.pth.tar'
+
+        # self.model = ResNet50(img_channel=1)
+        # self.filename =  'checkpoint/mnist_ResNet.pth.tar'
+
+        # self.model = GoogLeNet(img_channel=1)
+        # self.filename =  'checkpoint/mnist_GoogLeNet.pth.tar'
+
+        self.model = self.model.to(self.device)
+
+    def main(self):
+        """Set up model and optimizer, optionally resume, then train.
+
+        As before, this does nothing unless the module runs as a script.
+        """
+        if __name__ != "__main__":
+            return
+
+        self.choose_network()
+        self.optimizer = optim.SGD(
+            self.model.parameters(),
+            lr=self.args.lr,
+            weight_decay=self.args.weight_decay,
+            momentum=self.args.momentum,
+        )
+        cudnn.benchmark = True
+
+        # load_model prints its own status message and returns None both when
+        # --resume is not given and when the checkpoint file is missing.
+        # Unpacking that None used to crash with a TypeError; now training
+        # simply starts from scratch in those cases.
+        restored = load_model(self.args, self.model, self.optimizer)
+        if restored is not None:
+            (
+                self.model,
+                self.optimizer,
+                self.checkpoint,
+                self.start_epoch,
+                self.best_acc,
+            ) = restored
+
+        self.train()
+
+
+## Mnist
+# Guarded so that importing this module does not parse the command line or
+# build the data loaders as a side effect.
+if __name__ == "__main__":
+    network = Train_MNIST()
+    network.main()
--- a/ML/Projects/Exploring_MNIST/utils/pycache/import_utils.cpython-37.pyc
+++ b/ML/Projects/Exploring_MNIST/utils/pycache/import_utils.cpython-37.pyc
--- a/ML/Projects/Exploring_MNIST/utils/pycache/mnist_data.cpython-37.pyc
+++ b/ML/Projects/Exploring_MNIST/utils/pycache/mnist_data.cpython-37.pyc
--- a/ML/Projects/Exploring_MNIST/utils/pycache/utils.cpython-37.pyc
+++ b/ML/Projects/Exploring_MNIST/utils/pycache/utils.cpython-37.pyc
--- a/ML/Projects/Exploring_MNIST/utils/import_utils.py
+++ b/ML/Projects/Exploring_MNIST/utils/import_utils.py
@@ -0,0 +1,2 @@
+from utils.mnist_data import mnist_data
+from utils.utils import check_accuracy, save_checkpoint, visdom_plotting, load_model
--- a/ML/Projects/Exploring_MNIST/utils/mnist_data.py
+++ b/ML/Projects/Exploring_MNIST/utils/mnist_data.py
@@ -0,0 +1,94 @@
+import numpy as np
+import torchvision.datasets as datasets
+from torch.utils.data import DataLoader, SubsetRandomSampler
+
+
+class mnist_data(object):
+    """Downloads MNIST and wraps it in PyTorch data loaders.
+
+    ``main`` returns ``(train, validation, test)`` loaders when
+    ``create_validation_set`` is true and ``(train, test)`` otherwise.
+    """
+
+    def __init__(
+        self,
+        shuffle,
+        transform_train,
+        transform_test,
+        num_workers=0,
+        create_validation_set=True,
+        batch_size=128,
+        validation_size=0.2,
+        random_seed=1,
+    ):
+        self.transform_train = transform_train
+        self.transform_test = transform_test
+        self.shuffle = shuffle
+        self.random_seed = random_seed
+        self.create_validation_set = create_validation_set
+        self.validation_size = validation_size
+        self.batch_size = batch_size
+        self.num_workers = num_workers
+
+    def _make_loader(self, dataset, **options):
+        """Create a DataLoader with this object's batch size and worker count."""
+        return DataLoader(
+            dataset=dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+            **options,
+        )
+
+    def download_data(self):
+        """Return the MNIST (train, test) datasets, downloading into ./data."""
+        mnist_trainset = datasets.MNIST(
+            root="./data", train=True, download=True, transform=self.transform_train
+        )
+        mnist_testset = datasets.MNIST(
+            root="./data", train=False, download=True, transform=self.transform_test
+        )
+        return mnist_trainset, mnist_testset
+
+    def create_validationset(self, mnist_trainset):
+        """Split the training set into train and validation loaders.
+
+        The first ``validation_size`` fraction of the (optionally seeded and
+        shuffled) indices becomes the validation split, the rest is used for
+        training. Both loaders draw from ``mnist_trainset`` through a
+        ``SubsetRandomSampler``.
+        """
+        indices = list(range(len(mnist_trainset)))
+        split = int(self.validation_size * len(indices))
+
+        if self.shuffle:
+            np.random.seed(self.random_seed)
+            np.random.shuffle(indices)
+
+        valid_idx = indices[:split]
+        train_idx = indices[split:]
+
+        loader_train = self._make_loader(
+            mnist_trainset, sampler=SubsetRandomSampler(train_idx)
+        )
+        loader_validation = self._make_loader(
+            mnist_trainset, sampler=SubsetRandomSampler(valid_idx)
+        )
+        return loader_train, loader_validation
+
+    def main(self):
+        """Download the data and build the loaders (test data is never shuffled)."""
+        mnist_trainset, mnist_testset = self.download_data()
+
+        if self.create_validation_set:
+            loader_train, loader_validation = self.create_validationset(mnist_trainset)
+            loader_test = self._make_loader(mnist_testset, shuffle=False)
+            return loader_train, loader_validation, loader_test
+
+        loader_train = self._make_loader(mnist_trainset, shuffle=self.shuffle)
+        loader_test = self._make_loader(mnist_testset, shuffle=False)
+        return loader_train, loader_test
--- a/ML/Projects/Exploring_MNIST/utils/utils.py
+++ b/ML/Projects/Exploring_MNIST/utils/utils.py
@@ -0,0 +1,130 @@
+import torch
+import visdom
+import os
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+dtype = torch.float32
+
+
+def save_checkpoint(filename, model, optimizer, train_acc, epoch):
+    """Write model/optimizer state, accuracy and next epoch to ``filename``.
+
+    Args:
+        filename: destination path; missing parent directories are created.
+        model: module whose ``state_dict`` is stored under ``"state_dict"``.
+        optimizer: optimizer whose ``state_dict`` is stored under ``"optimizer"``.
+        train_acc: accuracy stored under ``"acc"``.
+        epoch: epoch just trained; ``epoch + 1`` is stored under ``"epoch"``.
+    """
+    save_state = {
+        "state_dict": model.state_dict(),
+        "acc": train_acc,
+        "epoch": epoch + 1,
+        "optimizer": optimizer.state_dict(),
+    }
+    print()
+    print("Saving current parameters")
+    print("___________________________________________________________")
+
+    # torch.save does not create directories, so a fresh checkout without a
+    # "checkpoint/" folder would fail on the first save.
+    directory = os.path.dirname(filename)
+    if directory:
+        os.makedirs(directory, exist_ok=True)
+
+    torch.save(save_state, filename)
+
+
+def check_accuracy(loader, model):
+    """Return the accuracy (in percent) of ``model`` on ``loader``.
+
+    Gradients are disabled during the pass. The model is not switched to
+    evaluation mode here; callers are expected to do that themselves.
+    An empty loader yields an accuracy of 0.0 instead of dividing by zero.
+    """
+    # Datasets without a ``train`` flag are reported as training/validation
+    # data rather than raising AttributeError.
+    if getattr(loader.dataset, "train", True):
+        print("Checking accuracy on training or validation set")
+    else:
+        print("Checking accuracy on test set")
+    num_correct = 0
+    num_samples = 0
+    # model.eval()  # set model to evaluation mode
+    with torch.no_grad():
+        for x, y in loader:
+            x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
+            y = y.to(device=device, dtype=torch.long)
+            scores = model(x)
+            _, preds = scores.max(1)
+            # .item() keeps the running count a plain Python int.
+            num_correct += (preds == y).sum().item()
+            num_samples += preds.size(0)
+        acc = (float(num_correct) / num_samples) * 100.0 if num_samples else 0.0
+        print("Got %d / %d correct (%.2f)" % (num_correct, num_samples, acc))
+        return acc
+
+
+def load_model(args, model, optimizer):
+    """Restore ``model`` and ``optimizer`` from ``args.resume`` if possible.
+
+    Returns:
+        ``(model, optimizer, checkpoint, start_epoch, best_acc)`` when the
+        checkpoint was loaded, otherwise ``None`` (no ``--resume`` given, or
+        the file does not exist). Callers must check for ``None`` before
+        unpacking.
+    """
+    if not args.resume:
+        print("No pretrained model. Starting from scratch!")
+        return None
+
+    model.eval()
+    if not os.path.isfile(args.resume):
+        print("=> no checkpoint found at '{}'".format(args.resume))
+        return None
+
+    print("=> loading checkpoint '{}'".format(args.resume))
+    # map_location lets a checkpoint saved on a GPU be resumed on a CPU-only
+    # machine (and the other way round).
+    checkpoint = torch.load(args.resume, map_location=device)
+    start_epoch = checkpoint["epoch"]
+    best_acc = checkpoint["acc"]
+    model.load_state_dict(checkpoint["state_dict"])
+    optimizer.load_state_dict(checkpoint["optimizer"])
+    print(
+        "=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint["epoch"])
+    )
+    return model, optimizer, checkpoint, start_epoch, best_acc
+
+
+class visdom_plotting(object):
+    """Keeps three visdom line plots: batch loss, validation and training accuracy.
+
+    Each plot remembers its window handle so that later calls replace the
+    existing curve instead of opening a new window.
+    """
+
+    def __init__(self):
+        self.viz = visdom.Visdom()
+
+        self.cur_batch_win = None
+        self.cur_batch_win_opts = {
+            "title": "Epoch Loss Trace",
+            "xlabel": "Batch Number",
+            "ylabel": "Loss",
+            "width": 600,
+            "height": 400,
+        }
+
+        self.cur_validation_acc = None
+        self.cur_validation_acc_opts = {
+            "title": "Validation accuracy",
+            "xlabel": "Epochs",
+            "ylabel": "Validation Accuracy",
+            "width": 600,
+            "height": 400,
+        }
+
+        self.cur_training_acc = None
+        self.cur_training_acc_opts = {
+            "title": "Training accuracy",
+            "xlabel": "Epochs",
+            "ylabel": "Train Accuracy",
+            "width": 600,
+            "height": 400,
+        }
+
+    def create_plot(
+        self, loss_list, batch_list, validation_acc_list, epoch_list, training_acc_list
+    ):
+        """Draw or refresh the three plots; does nothing without a connection.
+
+        Args:
+            loss_list: loss values, plotted against ``batch_list``.
+            batch_list: x values for the loss plot.
+            validation_acc_list: plotted against ``epoch_list``.
+            epoch_list: x values for both accuracy plots.
+            training_acc_list: plotted against ``epoch_list``.
+        """
+        if self.viz.check_connection():
+            self.cur_batch_win = self.viz.line(
+                torch.FloatTensor(loss_list),
+                torch.FloatTensor(batch_list),
+                win=self.cur_batch_win,
+                name="current_batch_loss",
+                update=(None if self.cur_batch_win is None else "replace"),
+                opts=self.cur_batch_win_opts,
+            )
+
+            self.cur_validation_acc = self.viz.line(
+                torch.FloatTensor(validation_acc_list),
+                torch.FloatTensor(epoch_list),
+                win=self.cur_validation_acc,
+                name="current_validation_accuracy",
+                update=(None if self.cur_validation_acc is None else "replace"),
+                opts=self.cur_validation_acc_opts,
+            )
+
+            # The training curve goes into its own window; it used to be
+            # passed the validation window handle and was drawn into that
+            # plot instead.
+            self.cur_training_acc = self.viz.line(
+                torch.FloatTensor(training_acc_list),
+                torch.FloatTensor(epoch_list),
+                win=self.cur_training_acc,
+                name="current_training_accuracy",
+                update=(None if self.cur_training_acc is None else "replace"),
+                opts=self.cur_training_acc_opts,
+            )
+
+
+#
--- a/ML/Projects/spam_classifier_naive_bayes/build_vocabulary.py
+++ b/ML/Projects/spam_classifier_naive_bayes/build_vocabulary.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+"""
+We want to go through each word in all emails,
+check if the word is an actual english word
+by comparing with nltk.corpus words and if it is
+then add it to our vocabulary.
+
+"""
+
+import pandas as pd
+import nltk
+from nltk.corpus import words
+
+# Maps each accepted word to its index; filled in by build_vocabulary().
+vocabulary = {}
+# The e-mail text is read from the first column of this CSV.
+data = pd.read_csv("data/emails.csv")
+# Make sure the nltk "words" corpus is available before it is read below.
+nltk.download("words")
+# Known English words, used to filter candidate vocabulary entries.
+set_words = set(words.words())
+
+
+def build_vocabulary(curr_email):
+    """Add the new English words of one email to the global ``vocabulary``.
+
+    Args:
+        curr_email: iterable of word strings (one tokenised email).
+
+    Words are compared and stored in lower case. The membership test already
+    used the lowercased word but the original-case word was stored as the
+    key, so the same word could be added repeatedly with a new index and
+    leave gaps in the index range.
+    """
+    idx = len(vocabulary)
+    for word in curr_email:
+        token = word.lower()
+        if token in set_words and token not in vocabulary:
+            vocabulary[token] = idx
+            idx += 1
+
+
+if __name__ == "__main__":
+    for i in range(data.shape[0]):
+        # First column of the CSV holds the e-mail text.
+        curr_email = data.iloc[i, :][0].split()
+        print(
+            f"Current email is {i}/{data.shape[0]} and the \
+               length of vocab is curr {len(vocabulary)}"
+        )
+
+        build_vocabulary(curr_email)
+
+    # Write dictionary to vocabulary.txt file. This sits inside the guard
+    # because at module level a plain import would overwrite the file with an
+    # empty dictionary; the context manager closes the file even on error.
+    with open("vocabulary.txt", "w") as file:
+        file.write(str(vocabulary))
--- a/ML/Projects/spam_classifier_naive_bayes/create_freq_vectors.py
+++ b/ML/Projects/spam_classifier_naive_bayes/create_freq_vectors.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+
+"""
+Having created our vocabulary we now need to create
+the dataset X,y which we will create by doing frequency
+vector for each email. For example if our vocabulary
+has the words
+
+[aardvark, ..., buy, ... money, .... zulu]
+
+We go through each email and count up how many times each
+word was repeated, so for a specific example this might look
+like:
+    
+[0, ..., 4, ... 2, .... 0] 
+
+And perhaps, since both "buy" and "money" appear, this email
+might be spam
+
+"""
+import pandas as pd
+import numpy as np
+import ast
+
+data = pd.read_csv("data/emails.csv")
+with open("vocabulary.txt", "r") as file:  # closed as soon as it is read
+    contents = file.read()
+vocabulary = ast.literal_eval(contents)
+
+X = np.zeros((data.shape[0], len(vocabulary)))
+y = np.zeros((data.shape[0]))
+
+for i in range(data.shape[0]):
+    email = data.iloc[i, 0].split()
+    for email_word in email:
+        key = email_word.lower()  # same key the membership test uses
+        if key in vocabulary:
+            X[i, vocabulary[key]] += 1
+
+    y[i] = data.iloc[i, 1]
+
+# Save stored numpy arrays
+np.save("data/X.npy", X)
+np.save("data/y.npy", y)
--- a/ML/Projects/spam_classifier_naive_bayes/data/emails.csv
+++ b/ML/Projects/spam_classifier_naive_bayes/data/emails.csv
--- a/ML/Projects/spam_classifier_naive_bayes/naivebayes.py
+++ b/ML/Projects/spam_classifier_naive_bayes/naivebayes.py
@@ -0,0 +1,68 @@
+"""
+Naive Bayes Classifier Implementation from scratch
+
+To run the code structure the code in the following way:
+    X be size: (num_training_examples, num_features)
+    y be size: (num_training_examples, )
+
+Where the classes are 0, 1, 2, etc. Then an example run looks like:
+    NB = NaiveBayes(X, y)
+    NB.fit(X)
+    predictions = NB.predict(X)
+
+Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
+*    2020-04-21 Initial coding
+
+"""
+import numpy as np
+
+
+class NaiveBayes:
+    """Gaussian Naive Bayes classifier for labels 0, 1, ..., num_classes - 1."""
+
+    def __init__(self, X, y):
+        self.num_examples, self.num_features = X.shape
+        self.num_classes = len(np.unique(y))
+        self.eps = 1e-6  # added to variances: avoids log(0) and division by 0
+        self.y = y  # labels kept so fit(X) does not depend on a global y
+
+    def fit(self, X, y=None):
+        """Estimate per-class feature mean, variance and prior from X."""
+        y = self.y if y is None else y
+        self.classes_mean, self.classes_variance, self.classes_prior = {}, {}, {}
+        for c in range(self.num_classes):
+            X_c = X[y == c]
+            self.classes_mean[str(c)] = np.mean(X_c, axis=0)
+            self.classes_variance[str(c)] = np.var(X_c, axis=0)
+            self.classes_prior[str(c)] = X_c.shape[0] / X.shape[0]
+
+    def predict(self, X):
+        """Return the argmax class for each row of X (any number of rows)."""
+        probs = np.zeros((X.shape[0], self.num_classes))
+        for c in range(self.num_classes):
+            prior = self.classes_prior[str(c)]
+            probs_c = self.density_function(
+                X, self.classes_mean[str(c)], self.classes_variance[str(c)]
+            )
+            probs[:, c] = probs_c + np.log(prior)
+        return np.argmax(probs, 1)
+
+    def density_function(self, x, mean, sigma):
+        # Log of the Gaussian density summed over features; sigma is a variance
+        const = -self.num_features / 2 * np.log(2 * np.pi) - 0.5 * np.sum(
+            np.log(sigma + self.eps)
+        )
+        probs = 0.5 * np.sum(np.power(x - mean, 2) / (sigma + self.eps), 1)
+        return const - probs
+
+
+if __name__ == "__main__":
+    # For spam emails (Make sure to run build_vocab etc. to have .npy files)
+    X = np.load("data/X.npy")
+    y = np.load("data/y.npy")
+
+    NB = NaiveBayes(X, y)
+    NB.fit(X)
+    y_pred = NB.predict(X)
+
+    print(f"Accuracy: {sum(y_pred==y)/X.shape[0]}")
--- a/ML/Projects/text_generation_babynames/data/example_names.txt
+++ b/ML/Projects/text_generation_babynames/data/example_names.txt
@@ -0,0 +1,19 @@
+Niela
+Elia
+Leneth
+Ley
+Ira
+Bernandel
+Gelico
+Marti
+Ednie
+Ozel
+Marin
+Elithon
+Mirce
+Elie
+Elvar
+Domarine
+Artha
+Audrey
+Davyd
--- a/ML/Projects/text_generation_babynames/data/names.txt
+++ b/ML/Projects/text_generation_babynames/data/names.txt
--- a/ML/Projects/text_generation_babynames/data/shakespeare_larger.txt
+++ b/ML/Projects/text_generation_babynames/data/shakespeare_larger.txt
--- a/ML/Projects/text_generation_babynames/data/shakespeare_tiny.txt
+++ b/ML/Projects/text_generation_babynames/data/shakespeare_tiny.txt
--- a/ML/Projects/text_generation_babynames/generating_names.py
+++ b/ML/Projects/text_generation_babynames/generating_names.py
@@ -0,0 +1,144 @@
+"""
+Text generation using a character LSTM, specifically we want to
+generate new names as inspiration for those having a baby :) 
+
+Although this is for name generation, the code is general in the
+way that you can just send in any large text file (Shakespeare text, etc)
+and it will generate it.
+
+Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
+*    2020-05-09 Initial coding
+
+"""
+
+import torch
+import torch.nn as nn
+import string
+import random
+import sys
+import unidecode
+
+# Device configuration
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Get characters from string.printable
+all_characters = string.printable
+n_characters = len(all_characters)
+
+# Read large text file (Note can be any text file: not limited to just names)
+file = unidecode.unidecode(open("data/names.txt").read())
+
+
+class RNN(nn.Module):
+    def __init__(self, input_size, hidden_size, num_layers, output_size):
+        super().__init__()
+        self.hidden_size, self.num_layers = hidden_size, num_layers
+
+        # Layers: embedding -> stacked LSTM -> linear output layer
+        self.embed = nn.Embedding(input_size, hidden_size)
+        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
+        self.fc = nn.Linear(hidden_size, output_size)
+
+    def forward(self, x, hidden, cell):
+        embedded = self.embed(x).unsqueeze(1)  # insert a length-1 sequence axis
+        lstm_out, (hidden, cell) = self.lstm(embedded, (hidden, cell))
+        logits = self.fc(lstm_out.reshape(lstm_out.shape[0], -1))
+        return logits, (hidden, cell)
+
+    def init_hidden(self, batch_size):
+        state_shape = (self.num_layers, batch_size, self.hidden_size)
+        hidden, cell = (torch.zeros(state_shape).to(device) for _ in range(2))
+        return hidden, cell
+
+
+class Generator:
+    def __init__(self):
+        self.chunk_len = 250  # characters per training sequence
+        self.num_epochs = 5000  # training iterations; one random batch each
+        self.batch_size = 1  # rows per random batch
+        self.print_every = 50  # print loss and a sample every N iterations
+        self.hidden_size = 256  # embedding and LSTM width passed to RNN
+        self.num_layers = 2  # LSTM layers passed to RNN
+        self.lr = 0.003  # Adam learning rate
+
+    def char_tensor(self, string):
+        """Map each character of string to its index in all_characters."""
+        indices = [all_characters.index(ch) for ch in string]
+        # dtype given explicitly so an empty string still yields a LongTensor
+        return torch.tensor(indices, dtype=torch.long)
+
+    def get_random_batch(self):
+        """Return (input, target) LongTensors of shape (batch_size, chunk_len)."""
+        text_input = torch.zeros(self.batch_size, self.chunk_len)
+        text_target = torch.zeros(self.batch_size, self.chunk_len)
+        for i in range(self.batch_size):
+            # Sample per row; the -1 keeps chunk_len + 1 characters in range.
+            start_idx = random.randint(0, len(file) - self.chunk_len - 1)
+            text_str = file[start_idx : start_idx + self.chunk_len + 1]
+            text_input[i, :] = self.char_tensor(text_str[:-1])
+            text_target[i, :] = self.char_tensor(text_str[1:])
+
+        return text_input.long(), text_target.long()
+
+    def generate(self, initial_str="A", predict_len=100, temperature=0.85):
+        """Sample predict_len characters from self.rnn, seeded by initial_str."""
+        hidden, cell = self.rnn.init_hidden(batch_size=1)  # one sequence at a time
+        initial_input = self.char_tensor(initial_str)
+        predicted = initial_str
+
+        with torch.no_grad():  # sampling only: no autograd graph is needed
+            for p in range(len(initial_str) - 1):
+                _, (hidden, cell) = self.rnn(
+                    initial_input[p].view(1).to(device), hidden, cell
+                )
+            last_char = initial_input[-1]
+            for p in range(predict_len):
+                output, (hidden, cell) = self.rnn(
+                    last_char.view(1).to(device), hidden, cell
+                )
+                output_dist = output.data.view(-1).div(temperature).exp()
+                top_char = torch.multinomial(output_dist, 1)[0]
+                predicted_char = all_characters[top_char]
+                predicted += predicted_char
+                last_char = self.char_tensor(predicted_char)
+
+        return predicted
+
+    # input_size, hidden_size, num_layers, output_size
+    def train(self):
+        """Train a fresh RNN on random chunks of `file`, logging to tensorboard."""
+        from torch.utils.tensorboard import SummaryWriter  # not imported at file top
+        self.rnn = RNN(
+            n_characters, self.hidden_size, self.num_layers, n_characters
+        ).to(device)
+        optimizer = torch.optim.Adam(self.rnn.parameters(), lr=self.lr)
+        criterion = nn.CrossEntropyLoss()
+        writer = SummaryWriter("runs/names0")  # for tensorboard
+
+        print("=> Starting training")
+        for epoch in range(1, self.num_epochs + 1):
+            inp, target = self.get_random_batch()
+            hidden, cell = self.rnn.init_hidden(batch_size=self.batch_size)
+
+            self.rnn.zero_grad()
+            loss = 0
+            inp = inp.to(device)
+            target = target.to(device)
+            for c in range(self.chunk_len):
+                output, (hidden, cell) = self.rnn(inp[:, c], hidden, cell)
+                loss += criterion(output, target[:, c])
+
+            loss.backward()
+            optimizer.step()
+            loss = loss.item() / self.chunk_len
+
+            if epoch % self.print_every == 0:
+                print(f"Loss: {loss}")
+                print(self.generate())
+
+            writer.add_scalar("Training loss", loss, global_step=epoch)
+        writer.close()  # flush pending events to disk
+
+
+gennames = Generator()
+gennames.train()
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/cats/cat.0.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/cats/cat.0.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/cats/cat.1.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/cats/cat.1.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/cats/cat.2.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/cats/cat.2.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/cats/cat.3.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/cats/cat.3.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/cats/cat.4.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/cats/cat.4.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/cats/cat.5.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/cats/cat.5.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/cats/cat.6.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/cats/cat.6.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/cats/cat.7.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/cats/cat.7.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/cats/dog.0.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/cats/dog.0.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123686.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123686.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123687.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123687.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123688.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123688.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123689.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123689.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123690.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123690.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123691.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123691.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123692.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123692.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123693.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123693.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123694.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123694.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123695.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/cat_dogs/dogs/n123695.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/classification.py
+++ b/ML/Pytorch/Basics/albumentations_tutorial/classification.py
@@ -0,0 +1,31 @@
+import cv2
+import albumentations as A
+import numpy as np
+from utils import plot_examples
+from PIL import Image
+
+image = Image.open("images/elon.jpeg")
+
+transform = A.Compose(
+    [
+        A.Resize(width=1920, height=1080),
+        A.RandomCrop(width=1280, height=720),
+        A.Rotate(limit=40, p=0.9, border_mode=cv2.BORDER_CONSTANT),
+        A.HorizontalFlip(p=0.5),
+        A.VerticalFlip(p=0.1),
+        A.RGBShift(r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.9),
+        A.OneOf([
+            A.Blur(blur_limit=3, p=0.5),
+            A.ColorJitter(p=0.5),
+        ], p=1.0),
+    ]
+)
+
+images_list = [image]
+image = np.array(image)
+for i in range(15):
+    augmentations = transform(image=image)
+    augmented_img = augmentations["image"]
+    images_list.append(augmented_img)
+plot_examples(images_list)
+
--- a/ML/Pytorch/Basics/albumentations_tutorial/detection.py
+++ b/ML/Pytorch/Basics/albumentations_tutorial/detection.py
@@ -0,0 +1,41 @@
+import cv2
+import albumentations as A
+import numpy as np
+from utils import plot_examples
+from PIL import Image
+
+image = cv2.imread("images/cat.jpg")
+image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+bboxes = [[13, 170, 224, 410]]
+
+# Pascal_voc (x_min, y_min, x_max, y_max), YOLO, COCO
+
+transform = A.Compose(
+    [
+        A.Resize(width=1920, height=1080),
+        A.RandomCrop(width=1280, height=720),
+        A.Rotate(limit=40, p=0.9, border_mode=cv2.BORDER_CONSTANT),
+        A.HorizontalFlip(p=0.5),
+        A.VerticalFlip(p=0.1),
+        A.RGBShift(r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.9),
+        A.OneOf([
+            A.Blur(blur_limit=3, p=0.5),
+            A.ColorJitter(p=0.5),
+        ], p=1.0),
+    ], bbox_params=A.BboxParams(format="pascal_voc", min_area=2048,
+                                min_visibility=0.3, label_fields=[])
+)
+
+images_list = [image]
+saved_bboxes = [bboxes[0]]
+for i in range(15):
+    augmentations = transform(image=image, bboxes=bboxes)
+    augmented_img = augmentations["image"]
+
+    if len(augmentations["bboxes"]) == 0:
+        continue
+
+    images_list.append(augmented_img)
+    saved_bboxes.append(augmentations["bboxes"][0])
+
+plot_examples(images_list, saved_bboxes)
--- a/ML/Pytorch/Basics/albumentations_tutorial/full_pytorch_example.py
+++ b/ML/Pytorch/Basics/albumentations_tutorial/full_pytorch_example.py
@@ -0,0 +1,62 @@
+import torch
+import numpy as np
+import cv2
+from PIL import Image
+import torch.nn as nn
+import albumentations as A
+from albumentations.pytorch import ToTensorV2
+from torch.utils.data import Dataset
+import os
+
+class ImageFolder(Dataset):
+    """Images under root_dir/<class_name>/, labelled by sorted class order."""
+    def __init__(self, root_dir, transform=None):
+        super(ImageFolder, self).__init__()
+        self.data = []
+        self.root_dir = root_dir
+        self.transform = transform
+        # os.listdir order is arbitrary; sort so labels are reproducible.
+        self.class_names = sorted(os.listdir(root_dir))
+        for index, name in enumerate(self.class_names):
+            files = sorted(os.listdir(os.path.join(root_dir, name)))
+            self.data += [(img_file, index) for img_file in files]
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, index):
+        img_file, label = self.data[index]
+        img_path = os.path.join(self.root_dir, self.class_names[label], img_file)
+        # Force 3 channels so every sample has the same channel layout.
+        image = np.array(Image.open(img_path).convert("RGB"))
+        if self.transform is not None:
+            image = self.transform(image=image)["image"]
+
+        return image, label
+
+
+transform = A.Compose(
+    [
+        A.Resize(width=1920, height=1080),
+        A.RandomCrop(width=1280, height=720),
+        A.Rotate(limit=40, p=0.9, border_mode=cv2.BORDER_CONSTANT),
+        A.HorizontalFlip(p=0.5),
+        A.VerticalFlip(p=0.1),
+        A.RGBShift(r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.9),
+        A.OneOf([
+            A.Blur(blur_limit=3, p=0.5),
+            A.ColorJitter(p=0.5),
+        ], p=1.0),
+        A.Normalize(
+            mean=[0, 0, 0],
+            std=[1, 1, 1],
+            max_pixel_value=255,
+        ),
+        ToTensorV2(),
+    ]
+)
+
+dataset = ImageFolder(root_dir="cat_dogs", transform=transform)
+
+for x,y in dataset:
+    print(x.shape)
--- a/ML/Pytorch/Basics/albumentations_tutorial/images/cat.jpg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/images/cat.jpg
--- a/ML/Pytorch/Basics/albumentations_tutorial/images/elon.jpeg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/images/elon.jpeg
--- a/ML/Pytorch/Basics/albumentations_tutorial/images/mask.jpeg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/images/mask.jpeg
--- a/ML/Pytorch/Basics/albumentations_tutorial/images/second_mask.jpeg
+++ b/ML/Pytorch/Basics/albumentations_tutorial/images/second_mask.jpeg
--- a/ML/Pytorch/Basics/albumentations_tutorial/segmentation.py
+++ b/ML/Pytorch/Basics/albumentations_tutorial/segmentation.py
@@ -0,0 +1,37 @@
+import cv2
+import albumentations as A
+import numpy as np
+from utils import plot_examples
+from PIL import Image
+
+image = Image.open("images/elon.jpeg")
+mask = Image.open("images/mask.jpeg")
+mask2 = Image.open("images/second_mask.jpeg")
+
+transform = A.Compose(
+    [
+        A.Resize(width=1920, height=1080),
+        A.RandomCrop(width=1280, height=720),
+        A.Rotate(limit=40, p=0.9, border_mode=cv2.BORDER_CONSTANT),
+        A.HorizontalFlip(p=0.5),
+        A.VerticalFlip(p=0.1),
+        A.RGBShift(r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.9),
+        A.OneOf([
+            A.Blur(blur_limit=3, p=0.5),
+            A.ColorJitter(p=0.5),
+        ], p=1.0),
+    ]
+)
+
+images_list = [image]
+image = np.array(image)
+mask = np.array(mask) # np.asarray(mask), np.array(mask)
+mask2 = np.array(mask2)
+for i in range(4):
+    augmentations = transform(image=image, masks=[mask, mask2])
+    augmented_img = augmentations["image"]
+    augmented_masks = augmentations["masks"]
+    images_list.append(augmented_img)
+    images_list.append(augmented_masks[0])
+    images_list.append(augmented_masks[1])
+plot_examples(images_list)
--- a/ML/Pytorch/Basics/albumentations_tutorial/utils.py
+++ b/ML/Pytorch/Basics/albumentations_tutorial/utils.py
@@ -0,0 +1,36 @@
+import random
+import cv2
+from matplotlib import pyplot as plt
+import matplotlib.patches as patches
+import numpy as np
+import albumentations as A
+
+
+def visualize(image):
+    plt.figure(figsize=(10, 10))
+    plt.axis('off')
+    plt.imshow(image)
+    plt.show()
+
+
+def plot_examples(images, bboxes=None):
+    """Show every image in a 4-column grid, drawing bboxes[i] when given."""
+    fig = plt.figure(figsize=(15, 15))
+    columns = 4
+    rows = max(5, -(-len(images) // columns))  # grow the grid past 20 images
+    for i in range(1, len(images) + 1):  # 1-based; + 1 keeps the last image
+        if bboxes is not None:
+            img = visualize_bbox(images[i - 1], bboxes[i - 1], class_name="Elon")
+        else:
+            img = images[i-1]
+        fig.add_subplot(rows, columns, i)
+        plt.imshow(img)
+    plt.show()
+
+
+# From https://albumentations.ai/docs/examples/example_bboxes/
+def visualize_bbox(img, bbox, class_name, color=(255, 0, 0), thickness=5):
+    """Visualizes a single bounding box on the image"""
+    x_min, y_min, x_max, y_max = map(int, bbox)
+    cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color, thickness)
+    return img
--- a/ML/Pytorch/Basics/custom_dataset/cats_dogs.csv
+++ b/ML/Pytorch/Basics/custom_dataset/cats_dogs.csv
@@ -0,0 +1,11 @@
+Animal,Label
+cat.0.jpg,0
+cat.1.jpg,0
+cat.2.jpg,0
+cat.3.jpg,0
+cat.4.jpg,0
+cat.5.jpg,0
+cat.6.jpg,0
+cat.7.jpg,0
+dog.0.jpg,1
+dog.1.jpg,1
--- a/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.0.jpg
+++ b/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.0.jpg
--- a/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.1.jpg
+++ b/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.1.jpg
--- a/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.2.jpg
+++ b/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.2.jpg
--- a/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.3.jpg
+++ b/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.3.jpg
--- a/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.4.jpg
+++ b/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.4.jpg
--- a/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.5.jpg
+++ b/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.5.jpg
--- a/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.6.jpg
+++ b/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.6.jpg
--- a/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.7.jpg
+++ b/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/cat.7.jpg
--- a/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/dog.0.jpg
+++ b/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/dog.0.jpg
--- a/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/dog.1.jpg
+++ b/ML/Pytorch/Basics/custom_dataset/cats_dogs_resized/dog.1.jpg
--- a/ML/Pytorch/Basics/custom_dataset/custom_FCNN.py
+++ b/ML/Pytorch/Basics/custom_dataset/custom_FCNN.py
@@ -0,0 +1,131 @@
+# Imports
+import os
+from typing import Union
+
+import torch.nn.functional as F  # All functions that don't have any parameters
+import pandas as pd
+import torch
+import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
+import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
+import torchvision
+import torchvision.transforms as transforms  # Transformations we can perform on our dataset
+from pandas import io
+
+# from skimage import io
+from torch.utils.data import (
+    Dataset,
+    DataLoader,
+)  # Gives easier dataset managment and creates mini batches
+import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
+
+
+# Create Fully Connected Network
+class NN(nn.Module):
+    def __init__(self, input_size, num_classes):
+        super(NN, self).__init__()
+        self.fc1 = nn.Linear(input_size, 50)
+        self.fc2 = nn.Linear(50, num_classes)
+
+    def forward(self, x):
+        x = F.relu(self.fc1(x))
+        x = self.fc2(x)
+        return x
+
+
+class SoloDataset(Dataset):
+    def __init__(self, csv_file, root_dir, transform=None):
+        self.annotations = pd.read_csv(csv_file)
+        self.root_dir = root_dir
+        self.transform = transform
+
+    def __len__(self):
+        return len(self.annotations)
+
+    def __getitem__(self, index):
+        x_data = self.annotations.iloc[index, 0:11]
+        x_data = torch.tensor(x_data)
+        y_label = torch.tensor(int(self.annotations.iloc[index, 11]))
+
+        return (x_data.float(), y_label)
+
+
+# Set device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Hyperparameters
+num_classes = 26
+learning_rate = 1e-3
+batch_size = 5
+num_epochs = 30
+input_size = 11
+
+# Load Data
+dataset = SoloDataset(
+    csv_file="power.csv", root_dir="test123", transform=transforms.ToTensor()
+)
+train_set, test_set = torch.utils.data.random_split(dataset, [2900, 57])
+train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
+test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=True)
+
+# Model
+model = NN(input_size=input_size, num_classes=num_classes).to(device)
+
+# Loss and optimizer
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+
+print(len(train_set))
+print(len(test_set))
+# Train Network
+for epoch in range(num_epochs):
+    losses = []
+
+    for batch_idx, (data, targets) in enumerate(train_loader):
+        # Get data to cuda if possible
+        data = data.to(device=device)
+        targets = targets.to(device=device)
+
+        # forward
+        scores = model(data)
+        loss = criterion(scores, targets)
+
+        losses.append(loss.item())
+
+        # backward
+        optimizer.zero_grad()
+        loss.backward()
+
+        # gradient descent or adam step
+        optimizer.step()
+
+    print(f"Cost at epoch {epoch} is {sum(losses) / len(losses)}")
+
+
+# Check accuracy on training to see how good our model is
+def check_accuracy(loader, model):
+    """Print how many samples from loader the model classifies correctly."""
+    num_correct = 0
+    num_samples = 0
+    model.eval()  # evaluation mode while scoring; restored to train() below
+    with torch.no_grad():
+        for x, y in loader:
+            x = x.to(device=device)
+            y = y.to(device=device)
+
+            scores = model(x)
+            _, predictions = scores.max(1)
+            # .item() keeps the counter a plain int, so it prints as a number
+            num_correct += (predictions == y).sum().item()
+            num_samples += predictions.size(0)
+
+    accuracy = num_correct / num_samples * 100 if num_samples else 0.0
+    print(f"Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}")
+
+    model.train()
+
+
+print("Checking accuracy on Training Set")
+check_accuracy(train_loader, model)
+
+print("Checking accuracy on Test Set")
+check_accuracy(test_loader, model)
--- a/ML/Pytorch/Basics/custom_dataset/custom_dataset.py
+++ b/ML/Pytorch/Basics/custom_dataset/custom_dataset.py
@@ -0,0 +1,130 @@
+"""
+Example of how to create custom dataset in Pytorch. In this case
+we have images of cats and dogs in a separate folder and a csv
+file containing the name to the jpg file as well as the target
+label (0 for cat, 1 for dog).
+
+Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
+*    2020-04-03 Initial coding
+
+"""
+
+# Imports
+import torch
+import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
+import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
+import torchvision.transforms as transforms  # Transformations we can perform on our dataset
+import torchvision
+import os
+import pandas as pd
+from skimage import io
+from torch.utils.data import (
+    Dataset,
+    DataLoader,
+)  # Gives easier dataset managment and creates mini batches
+
+
+class CatsAndDogsDataset(Dataset):
+    def __init__(self, csv_file, root_dir, transform=None):
+        self.annotations = pd.read_csv(csv_file)
+        self.root_dir = root_dir
+        self.transform = transform
+
+    def __len__(self):
+        return len(self.annotations)
+
+    def __getitem__(self, index):
+        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
+        image = io.imread(img_path)
+        y_label = torch.tensor(int(self.annotations.iloc[index, 1]))
+
+        if self.transform:
+            image = self.transform(image)
+
+        return (image, y_label)
+
+
+# Set device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Hyperparameters
+in_channel = 3
+num_classes = 2
+learning_rate = 1e-3
+batch_size = 32
+num_epochs = 10
+
+# Load Data
+dataset = CatsAndDogsDataset(
+    csv_file="cats_dogs.csv",
+    root_dir="cats_dogs_resized",
+    transform=transforms.ToTensor(),
+)
+
+# Dataset is actually a lot larger ~25k images, just took out 10 pictures
+# to upload to Github. It's enough to understand the structure and scale
+# if you got more images.
+train_set, test_set = torch.utils.data.random_split(dataset, [5, 5])
+train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
+test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=True)
+
+# Model
+model = torchvision.models.googlenet(pretrained=True)
+model.to(device)
+
+# Loss and optimizer
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+
+# Train Network
+for epoch in range(num_epochs):
+    losses = []
+
+    for batch_idx, (data, targets) in enumerate(train_loader):
+        # Get data to cuda if possible
+        data = data.to(device=device)
+        targets = targets.to(device=device)
+
+        # forward
+        scores = model(data)
+        loss = criterion(scores, targets)
+
+        losses.append(loss.item())
+
+        # backward
+        optimizer.zero_grad()
+        loss.backward()
+
+        # gradient descent or adam step
+        optimizer.step()
+
+    print(f"Cost at epoch {epoch} is {sum(losses)/len(losses)}")
+
+# Check accuracy on training to see how good our model is
+def check_accuracy(loader, model):
+    """Print how many samples from loader the model classifies correctly."""
+    num_correct = 0
+    num_samples = 0
+    model.eval()  # evaluation mode while scoring; restored to train() below
+    with torch.no_grad():
+        for x, y in loader:
+            x = x.to(device=device)
+            y = y.to(device=device)
+
+            scores = model(x)
+            _, predictions = scores.max(1)
+            # .item() keeps the counter a plain int, so it prints as a number
+            num_correct += (predictions == y).sum().item()
+            num_samples += predictions.size(0)
+
+    accuracy = num_correct / num_samples * 100 if num_samples else 0.0
+    print(f"Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}")
+
+    model.train()
+
+
+print("Checking accuracy on Training Set")
+check_accuracy(train_loader, model)
+
+print("Checking accuracy on Test Set")
+check_accuracy(test_loader, model)
--- a/ML/Pytorch/Basics/custom_dataset/power.csv
+++ b/ML/Pytorch/Basics/custom_dataset/power.csv
--- a/ML/Pytorch/Basics/custom_dataset_txt/loader_customtext.py
+++ b/ML/Pytorch/Basics/custom_dataset_txt/loader_customtext.py
@@ -0,0 +1,142 @@
+import os  # when loading file paths
+import pandas as pd  # for lookup in annotation file
+import spacy  # for tokenizer
+import torch
+from torch.nn.utils.rnn import pad_sequence  # pad batch
+from torch.utils.data import DataLoader, Dataset
+from PIL import Image  # Load img
+import torchvision.transforms as transforms
+
+
+# We want to convert text -> numerical values
+# 1. We need a Vocabulary mapping each word to a index
+# 2. We need to setup a Pytorch dataset to load the data
+# 3. Setup padding of every batch (all examples should be
+#    of same seq_len and setup dataloader)
+# Note that loading the image is very easy compared to the text!
+
+# Download with: python -m spacy download en_core_web_sm ("en" alias is gone in spaCy 3)
+spacy_eng = spacy.load("en_core_web_sm")
+
+
+class Vocabulary:
+    def __init__(self, freq_threshold):
+        self.itos = {0: "<PAD>", 1: "<SOS>", 2: "<EOS>", 3: "<UNK>"}
+        self.stoi = {"<PAD>": 0, "<SOS>": 1, "<EOS>": 2, "<UNK>": 3}
+        self.freq_threshold = freq_threshold
+
+    def __len__(self):
+        return len(self.itos)
+
+    @staticmethod
+    def tokenizer_eng(text):
+        return [tok.text.lower() for tok in spacy_eng.tokenizer(text)]
+
+    def build_vocabulary(self, sentence_list):
+        frequencies = {}
+        idx = 4
+
+        for sentence in sentence_list:
+            for word in self.tokenizer_eng(sentence):
+                if word not in frequencies:
+                    frequencies[word] = 1
+
+                else:
+                    frequencies[word] += 1
+
+                if frequencies[word] == self.freq_threshold:
+                    self.stoi[word] = idx
+                    self.itos[idx] = word
+                    idx += 1
+
+    def numericalize(self, text):
+        tokenized_text = self.tokenizer_eng(text)
+
+        return [
+            self.stoi[token] if token in self.stoi else self.stoi["<UNK>"]
+            for token in tokenized_text
+        ]
+
+
+class FlickrDataset(Dataset):
+    def __init__(self, root_dir, captions_file, transform=None, freq_threshold=5):
+        self.root_dir = root_dir
+        self.df = pd.read_csv(captions_file)
+        self.transform = transform
+
+        # Get img, caption columns
+        self.imgs = self.df["image"]
+        self.captions = self.df["caption"]
+
+        # Initialize vocabulary and build vocab
+        self.vocab = Vocabulary(freq_threshold)
+        self.vocab.build_vocabulary(self.captions.tolist())
+
+    def __len__(self):
+        return len(self.df)
+
+    def __getitem__(self, index):
+        caption = self.captions[index]
+        img_id = self.imgs[index]
+        img = Image.open(os.path.join(self.root_dir, img_id)).convert("RGB")
+
+        if self.transform is not None:
+            img = self.transform(img)
+
+        numericalized_caption = [self.vocab.stoi["<SOS>"]]
+        numericalized_caption += self.vocab.numericalize(caption)
+        numericalized_caption.append(self.vocab.stoi["<EOS>"])
+
+        return img, torch.tensor(numericalized_caption)
+
+
+class MyCollate:
+    def __init__(self, pad_idx):
+        self.pad_idx = pad_idx
+
+    def __call__(self, batch):
+        imgs = [item[0].unsqueeze(0) for item in batch]
+        imgs = torch.cat(imgs, dim=0)
+        targets = [item[1] for item in batch]
+        targets = pad_sequence(targets, batch_first=False, padding_value=self.pad_idx)
+
+        return imgs, targets
+
+
+def get_loader(
+    root_folder,
+    annotation_file,
+    transform,
+    batch_size=32,
+    num_workers=8,
+    shuffle=True,
+    pin_memory=True,
+):
+    dataset = FlickrDataset(root_folder, annotation_file, transform=transform)
+
+    pad_idx = dataset.vocab.stoi["<PAD>"]
+
+    loader = DataLoader(
+        dataset=dataset,
+        batch_size=batch_size,
+        num_workers=num_workers,
+        shuffle=shuffle,
+        pin_memory=pin_memory,
+        collate_fn=MyCollate(pad_idx=pad_idx),
+    )
+
+    return loader, dataset
+
+
+if __name__ == "__main__":
+    transform = transforms.Compose(
+        [transforms.Resize((224, 224)), transforms.ToTensor(),]
+    )
+
+    loader, dataset = get_loader(
+        "flickr8k/images/", "flickr8k/captions.txt", transform=transform
+    )
+
+    for idx, (imgs, captions) in enumerate(loader):
+        print(imgs.shape)
+        print(captions.shape)
--- a/ML/Pytorch/Basics/pytorch_bidirectional_lstm.py
+++ b/ML/Pytorch/Basics/pytorch_bidirectional_lstm.py
@@ -0,0 +1,125 @@
+"""
+Example code of a simple bidirectional LSTM on the MNIST dataset.
+
+Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
+*    2020-05-09 Initial coding
+
+"""
+
+
+# Imports
+import torch
+import torchvision
+import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
+import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
+import torch.nn.functional as F  # All functions that don't have any parameters
+from torch.utils.data import (
+    DataLoader,
+)  # Gives easier dataset management and creates mini batches
+import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
+import torchvision.transforms as transforms  # Transformations we can perform on our dataset
+
+# Set device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Hyperparameters
+input_size = 28
+sequence_length = 28
+num_layers = 2
+hidden_size = 256
+num_classes = 10
+learning_rate = 0.001
+batch_size = 64
+num_epochs = 2
+
+# Create a bidirectional LSTM
+class BRNN(nn.Module):
+    """Bidirectional LSTM followed by a linear classification layer.
+
+    Args:
+        input_size: Number of features per time step.
+        hidden_size: Hidden units per direction.
+        num_layers: Number of stacked LSTM layers.
+        num_classes: Size of the output layer.
+    """
+
+    def __init__(self, input_size, hidden_size, num_layers, num_classes):
+        super(BRNN, self).__init__()
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+        self.lstm = nn.LSTM(
+            input_size, hidden_size, num_layers, batch_first=True, bidirectional=True
+        )
+        # Forward and backward features are concatenated, hence hidden_size * 2.
+        self.fc = nn.Linear(hidden_size * 2, num_classes)
+
+    def forward(self, x):
+        """Return class scores for ``x`` (batch first, since ``batch_first=True``)."""
+        # One initial state per layer and direction. They are created on the
+        # input's own device and dtype so the module does not depend on a
+        # module-level ``device`` variable.
+        state_shape = (self.num_layers * 2, x.size(0), self.hidden_size)
+        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
+        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
+
+        out, _ = self.lstm(x, (h0, c0))
+        # Classify from the features of the last time step.
+        # NOTE(review): at this position the backward direction has only seen
+        # the final input element - confirm this is the intended read-out.
+        out = self.fc(out[:, -1, :])
+
+        return out
+
+
+# Load Data
+train_dataset = datasets.MNIST(
+    root="dataset/", train=True, transform=transforms.ToTensor(), download=True
+)
+
+test_dataset = datasets.MNIST(
+    root="dataset/", train=False, transform=transforms.ToTensor(), download=True
+)
+
+train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
+test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)
+
+# Initialize network
+model = BRNN(input_size, hidden_size, num_layers, num_classes).to(device)
+
+# Loss and optimizer
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+
+# Train Network
+for epoch in range(num_epochs):
+    for batch_idx, (data, targets) in enumerate(train_loader):
+        # Get data to cuda if possible
+        data = data.to(device=device).squeeze(1)
+        targets = targets.to(device=device)
+
+        # forward
+        scores = model(data)
+        loss = criterion(scores, targets)
+
+        # backward
+        optimizer.zero_grad()
+        loss.backward()
+
+        # gradient descent or adam step
+        optimizer.step()
+
+# Check accuracy on training & test to see how good our model is
+
+
+def check_accuracy(loader, model):
+    """Print the classification accuracy of ``model`` over ``loader``.
+
+    ``loader.dataset`` must expose a boolean ``train`` attribute; it only
+    selects which header line is printed. The model is put in eval mode for
+    the pass and switched back to train mode afterwards. Nothing is returned.
+    """
+    if loader.dataset.train:
+        print("Checking accuracy on training data")
+    else:
+        print("Checking accuracy on test data")
+
+    num_correct = 0
+    num_samples = 0
+    model.eval()
+
+    with torch.no_grad():
+        for x, y in loader:
+            # Drop the size-1 channel dimension so each image becomes a
+            # sequence of rows for the recurrent model.
+            x = x.to(device=device).squeeze(1)
+            y = y.to(device=device)
+
+            scores = model(x)
+            _, predictions = scores.max(1)
+            num_correct += (predictions == y).sum()
+            num_samples += predictions.size(0)
+
+        # Build the message from adjacent literals: a backslash continuation
+        # inside the string would leak the source indentation into the output.
+        accuracy = float(num_correct) / float(num_samples) * 100
+        print(f"Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}")
+
+    model.train()
+
+
+check_accuracy(train_loader, model)
+check_accuracy(test_loader, model)
--- a/ML/Pytorch/Basics/pytorch_init_weights.py
+++ b/ML/Pytorch/Basics/pytorch_init_weights.py
@@ -0,0 +1,69 @@
+"""
+Example code of how to initialize weights for a simple CNN network.
+
+Video explanation: https://youtu.be/xWQ-p_o0Uik
+Got any questions leave a comment on youtube :)
+
+Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
+*    2020-04-10 Initial coding
+
+"""
+
+# Imports
+import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
+import torch.nn.functional as F  # All functions that don't have any parameters
+
+
+class CNN(nn.Module):
+    """Small two-convolution CNN that re-initialises its weights on construction."""
+
+    def __init__(self, in_channels, num_classes):
+        super().__init__()
+        # Both convolutions share the same 3x3 / stride 1 / padding 1 geometry.
+        conv_geometry = dict(kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
+        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=6, **conv_geometry)
+        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
+        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, **conv_geometry)
+        self.fc1 = nn.Linear(16 * 7 * 7, num_classes)
+        self.initialize_weights()
+
+    def forward(self, x):
+        """Apply conv -> relu -> pool twice, flatten, then the linear layer."""
+        x = self.pool(F.relu(self.conv1(x)))
+        x = self.pool(F.relu(self.conv2(x)))
+        flat = x.reshape(x.shape[0], -1)
+        return self.fc1(flat)
+
+    def initialize_weights(self):
+        """Kaiming-uniform weights and zero biases for conv/linear layers.
+
+        BatchNorm2d layers, if any, get weight 1 and bias 0.
+        """
+        for layer in self.modules():
+            if isinstance(layer, nn.BatchNorm2d):
+                nn.init.constant_(layer.weight, 1)
+                nn.init.constant_(layer.bias, 0)
+            elif isinstance(layer, nn.Linear):
+                nn.init.kaiming_uniform_(layer.weight)
+                nn.init.constant_(layer.bias, 0)
+            elif isinstance(layer, nn.Conv2d):
+                nn.init.kaiming_uniform_(layer.weight)
+                # A convolution may have been created without a bias term.
+                if layer.bias is not None:
+                    nn.init.constant_(layer.bias, 0)
+
+
+if __name__ == "__main__":
+    # Build a network and print every parameter tensor to inspect the init.
+    model = CNN(in_channels=3, num_classes=10)
+
+    for tensor in model.parameters():
+        print(tensor)
--- a/ML/Pytorch/Basics/pytorch_loadsave.py
+++ b/ML/Pytorch/Basics/pytorch_loadsave.py
@@ -0,0 +1,54 @@
+"""
+Small code example of how to save and load checkpoint of a model.
+This example doesn't perform any training, so it would be quite useless.
+
+In practice you would save the model as you train, and then load it before
+continuing training at a later point.
+
+Video explanation of code & how to save and load model: https://youtu.be/g6kQl_EFn84
+Got any questions leave a comment on youtube :)
+
+Coded by Aladdin Persson <aladdin dot person at hotmail dot com>
+-   2020-04-07 Initial programming
+
+"""
+
+# Imports
+import torch
+import torchvision
+import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
+import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
+import torch.nn.functional as F  # All functions that don't have any parameters
+from torch.utils.data import (
+    DataLoader,
+)  # Gives easier dataset management and creates mini batches
+import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
+import torchvision.transforms as transforms  # Transformations we can perform on our dataset
+
+
+def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
+    """Announce the save, then write ``state`` to ``filename`` with ``torch.save``."""
+    print("=> Saving checkpoint")
+    torch.save(obj=state, f=filename)
+
+
+def load_checkpoint(checkpoint, model, optimizer):
+    """Restore ``model`` and ``optimizer`` in place from a checkpoint dict.
+
+    ``checkpoint`` must provide the keys ``"state_dict"`` (for the model) and
+    ``"optimizer"`` (for the optimizer).
+    """
+    print("=> Loading checkpoint")
+    for target, key in ((model, "state_dict"), (optimizer, "optimizer")):
+        target.load_state_dict(checkpoint[key])
+
+
+def main():
+    """Round-trip a checkpoint: save model/optimizer state, then load it back."""
+    # Initialize network
+    model = torchvision.models.vgg16(pretrained=False)
+    optimizer = optim.Adam(model.parameters())
+
+    # Try save checkpoint
+    save_checkpoint(
+        {"state_dict": model.state_dict(), "optimizer": optimizer.state_dict()}
+    )
+
+    # Try load checkpoint
+    restored = torch.load("my_checkpoint.pth.tar")
+    load_checkpoint(restored, model, optimizer)
+
+
+if __name__ == "__main__":
+    main()
--- a/ML/Pytorch/Basics/pytorch_lr_ratescheduler.py
+++ b/ML/Pytorch/Basics/pytorch_lr_ratescheduler.py
@@ -0,0 +1,107 @@
+"""
+Simple example of how to use a learning rate scheduler, in this case
+with a (very) small feedforward network trained on the MNIST dataset.
+Here the ReduceLROnPlateau scheduler is used, but it can easily be
+swapped for any of the other available schedulers.
+
+Video explanation: https://youtu.be/P31hB37g4Ak
+Got any questions leave a comment on youtube :)
+
+Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
+*    2020-04-10 Initial programming
+
+"""
+
+# Imports
+import torch
+import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
+import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
+from torch.utils.data import (
+    DataLoader,
+)  # Gives easier dataset management and creates mini batches
+import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
+import torchvision.transforms as transforms  # Transformations we can perform on our dataset
+
+# Set device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Hyperparameters
+num_classes = 10
+learning_rate = 0.1
+batch_size = 128
+num_epochs = 100
+
+# Define a very simple model
+model = nn.Sequential(nn.Linear(784, 50), nn.ReLU(), nn.Linear(50, 10)).to(device)
+
+# Load Data
+train_dataset = datasets.MNIST(
+    root="dataset/", train=True, transform=transforms.ToTensor(), download=True
+)
+train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
+
+# Loss and optimizer
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+
+# Define Scheduler
+scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
+    optimizer, factor=0.1, patience=5, verbose=True
+)
+
+# Train Network
+# Epochs are numbered from 1, so the bound is num_epochs + 1; range(1, num_epochs)
+# would run one epoch fewer than requested.
+for epoch in range(1, num_epochs + 1):
+    losses = []
+
+    for batch_idx, (data, targets) in enumerate(train_loader):
+        # Flatten each image into one vector for the linear layers,
+        # then get data to cuda if possible
+        data = data.reshape(data.shape[0], -1)
+        data = data.to(device=device)
+        targets = targets.to(device=device)
+
+        # forward
+        scores = model(data)
+        loss = criterion(scores, targets)
+
+        losses.append(loss.item())
+
+        # backward
+        loss.backward()
+
+        # gradient descent or adam step; gradients are cleared right after so
+        # the next batch starts from zero. The scheduler is NOT stepped per batch.
+        optimizer.step()
+        optimizer.zero_grad()
+
+    mean_loss = sum(losses) / len(losses)
+
+    # After each epoch do scheduler.step, note in this scheduler we need to send
+    # in loss for that epoch!
+    scheduler.step(mean_loss)
+    print(f"Cost at epoch {epoch} is {mean_loss}")
+
+# Check accuracy on training & test to see how good our model is
+def check_accuracy(loader, model):
+    """Print the classification accuracy of ``model`` over ``loader``.
+
+    The model is put in eval mode for the pass and switched back to train
+    mode afterwards. Nothing is returned.
+    """
+    num_correct = 0
+    num_samples = 0
+    model.eval()
+
+    with torch.no_grad():
+        for x, y in loader:
+            # Flatten each image exactly as the training loop does; the
+            # linear model cannot consume the unflattened image batch.
+            x = x.reshape(x.shape[0], -1)
+            x = x.to(device=device)
+            y = y.to(device=device)
+
+            scores = model(x)
+            _, predictions = scores.max(1)
+            num_correct += (predictions == y).sum()
+            num_samples += predictions.size(0)
+
+        print(
+            f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}"
+        )
+
+    model.train()
+
+
+check_accuracy(train_loader, model)
--- a/ML/Pytorch/Basics/pytorch_mixed_precision_example.py
+++ b/ML/Pytorch/Basics/pytorch_mixed_precision_example.py
@@ -0,0 +1,99 @@
+# Imports
+import torch
+import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
+import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
+import torch.nn.functional as F  # All functions that don't have any parameters
+from torch.utils.data import DataLoader  # Gives easier dataset management and creates mini batches
+import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
+import torchvision.transforms as transforms  # Transformations we can perform on our dataset
+
+
+# Simple CNN
+class CNN(nn.Module):
+    """Deliberately wide two-convolution CNN.
+
+    Args:
+        in_channels: Number of channels of the input images (default 1).
+        num_classes: Size of the output layer (default 10).
+    """
+
+    def __init__(self, in_channels=1, num_classes=10):
+        super(CNN, self).__init__()
+        # Honour the in_channels argument instead of hard-coding 1.
+        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=420, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
+        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
+        self.conv2 = nn.Conv2d(in_channels=420, out_channels=1000, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
+        # 7 * 7 is the spatial size left after two 2x2 poolings of a 28x28 input.
+        self.fc1 = nn.Linear(1000 * 7 * 7, num_classes)
+
+    def forward(self, x):
+        """Apply conv -> relu -> pool twice, flatten, then the linear layer."""
+        x = F.relu(self.conv1(x))
+        x = self.pool(x)
+        x = F.relu(self.conv2(x))
+        x = self.pool(x)
+        x = x.reshape(x.shape[0], -1)
+        x = self.fc1(x)
+
+        return x
+
+
+# Set device
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+# Hyperparameters
+in_channel = 1
+num_classes = 10
+learning_rate = 0.001
+batch_size = 100
+num_epochs = 5
+
+# Load Data
+train_dataset = datasets.MNIST(root='dataset/', train=True, transform=transforms.ToTensor(), download=True)
+train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
+test_dataset = datasets.MNIST(root='dataset/', train=False, transform=transforms.ToTensor(), download=True)
+test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)
+
+# Initialize network
+model = CNN().to(device)
+
+# Loss and optimizer
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+
+# Necessary for FP16
+scaler = torch.cuda.amp.GradScaler()
+
+# Train Network
+for epoch in range(num_epochs):
+    for batch_idx, (data, targets) in enumerate(train_loader):
+        # Get data to cuda if possible
+        data = data.to(device=device)
+        targets = targets.to(device=device)
+
+        # forward
+        with torch.cuda.amp.autocast():
+            scores = model(data)
+            loss = criterion(scores, targets)
+
+        # backward
+        optimizer.zero_grad()
+        scaler.scale(loss).backward()
+        scaler.step(optimizer)
+        scaler.update()
+
+
+# Check accuracy on training & test to see how good our model is
+
+def check_accuracy(loader, model):
+    """Print how many samples of ``loader`` the ``model`` classifies correctly.
+
+    Runs in eval mode without gradient tracking and puts the model back in
+    train mode before returning.
+    """
+    model.eval()
+    num_correct, num_samples = 0, 0
+
+    with torch.no_grad():
+        for inputs, labels in loader:
+            inputs = inputs.to(device=device)
+            labels = labels.to(device=device)
+
+            # max over dim 1 yields (values, indices); the indices are the classes.
+            predictions = model(inputs).max(1)[1]
+            num_correct += (predictions == labels).sum()
+            num_samples += predictions.size(0)
+
+        print(f'Got {num_correct} / {num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}')
+
+    model.train()
+
+
+check_accuracy(train_loader, model)
+check_accuracy(test_loader, model)
--- a/ML/Pytorch/Basics/pytorch_pretrain_finetune.py
+++ b/ML/Pytorch/Basics/pytorch_pretrain_finetune.py
@@ -0,0 +1,123 @@
+"""
+Shows a small example of how to load a pretrain model (VGG16) from PyTorch,
+and modifies this to train on the CIFAR10 dataset. The same method generalizes
+well to other datasets, but the modifications to the network may need to be changed.
+
+Video explanation: https://youtu.be/U4bHxEhMGNk
+Got any questions leave a comment on youtube :)
+
+Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
+*    2020-04-08 Initial coding
+
+"""
+
+# Imports
+import torch
+import torchvision
+import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
+import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
+import torch.nn.functional as F  # All functions that don't have any parameters
+from torch.utils.data import (
+    DataLoader,
+)  # Gives easier dataset management and creates mini batches
+import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
+import torchvision.transforms as transforms  # Transformations we can perform on our dataset
+
+# Set device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Hyperparameters
+num_classes = 10
+learning_rate = 1e-3
+batch_size = 1024
+num_epochs = 5
+
+# Simple Identity class that lets input pass without changes
+class Identity(nn.Module):
+    """No-op module: ``forward`` returns exactly the object it was given."""
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, x):
+        passthrough = x
+        return passthrough
+
+
+# Load pretrain model & modify it
+model = torchvision.models.vgg16(pretrained=True)
+
+# If you want to do finetuning then set requires_grad = False
+# Remove these two lines if you want to train entire model,
+# and only want to load the pretrain weights.
+for param in model.parameters():
+    param.requires_grad = False
+
+model.avgpool = Identity()
+model.classifier = nn.Sequential(
+    nn.Linear(512, 100), nn.ReLU(), nn.Linear(100, num_classes)
+)
+model.to(device)
+
+
+# Load Data
+train_dataset = datasets.CIFAR10(
+    root="dataset/", train=True, transform=transforms.ToTensor(), download=True
+)
+train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
+
+# Loss and optimizer
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+
+# Train Network
+for epoch in range(num_epochs):
+    losses = []
+
+    for batch_idx, (data, targets) in enumerate(train_loader):
+        # Get data to cuda if possible
+        data = data.to(device=device)
+        targets = targets.to(device=device)
+
+        # forward
+        scores = model(data)
+        loss = criterion(scores, targets)
+
+        losses.append(loss.item())
+        # backward
+        optimizer.zero_grad()
+        loss.backward()
+
+        # gradient descent or adam step
+        optimizer.step()
+
+    print(f"Cost at epoch {epoch} is {sum(losses)/len(losses):.5f}")
+
+# Check accuracy on training & test to see how good our model is
+
+
+def check_accuracy(loader, model):
+    """Print the accuracy of ``model`` on ``loader``.
+
+    ``loader.dataset.train`` only chooses the header line. The model is
+    evaluated in eval mode and returned to train mode afterwards.
+    """
+    print(
+        "Checking accuracy on training data"
+        if loader.dataset.train
+        else "Checking accuracy on test data"
+    )
+
+    model.eval()
+    num_correct, num_samples = 0, 0
+
+    with torch.no_grad():
+        for inputs, labels in loader:
+            inputs = inputs.to(device=device)
+            labels = labels.to(device=device)
+
+            # max over dim 1 yields (values, indices); the indices are the classes.
+            predictions = model(inputs).max(1)[1]
+            num_correct += (predictions == labels).sum()
+            num_samples += predictions.size(0)
+
+        print(
+            f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}"
+        )
+
+    model.train()
+
+
+check_accuracy(train_loader, model)
--- a/ML/Pytorch/Basics/pytorch_progress_bar.py
+++ b/ML/Pytorch/Basics/pytorch_progress_bar.py
@@ -0,0 +1,41 @@
+import torch
+import torch.nn as nn
+from tqdm import tqdm
+from torch.utils.data import TensorDataset, DataLoader
+
+# Create a simple toy dataset example, normally this
+# would be doing custom class with __getitem__ etc,
+# which we have done in custom dataset tutorials
+x = torch.randn((1000, 3, 224, 224))
+y = torch.randint(low=0, high=10, size=(1000, 1))
+ds = TensorDataset(x, y)
+loader = DataLoader(ds, batch_size=8)
+
+
+model = nn.Sequential(
+    nn.Conv2d(3, 10, kernel_size=3, padding=1, stride=1),
+    nn.Flatten(),
+    nn.Linear(10*224*224, 10),
+)
+
+NUM_EPOCHS = 100
+for epoch in range(NUM_EPOCHS):
+    loop = tqdm(loader)
+    for idx, (x, y) in enumerate(loop):
+        scores = model(x)
+
+        # here we would compute loss, backward, optimizer step etc.
+        # you know how it goes, but now you have a nice progress bar
+        # with tqdm
+
+        # then at the bottom if you want additional info shown, you can
+        # add it here, for loss and accuracy you would obviously compute
+        # but now we just set them to random values
+        loop.set_description(f"Epoch [{epoch}/{NUM_EPOCHS}]")
+        loop.set_postfix(loss=torch.rand(1).item(), acc=torch.rand(1).item())
+
+# There you go. Hope it was useful :)
+
+
+
+
--- a/ML/Pytorch/Basics/pytorch_rnn_gru_lstm.py
+++ b/ML/Pytorch/Basics/pytorch_rnn_gru_lstm.py
@@ -0,0 +1,172 @@
+"""
+Example code of a simple RNN, GRU, LSTM on the MNIST dataset.
+
+Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
+*    2020-05-09 Initial coding
+
+"""
+
+# Imports
+import torch
+import torchvision
+import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
+import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
+import torch.nn.functional as F  # All functions that don't have any parameters
+from torch.utils.data import (
+    DataLoader,
+)  # Gives easier dataset management and creates mini batches
+import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
+import torchvision.transforms as transforms  # Transformations we can perform on our dataset
+
+# Set device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Hyperparameters
+input_size = 28
+hidden_size = 256
+num_layers = 2
+num_classes = 10
+sequence_length = 28
+learning_rate = 0.005
+batch_size = 64
+num_epochs = 2
+
+# Recurrent neural network (many-to-one)
+class RNN(nn.Module):
+    """Plain RNN classifier that reads out from every time step.
+
+    The linear layer is sized with the module-level ``sequence_length``, so
+    inputs must have exactly that many time steps.
+
+    Args:
+        input_size: Number of features per time step.
+        hidden_size: Number of hidden units.
+        num_layers: Number of stacked RNN layers.
+        num_classes: Size of the output layer.
+    """
+
+    def __init__(self, input_size, hidden_size, num_layers, num_classes):
+        super(RNN, self).__init__()
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
+        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)
+
+    def forward(self, x):
+        """Return class scores for ``x`` (batch first, since ``batch_first=True``)."""
+        # Initial hidden state (a plain RNN has no cell state), created on the
+        # input's own device and dtype rather than a module-level ``device``.
+        h0 = torch.zeros(
+            self.num_layers, x.size(0), self.hidden_size, device=x.device, dtype=x.dtype
+        )
+
+        # Forward propagate the RNN
+        out, _ = self.rnn(x, h0)
+        # Concatenate the hidden states of all time steps per sample
+        out = out.reshape(out.shape[0], -1)
+
+        # Classify from the concatenated hidden states
+        out = self.fc(out)
+        return out
+
+
+# Recurrent neural network with GRU (many-to-one)
+class RNN_GRU(nn.Module):
+    """GRU classifier that reads out from every time step.
+
+    The linear layer is sized with the module-level ``sequence_length``, so
+    inputs must have exactly that many time steps.
+
+    Args:
+        input_size: Number of features per time step.
+        hidden_size: Number of hidden units.
+        num_layers: Number of stacked GRU layers.
+        num_classes: Size of the output layer.
+    """
+
+    def __init__(self, input_size, hidden_size, num_layers, num_classes):
+        super(RNN_GRU, self).__init__()
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
+        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)
+
+    def forward(self, x):
+        """Return class scores for ``x`` (batch first, since ``batch_first=True``)."""
+        # Initial hidden state (a GRU has no cell state), created on the
+        # input's own device and dtype rather than a module-level ``device``.
+        h0 = torch.zeros(
+            self.num_layers, x.size(0), self.hidden_size, device=x.device, dtype=x.dtype
+        )
+
+        # Forward propagate the GRU
+        out, _ = self.gru(x, h0)
+        # Concatenate the hidden states of all time steps per sample
+        out = out.reshape(out.shape[0], -1)
+
+        # Classify from the concatenated hidden states
+        out = self.fc(out)
+        return out
+
+
+# Recurrent neural network with LSTM (many-to-one)
+class RNN_LSTM(nn.Module):
+    """LSTM classifier that reads out from every time step.
+
+    The linear layer is sized with the module-level ``sequence_length``, so
+    inputs must have exactly that many time steps.
+
+    Args:
+        input_size: Number of features per time step.
+        hidden_size: Number of hidden units.
+        num_layers: Number of stacked LSTM layers.
+        num_classes: Size of the output layer.
+    """
+
+    def __init__(self, input_size, hidden_size, num_layers, num_classes):
+        super(RNN_LSTM, self).__init__()
+        self.hidden_size = hidden_size
+        self.num_layers = num_layers
+        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
+        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)
+
+    def forward(self, x):
+        """Return class scores for ``x`` (batch first, since ``batch_first=True``)."""
+        # Initial hidden and cell states, created on the input's own device
+        # and dtype rather than a module-level ``device``.
+        state_shape = (self.num_layers, x.size(0), self.hidden_size)
+        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
+        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
+
+        # Forward propagate LSTM
+        out, _ = self.lstm(
+            x, (h0, c0)
+        )  # out: tensor of shape (batch_size, seq_length, hidden_size)
+        # Concatenate the hidden states of all time steps per sample
+        out = out.reshape(out.shape[0], -1)
+
+        # Classify from the concatenated hidden states
+        out = self.fc(out)
+        return out
+
+
+# Load Data
+train_dataset = datasets.MNIST(
+    root="dataset/", train=True, transform=transforms.ToTensor(), download=True
+)
+
+test_dataset = datasets.MNIST(
+    root="dataset/", train=False, transform=transforms.ToTensor(), download=True
+)
+
+train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
+test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)
+
+# Initialize network
+model = RNN_LSTM(input_size, hidden_size, num_layers, num_classes).to(device)
+
+# Loss and optimizer
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+
+# Train Network
+for epoch in range(num_epochs):
+    for batch_idx, (data, targets) in enumerate(train_loader):
+        # Get data to cuda if possible
+        data = data.to(device=device).squeeze(1)
+        targets = targets.to(device=device)
+
+        # forward
+        scores = model(data)
+        loss = criterion(scores, targets)
+
+        # backward
+        optimizer.zero_grad()
+        loss.backward()
+
+        # gradient descent or adam step
+        optimizer.step()
+
+# Check accuracy on training & test to see how good our model is
+def check_accuracy(loader, model):
+    """Print the classification accuracy of ``model`` over ``loader``.
+
+    ``loader.dataset`` must expose a boolean ``train`` attribute; it only
+    selects which header line is printed. The model is put in eval mode for
+    the pass and switched back to train mode afterwards. Nothing is returned.
+    """
+    if loader.dataset.train:
+        print("Checking accuracy on training data")
+    else:
+        print("Checking accuracy on test data")
+
+    num_correct = 0
+    num_samples = 0
+
+    # Set model to eval
+    model.eval()
+
+    with torch.no_grad():
+        for x, y in loader:
+            # Drop the size-1 channel dimension so each image becomes a
+            # sequence of rows for the recurrent model.
+            x = x.to(device=device).squeeze(1)
+            y = y.to(device=device)
+
+            scores = model(x)
+            _, predictions = scores.max(1)
+            num_correct += (predictions == y).sum()
+            num_samples += predictions.size(0)
+
+        # Format on one logical line: a backslash continuation inside the
+        # string would leak the source indentation into the printed message.
+        accuracy = float(num_correct) / float(num_samples) * 100
+        print(f"Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}")
+    # Set model back to train
+    model.train()
+
+
+check_accuracy(train_loader, model)
+check_accuracy(test_loader, model)
--- a/ML/Pytorch/Basics/pytorch_simple_CNN.py
+++ b/ML/Pytorch/Basics/pytorch_simple_CNN.py
@@ -0,0 +1,134 @@
+"""
+Example code of a simple CNN network training on MNIST dataset.
+The code is intended to show how to create a CNN network as well
+as how to initialize loss, optimizer, etc. in a simple way to get
+training to work with function that checks accuracy as well.
+
+Video explanation: https://youtu.be/wnK3uWv_WkU
+Got any questions leave a comment on youtube :)
+
+Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
+*    2020-04-08 Initial coding
+
+"""
+
+# Imports
+import torch
+import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
+import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
+import torch.nn.functional as F  # All functions that don't have any parameters
+from torch.utils.data import (
+    DataLoader,
+)  # Gives easier dataset management and creates mini batches
+import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
+import torchvision.transforms as transforms  # Transformations we can perform on our dataset
+
+# Simple CNN
+class CNN(nn.Module):
+    """Small two-convolution CNN.
+
+    Args:
+        in_channels: Number of channels of the input images (default 1).
+        num_classes: Size of the output layer (default 10).
+    """
+
+    def __init__(self, in_channels=1, num_classes=10):
+        super(CNN, self).__init__()
+        self.conv1 = nn.Conv2d(
+            in_channels=in_channels,  # honour the argument instead of hard-coding 1
+            out_channels=8,
+            kernel_size=(3, 3),
+            stride=(1, 1),
+            padding=(1, 1),
+        )
+        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
+        self.conv2 = nn.Conv2d(
+            in_channels=8,
+            out_channels=16,
+            kernel_size=(3, 3),
+            stride=(1, 1),
+            padding=(1, 1),
+        )
+        # 7 * 7 is the spatial size left after two 2x2 poolings of a 28x28 input.
+        self.fc1 = nn.Linear(16 * 7 * 7, num_classes)
+
+    def forward(self, x):
+        """Apply conv -> relu -> pool twice, flatten, then the linear layer."""
+        x = F.relu(self.conv1(x))
+        x = self.pool(x)
+        x = F.relu(self.conv2(x))
+        x = self.pool(x)
+        x = x.reshape(x.shape[0], -1)
+        x = self.fc1(x)
+
+        return x
+
+
+# Set device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Hyperparameters
+in_channel = 1
+num_classes = 10
+learning_rate = 0.001
+batch_size = 64
+num_epochs = 5
+
+# Load Data
+train_dataset = datasets.MNIST(
+    root="dataset/", train=True, transform=transforms.ToTensor(), download=True
+)
+train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
+test_dataset = datasets.MNIST(
+    root="dataset/", train=False, transform=transforms.ToTensor(), download=True
+)
+test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)
+
+# Initialize network
+model = CNN().to(device)
+
+# Loss and optimizer
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+
+# Train Network
+for epoch in range(num_epochs):
+    for batch_idx, (data, targets) in enumerate(train_loader):
+        # Get data to cuda if possible
+        data = data.to(device=device)
+        targets = targets.to(device=device)
+
+        # forward
+        scores = model(data)
+        loss = criterion(scores, targets)
+
+        # backward
+        optimizer.zero_grad()
+        loss.backward()
+
+        # gradient descent or adam step
+        optimizer.step()
+
+# Check accuracy on training & test to see how good our model is
+
+
+def check_accuracy(loader, model):
+    """Print the accuracy of ``model`` on ``loader``.
+
+    ``loader.dataset.train`` only chooses the header line. The model is
+    evaluated in eval mode and returned to train mode afterwards.
+    """
+    print(
+        "Checking accuracy on training data"
+        if loader.dataset.train
+        else "Checking accuracy on test data"
+    )
+
+    model.eval()
+    num_correct, num_samples = 0, 0
+
+    with torch.no_grad():
+        for inputs, labels in loader:
+            inputs = inputs.to(device=device)
+            labels = labels.to(device=device)
+
+            # max over dim 1 yields (values, indices); the indices are the classes.
+            predictions = model(inputs).max(1)[1]
+            num_correct += (predictions == labels).sum()
+            num_samples += predictions.size(0)
+
+        print(
+            f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}"
+        )
+
+    model.train()
+
+
+check_accuracy(train_loader, model)
+check_accuracy(test_loader, model)
--- a/ML/Pytorch/Basics/pytorch_simple_fullynet.py
+++ b/ML/Pytorch/Basics/pytorch_simple_fullynet.py
@@ -0,0 +1,120 @@
+"""
+Working code of a simple Fully Connected (FC) network training on MNIST dataset.
+The code is intended to show how to create a FC network as well
+as how to initialize loss, optimizer, etc. in a simple way to get
+training to work with function that checks accuracy as well.
+
+Video explanation: https://youtu.be/Jy4wM2X21u0
+Got any questions leave a comment on youtube :)
+
+Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
+*    2020-04-08 Initial coding
+
+"""
+
+# Imports
+import torch
+import torchvision
+import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
+import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
+import torch.nn.functional as F  # All functions that don't have any parameters
+from torch.utils.data import (
+    DataLoader,
+)  # Gives easier dataset managment and creates mini batches
+import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
+import torchvision.transforms as transforms  # Transformations we can perform on our dataset
+
+# Create Fully Connected Network
+class NN(nn.Module):
+    def __init__(self, input_size, num_classes):
+        super(NN, self).__init__()
+        self.fc1 = nn.Linear(input_size, 50)
+        self.fc2 = nn.Linear(50, num_classes)
+
+    def forward(self, x):
+        x = F.relu(self.fc1(x))
+        x = self.fc2(x)
+        return x
+
+
+# Set device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Hyperparameters
+input_size = 784
+num_classes = 10
+learning_rate = 0.001
+batch_size = 64
+num_epochs = 1
+
+# Load Data
+train_dataset = datasets.MNIST(
+    root="dataset/", train=True, transform=transforms.ToTensor(), download=True
+)
+train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
+test_dataset = datasets.MNIST(
+    root="dataset/", train=False, transform=transforms.ToTensor(), download=True
+)
+test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)
+
+# Initialize network
+model = NN(input_size=input_size, num_classes=num_classes).to(device)
+
+# Loss and optimizer
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+
+# Train Network
+for epoch in range(num_epochs):
+    for batch_idx, (data, targets) in enumerate(train_loader):
+        # Get data to cuda if possible
+        data = data.to(device=device)
+        targets = targets.to(device=device)
+
+        # Get to correct shape
+        data = data.reshape(data.shape[0], -1)
+
+        # forward
+        scores = model(data)
+        loss = criterion(scores, targets)
+
+        # backward
+        optimizer.zero_grad()
+        loss.backward()
+
+        # gradient descent or adam step
+        optimizer.step()
+
+# Check accuracy on training & test to see how good our model is
+
+
+def check_accuracy(loader, model):
+    if loader.dataset.train:
+        print("Checking accuracy on training data")
+    else:
+        print("Checking accuracy on test data")
+
+    num_correct = 0
+    num_samples = 0
+    model.eval()
+
+    with torch.no_grad():
+        for x, y in loader:
+            x = x.to(device=device)
+            y = y.to(device=device)
+            x = x.reshape(x.shape[0], -1)
+
+            scores = model(x)
+            _, predictions = scores.max(1)
+            num_correct += (predictions == y).sum()
+            num_samples += predictions.size(0)
+
+        print(
+            f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}"
+        )
+
+    model.train()
+
+
+check_accuracy(train_loader, model)
+check_accuracy(test_loader, model)
--- a/ML/Pytorch/Basics/pytorch_std_mean.py
+++ b/ML/Pytorch/Basics/pytorch_std_mean.py
@@ -0,0 +1,28 @@
+import torch
+import torchvision.transforms as transforms
+from torch.utils.data import DataLoader
+import torchvision.datasets as datasets
+from tqdm import tqdm
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+train_set = datasets.CIFAR10(root="ds/", transform=transforms.ToTensor(), download=True)
+train_loader = DataLoader(dataset=train_set, batch_size=64, shuffle=True)
+
+def get_mean_std(loader):
+    # var[X] = E[X**2] - E[X]**2
+    channels_sum, channels_sqrd_sum, num_batches = 0, 0, 0
+
+    for data, _ in tqdm(loader):
+        channels_sum += torch.mean(data, dim=[0, 2, 3])
+        channels_sqrd_sum += torch.mean(data ** 2, dim=[0, 2, 3])
+        num_batches += 1
+
+    mean = channels_sum / num_batches
+    std = (channels_sqrd_sum / num_batches - mean ** 2) ** 0.5
+
+    return mean, std
+
+
+mean, std = get_mean_std(train_loader)
+print(mean)
+print(std)
--- a/ML/Pytorch/Basics/pytorch_tensorbasics.py
+++ b/ML/Pytorch/Basics/pytorch_tensorbasics.py
@@ -0,0 +1,299 @@
+"""
+Walk through of a lot of different useful Tensor Operations, where we
+go through what I think are four main parts in:
+
+1. Initialization of a Tensor
+2. Tensor Mathematical Operations and Comparison
+3. Tensor Indexing
+4. Tensor Reshaping
+
+But also other things such as setting the device (GPU/CPU) and converting
+between different types (int, float etc) and how to convert a tensor to a
+numpy array and vice-versa.
+
+"""
+
+import torch
+
+# ================================================================= #
+#                        Initializing Tensor                        #
+# ================================================================= #
+
+device = "cuda" if torch.cuda.is_available() else "cpu"  # Cuda to run on GPU!
+
+# Initializing a Tensor in this case of shape 2x3 (2 rows, 3 columns)
+my_tensor = torch.tensor(
+    [[1, 2, 3], [4, 5, 6]], dtype=torch.float32, device=device, requires_grad=True
+)
+
+# A few tensor attributes
+print(
+    f"Information about tensor: {my_tensor}"
+)  # Prints data of the tensor, device and grad info
+print(
+    "Type of Tensor {my_tensor.dtype}"
+)  # Prints dtype of the tensor (torch.float32, etc)
+print(
+    f"Device Tensor is on {my_tensor.device}"
+)  # Prints cpu/cuda (followed by gpu number)
+print(f"Shape of tensor {my_tensor.shape}")  # Prints shape, in this case 2x3
+print(f"Requires gradient: {my_tensor.requires_grad}")  # Prints true/false
+
+# Other common initialization methods (there exists a ton more)
+x = torch.empty(size=(3, 3))  # Tensor of shape 3x3 with uninitialized data
+x = torch.zeros((3, 3))  # Tensor of shape 3x3 with values of 0
+x = torch.rand(
+    (3, 3)
+)  # Tensor of shape 3x3 with values from uniform distribution in interval [0,1)
+x = torch.ones((3, 3))  # Tensor of shape 3x3 with values of 1
+x = torch.eye(5, 5)  # Returns Identity Matrix I, (I <-> Eye), matrix of shape 5x5
+x = torch.arange(
+    start=0, end=5, step=1
+)  # Tensor [0, 1, 2, 3, 4], note, can also do: torch.arange(5)
+x = torch.linspace(start=0.1, end=1, steps=10)  # x = [0.1, 0.2, ..., 1]
+x = torch.empty(size=(1, 5)).normal_(
+    mean=0, std=1
+)  # Normally distributed with mean=0, std=1
+x = torch.empty(size=(1, 5)).uniform_(
+    0, 1
+)  # Values from a uniform distribution low=0, high=1
+x = torch.diag(torch.ones(3))  # Diagonal matrix of shape 3x3
+
+# How to make initialized tensors to other types (int, float, double)
+# These will work even if you're on CPU or CUDA!
+tensor = torch.arange(4)  # [0, 1, 2, 3] Initialized as int64 by default
+print(f"Converted Boolean: {tensor.bool()}")  # Converted to Boolean: 1 if nonzero
+print(f"Converted int16 {tensor.short()}")  # Converted to int16
+print(
+    f"Converted int64 {tensor.long()}"
+)  # Converted to int64 (This one is very important, used super often)
+print(f"Converted float16 {tensor.half()}")  # Converted to float16
+print(
+    f"Converted float32 {tensor.float()}"
+)  # Converted to float32 (This one is very important, used super often)
+print(f"Converted float64 {tensor.double()}")  # Converted to float64
+
+# Array to Tensor conversion and vice-versa
+import numpy as np
+
+np_array = np.zeros((5, 5))
+tensor = torch.from_numpy(np_array)
+np_array_again = (
+    tensor.numpy()
+)  # np_array_again will be same as np_array (perhaps with numerical round offs)
+
+# =============================================================================== #
+#                        Tensor Math & Comparison Operations                      #
+# =============================================================================== #
+
+x = torch.tensor([1, 2, 3])
+y = torch.tensor([9, 8, 7])
+
+# -- Addition --
+z1 = torch.empty(3)
+torch.add(x, y, out=z1)  # This is one way
+z2 = torch.add(x, y)  # This is another way
+z = x + y  # This is my preferred way, simple and clean.
+
+# -- Subtraction --
+z = x - y  # We can do similarly as the preferred way of addition
+
+# -- Division (A bit clunky) --
+z = torch.true_divide(x, y)  # Will do element wise division if of equal shape
+
+# -- Inplace Operations --
+t = torch.zeros(3)
+
+t.add_(x)  # Whenever we have operation followed by _ it will mutate the tensor in place
+t += x  # Also inplace: t = t + x is not inplace, bit confusing.
+
+# -- Exponentiation (Element wise if vector or matrices) --
+z = x.pow(2)  # z = [1, 4, 9]
+z = x ** 2  # z = [1, 4, 9]
+
+
+# -- Simple Comparison --
+z = x > 0  # Returns [True, True, True]
+z = x < 0  # Returns [False, False, False]
+
+# -- Matrix Multiplication --
+x1 = torch.rand((2, 5))
+x2 = torch.rand((5, 3))
+x3 = torch.mm(x1, x2)  # Matrix multiplication of x1 and x2, out shape: 2x3
+x3 = x1.mm(x2)  # Similar as line above
+
+# -- Matrix Exponentiation --
+matrix_exp = torch.rand(5, 5)
+print(
+    matrix_exp.matrix_power(3)
+)  # is same as matrix_exp (mm) matrix_exp (mm) matrix_exp
+
+# -- Element wise Multiplication --
+z = x * y  # z = [9, 16, 21] = [1*9, 2*8, 3*7]
+
+# -- Dot product --
+z = torch.dot(x, y)  # Dot product, in this case z = 1*9 + 2*8 + 3*7
+
+# -- Batch Matrix Multiplication --
+batch = 32
+n = 10
+m = 20
+p = 30
+tensor1 = torch.rand((batch, n, m))
+tensor2 = torch.rand((batch, m, p))
+out_bmm = torch.bmm(tensor1, tensor2)  # Will be shape: (b x n x p)
+
+# -- Example of broadcasting --
+x1 = torch.rand((5, 5))
+x2 = torch.ones((1, 5))
+z = (
+    x1 - x2
+)  # Shape of z is 5x5: How? The 1x5 vector (x2) is subtracted for each row in the 5x5 (x1)
+z = (
+    x1 ** x2
+)  # Shape of z is 5x5: How? Broadcasting! Element wise exponentiation for every row
+
+# Other useful tensor operations
+sum_x = torch.sum(
+    x, dim=0
+)  # Sum of x across dim=0 (which is the only dim in our case), sum_x = 6
+values, indices = torch.max(x, dim=0)  # Can also do x.max(dim=0)
+values, indices = torch.min(x, dim=0)  # Can also do x.min(dim=0)
+abs_x = torch.abs(x)  # Returns x where abs function has been applied to every element
+z = torch.argmax(x, dim=0)  # Gets index of the maximum value
+z = torch.argmin(x, dim=0)  # Gets index of the minimum value
+mean_x = torch.mean(x.float(), dim=0)  # mean requires x to be float
+z = torch.eq(x, y)  # Element wise comparison, in this case z = [False, False, False]
+sorted_y, indices = torch.sort(y, dim=0, descending=False)
+
+z = torch.clamp(x, min=0)
+# All values < 0 set to 0 and values > 0 unchanged (this is exactly ReLU function)
+# If you want to values over max_val to be clamped, do torch.clamp(x, min=min_val, max=max_val)
+
+x = torch.tensor([1, 0, 1, 1, 1], dtype=torch.bool)  # True/False values
+z = torch.any(x)  # will return True, can also do x.any() instead of torch.any(x)
+z = torch.all(
+    x
+)  # will return False (since not all are True), can also do x.all() instead of torch.all()
+
+# ============================================================= #
+#                        Tensor Indexing                        #
+# ============================================================= #
+
+batch_size = 10
+features = 25
+x = torch.rand((batch_size, features))
+
+# Get first examples features
+print(x[0].shape)  # shape [25], this is same as doing x[0,:]
+
+# Get the first feature for all examples
+print(x[:, 0].shape)  # shape [10]
+
+# For example: Want to access third example in the batch and the first ten features
+print(x[2, 0:10].shape)  # shape: [10]
+
+# For example we can use this to, assign certain elements
+x[0, 0] = 100
+
+# Fancy Indexing
+x = torch.arange(10)
+indices = [2, 5, 8]
+print(x[indices])  # x[indices] = [2, 5, 8]
+
+x = torch.rand((3, 5))
+rows = torch.tensor([1, 0])
+cols = torch.tensor([4, 0])
+print(x[rows, cols])  # Gets second row fifth column and first row first column
+
+# More advanced indexing
+x = torch.arange(10)
+print(x[(x < 2) | (x > 8)])  # will be [0, 1, 9]
+print(x[x.remainder(2) == 0])  # will be [0, 2, 4, 6, 8]
+
+# Useful operations for indexing
+print(
+    torch.where(x > 5, x, x * 2)
+)  # gives [0, 2, 4, 6, 8, 10, 6, 7, 8, 9], all values x > 5 yield x, else x*2
+x = torch.tensor([0, 0, 1, 2, 2, 3, 4]).unique()  # x = [0, 1, 2, 3, 4]
+print(
+    x.ndimension()
+)  # The number of dimensions, in this case 1. if x.shape is 5x5x5 ndim would be 3
+x = torch.arange(10)
+print(
+    x.numel()
+)  # The number of elements in x (in this case it's trivial because it's just a vector)
+
+# ============================================================= #
+#                        Tensor Reshaping                       #
+# ============================================================= #
+
+x = torch.arange(9)
+
+# Let's say we want to reshape it to be 3x3
+x_3x3 = x.view(3, 3)
+
+# We can also do (view and reshape are very similar)
+# and the difference, in simple terms, is that view never
+# copies and therefore only works when the tensor's memory
+# layout allows it (e.g. it is stored contiguously), whereas
+# reshape works either way: it returns a view when possible
+# and otherwise copies the tensor into contiguous memory,
+# which might come with some performance loss.
+x_3x3 = x.reshape(3, 3)
+
+# If we for example do:
+y = x_3x3.t()
+print(
+    y.is_contiguous()
+)  # This will return False and if we try to use view now, it won't work!
+# y.view(9) would cause an error, reshape however won't
+
+# This is because in memory it was stored [0, 1, 2, ... 8], whereas now it's [0, 3, 6, 1, 4, 7, 2, 5, 8]
+# The jump is no longer 1 in memory for one element jump (matrices are stored as a contiguous block, and
+# using pointers to construct these matrices). This is a bit complicated and I need to explore this more
+# as well, at least you know it's a problem to be cautious of! A solution is to do the following
+print(y.contiguous().view(9))  # Calling .contiguous() before view and it works
+
+# Moving on to another operation, let's say we want to concatenate two tensors along a dimension
+x1 = torch.rand(2, 5)
+x2 = torch.rand(2, 5)
+print(torch.cat((x1, x2), dim=0).shape)  # Shape: 4x5
+print(torch.cat((x1, x2), dim=1).shape)  # Shape 2x10
+
+# Let's say we want to unroll x1 into one long vector with 10 elements, we can do:
+z = x1.view(-1)  # And -1 will unroll everything
+
+# If we instead have an additional dimension and we wish to keep those as is we can do:
+batch = 64
+x = torch.rand((batch, 2, 5))
+z = x.view(
+    batch, -1
+)  # And z.shape would be 64x10, this is very useful stuff and is used all the time
+
+# Let's say we want to switch x axis so that instead of 64x2x5 we have 64x5x2
+# I.e we want dimension 0 to stay, dimension 1 to become dimension 2, dimension 2 to become dimension 1
+# Basically you tell permute where you want the new dimensions to be, torch.transpose is a special case
+# of permute (why?)
+z = x.permute(0, 2, 1)
+
+# Splits x last dimension into chunks of 2 (since 5 is not integer div by 2) the last dimension
+# will be smaller, so it will split it into two tensors: 64x2x3 and 64x2x2
+z = torch.chunk(x, chunks=2, dim=1)
+print(z[0].shape)
+print(z[1].shape)
+
+# Let's say we want to add an additional dimension
+x = torch.arange(
+    10
+)  # Shape is [10], let's say we want to add an additional so we have 1x10
+print(x.unsqueeze(0).shape)  # 1x10
+print(x.unsqueeze(1).shape)  # 10x1
+
+# Let's say we have x which is 1x1x10 and we want to remove a dim so we have 1x10
+x = torch.arange(10).unsqueeze(0).unsqueeze(1)
+
+# Perhaps unsurprisingly
+z = x.squeeze(1)  # can also do .squeeze(0) both returns 1x10
+
+# That was some essential Tensor operations, hopefully you found it useful!
--- a/ML/Pytorch/Basics/pytorch_tensorboard_.py
+++ b/ML/Pytorch/Basics/pytorch_tensorboard_.py
@@ -0,0 +1,142 @@
+"""
+Example code of how to use the TensorBoard in PyTorch.
+This code uses a lot of different functions from TensorBoard
+and tries to have them all in a compact way, it might not be
+super clear exactly what calls does what, for that I recommend
+watching the YouTube video.
+
+Video explanation: https://youtu.be/RLqsxWaQdHE
+Got any questions leave a comment on youtube :)
+
+Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
+*    2020-04-17 Initial coding
+"""
+
+# Imports
+import torch
+import torchvision
+import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
+import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
+import torch.nn.functional as F  # All functions that don't have any parameters
+import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
+import torchvision.transforms as transforms  # Transformations we can perform on our dataset
+from torch.utils.data import (
+    DataLoader,
+)  # Gives easier dataset managment and creates mini batches
+from torch.utils.tensorboard import SummaryWriter  # to print to tensorboard
+
+# Simple CNN
+class CNN(nn.Module):
+    def __init__(self, in_channels=1, num_classes=10):
+        super(CNN, self).__init__()
+        self.conv1 = nn.Conv2d(
+            in_channels=in_channels, out_channels=8, kernel_size=3, stride=1, padding=1
+        )
+        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
+        self.conv2 = nn.Conv2d(
+            in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1
+        )
+        self.fc1 = nn.Linear(16 * 7 * 7, num_classes)
+
+    def forward(self, x):
+        x = F.relu(self.conv1(x))
+        x = self.pool(x)
+        x = F.relu(self.conv2(x))
+        x = self.pool(x)
+        x = x.reshape(x.shape[0], -1)
+        x = self.fc1(x)
+        return x
+
+
+# Set device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Hyperparameters
+in_channels = 1
+num_classes = 10
+num_epochs = 1
+
+# Load Data
+train_dataset = datasets.MNIST(
+    root="dataset/", train=True, transform=transforms.ToTensor(), download=True
+)
+
+# To do hyperparameter search, include more batch_sizes you want to try
+# and more learning rates!
+batch_sizes = [256]
+learning_rates = [0.001]
+classes = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
+
+for batch_size in batch_sizes:
+    for learning_rate in learning_rates:
+        step = 0
+        # Initialize network
+        model = CNN(in_channels=in_channels, num_classes=num_classes)
+        model.to(device)
+        model.train()
+        criterion = nn.CrossEntropyLoss()
+        train_loader = DataLoader(
+            dataset=train_dataset, batch_size=batch_size, shuffle=True
+        )
+        optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.0)
+        writer = SummaryWriter(
+            f"runs/MNIST/MiniBatchSize {batch_size} LR {learning_rate}"
+        )
+
+        # Visualize model in TensorBoard
+        images, _ = next(iter(train_loader))
+        writer.add_graph(model, images.to(device))
+        writer.close()
+
+        for epoch in range(num_epochs):
+            losses = []
+            accuracies = []
+
+            for batch_idx, (data, targets) in enumerate(train_loader):
+                # Get data to cuda if possible
+                data = data.to(device=device)
+                targets = targets.to(device=device)
+
+                # forward
+                scores = model(data)
+                loss = criterion(scores, targets)
+                losses.append(loss.item())
+
+                # backward
+                optimizer.zero_grad()
+                loss.backward()
+                optimizer.step()
+
+                # Calculate 'running' training accuracy
+                features = data.reshape(data.shape[0], -1)
+                img_grid = torchvision.utils.make_grid(data)
+                _, predictions = scores.max(1)
+                num_correct = (predictions == targets).sum()
+                running_train_acc = float(num_correct) / float(data.shape[0])
+                accuracies.append(running_train_acc)
+
+                # Plot things to tensorboard
+                class_labels = [classes[label] for label in predictions]
+                writer.add_image("mnist_images", img_grid)
+                writer.add_histogram("fc1", model.fc1.weight)
+                writer.add_scalar("Training loss", loss, global_step=step)
+                writer.add_scalar(
+                    "Training Accuracy", running_train_acc, global_step=step
+                )
+
+                if batch_idx == 230:
+                    writer.add_embedding(
+                        features,
+                        metadata=class_labels,
+                        label_img=data,
+                        global_step=batch_idx,
+                    )
+                step += 1
+
+            writer.add_hparams(
+                {"lr": learning_rate, "bsize": batch_size},
+                {
+                    "accuracy": sum(accuracies) / len(accuracies),
+                    "loss": sum(losses) / len(losses),
+                },
+            )
--- a/ML/Pytorch/Basics/pytorch_transforms.py
+++ b/ML/Pytorch/Basics/pytorch_transforms.py
@@ -0,0 +1,155 @@
+"""
+Shows a small example of how to use transformations (perhaps unnecessarily many)
+on CIFAR10 dataset and training on a small CNN toy network.
+
+Video explanation: https://youtu.be/Zvd276j9sZ8
+Got any questions leave a comment I'm pretty good at responding on youtube
+
+Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
+*    2020-04-09 Initial coding
+"""
+
+# Imports
+import torch
+import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
+import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
+import torch.nn.functional as F  # All functions that don't have any parameters
+from torch.utils.data import (
+    DataLoader,
+)  # Gives easier dataset managment and creates mini batches
+import torchvision.datasets as datasets  # Has standard datasets we can import in a nice way
+import torchvision.transforms as transforms  # Transformations we can perform on our dataset
+
+# Simple CNN
+class CNN(nn.Module):
+    def __init__(self, in_channels, num_classes):
+        super(CNN, self).__init__()
+        self.conv1 = nn.Conv2d(
+            in_channels=in_channels,
+            out_channels=8,
+            kernel_size=(3, 3),
+            stride=(1, 1),
+            padding=(1, 1),
+        )
+        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
+        self.conv2 = nn.Conv2d(
+            in_channels=8,
+            out_channels=16,
+            kernel_size=(3, 3),
+            stride=(1, 1),
+            padding=(1, 1),
+        )
+        self.fc1 = nn.Linear(16 * 8 * 8, num_classes)
+
+    def forward(self, x):
+        x = F.relu(self.conv1(x))
+        x = self.pool(x)
+        x = F.relu(self.conv2(x))
+        x = self.pool(x)
+        x = x.reshape(x.shape[0], -1)
+        x = self.fc1(x)
+
+        return x
+
+
+# Set device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Hyperparameters
+learning_rate = 1e-4
+batch_size = 64
+num_epochs = 5
+
+
+# Load pretrain model & modify it
+model = CNN(in_channels=3, num_classes=10)
+model.classifier = nn.Sequential(nn.Linear(512, 100), nn.ReLU(), nn.Linear(100, 10))
+model.to(device)
+
+# Load Data
+my_transforms = transforms.Compose(
+    [  # Compose makes it possible to have many transforms
+        transforms.Resize((36, 36)),  # Resizes (32,32) to (36,36)
+        transforms.RandomCrop((32, 32)),  # Takes a random (32,32) crop
+        transforms.ColorJitter(brightness=0.5),  # Change brightness of image
+        transforms.RandomRotation(
+            degrees=45
+        ),  # Perhaps a random rotation from -45 to 45 degrees
+        transforms.RandomHorizontalFlip(
+            p=0.5
+        ),  # Flips the image horizontally with probability 0.5
+        transforms.RandomVerticalFlip(
+            p=0.05
+        ),  # Flips image vertically with probability 0.05
+        transforms.RandomGrayscale(p=0.2),  # Converts to grayscale with probability 0.2
+        transforms.ToTensor(),  # Finally converts PIL image to tensor so we can train w. pytorch
+        transforms.Normalize(
+            mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]
+        ),  # Note: these values aren't optimal
+    ]
+)
+
+
+train_dataset = datasets.CIFAR10(
+    root="dataset/", train=True, transform=my_transforms, download=True
+)
+train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
+
+# Loss and optimizer
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.Adam(model.parameters(), lr=learning_rate)
+
+# Train Network
+for epoch in range(num_epochs):
+    losses = []
+
+    for batch_idx, (data, targets) in enumerate(train_loader):
+        # Get data to cuda if possible
+        data = data.to(device=device)
+        targets = targets.to(device=device)
+
+        # forward
+        scores = model(data)
+        loss = criterion(scores, targets)
+
+        losses.append(loss.item())
+        # backward
+        optimizer.zero_grad()
+        loss.backward()
+
+        # gradient descent or adam step
+        optimizer.step()
+
+    print(f"Cost at epoch {epoch} is {sum(losses)/len(losses):.5f}")
+
+# Check accuracy on training & test to see how good our model is
+
+
+def check_accuracy(loader, model):
+    if loader.dataset.train:
+        print("Checking accuracy on training data")
+    else:
+        print("Checking accuracy on test data")
+
+    num_correct = 0
+    num_samples = 0
+    model.eval()
+
+    with torch.no_grad():
+        for x, y in loader:
+            x = x.to(device=device)
+            y = y.to(device=device)
+
+            scores = model(x)
+            _, predictions = scores.max(1)
+            num_correct += (predictions == y).sum()
+            num_samples += predictions.size(0)
+
+        print(
+            f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}"
+        )
+
+    model.train()
+
+
+check_accuracy(train_loader, model)
--- a/ML/Pytorch/Basics/set_deterministic_behavior/pytorch_set_seeds.py
+++ b/ML/Pytorch/Basics/set_deterministic_behavior/pytorch_set_seeds.py
@@ -0,0 +1,15 @@
+import random, torch, os, numpy as np
+
+def seed_everything(seed=42):
+    os.environ['PYTHONHASHSEED'] = str(seed)
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+seed_everything()
+
+# Do training etc after running seed_everything
--- a/ML/Pytorch/CNN_architectures/lenet5_pytorch.py
+++ b/ML/Pytorch/CNN_architectures/lenet5_pytorch.py
@@ -0,0 +1,67 @@
+"""
+An implementation of LeNet CNN architecture.
+
+Video explanation: https://youtu.be/fcOW-Zyb5Bo
+Got any questions leave a comment on youtube :)
+
+Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
+*    2020-04-05 Initial coding
+
+"""
+
+import torch
+import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
+
+
+class LeNet(nn.Module):
+    def __init__(self):
+        super(LeNet, self).__init__()
+        self.relu = nn.ReLU()
+        self.pool = nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2))
+        self.conv1 = nn.Conv2d(
+            in_channels=1,
+            out_channels=6,
+            kernel_size=(5, 5),
+            stride=(1, 1),
+            padding=(0, 0),
+        )
+        self.conv2 = nn.Conv2d(
+            in_channels=6,
+            out_channels=16,
+            kernel_size=(5, 5),
+            stride=(1, 1),
+            padding=(0, 0),
+        )
+        self.conv3 = nn.Conv2d(
+            in_channels=16,
+            out_channels=120,
+            kernel_size=(5, 5),
+            stride=(1, 1),
+            padding=(0, 0),
+        )
+        self.linear1 = nn.Linear(120, 84)
+        self.linear2 = nn.Linear(84, 10)
+
+    def forward(self, x):
+        x = self.relu(self.conv1(x))
+        x = self.pool(x)
+        x = self.relu(self.conv2(x))
+        x = self.pool(x)
+        x = self.relu(
+            self.conv3(x)
+        )  # num_examples x 120 x 1 x 1 --> num_examples x 120
+        x = x.reshape(x.shape[0], -1)
+        x = self.relu(self.linear1(x))
+        x = self.linear2(x)
+        return x
+
+
+def test_lenet():
+    x = torch.randn(64, 1, 32, 32)
+    model = LeNet()
+    return model(x)
+
+
+if __name__ == "__main__":
+    out = test_lenet()
+    print(out.shape)
--- a/ML/Pytorch/CNN_architectures/pytorch_inceptionet.py
+++ b/ML/Pytorch/CNN_architectures/pytorch_inceptionet.py
@@ -0,0 +1,166 @@
+"""
+An implementation of GoogLeNet / InceptionNet from scratch.
+
+Video explanation: https://youtu.be/uQc4Fs7yx5I
+Got any questions? Leave a comment on YouTube :)
+
+Programmed by Aladdin Persson <aladdin.persson at hotmail dot com>
+*    2020-04-07 Initial coding
+
+"""
+
+# Imports
+import torch
+import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
+
+
+class GoogLeNet(nn.Module):
+    """GoogLeNet / InceptionNet classifier assembled from inception blocks.
+
+    The fixed 7x7 average pool and the 1024-feature classifier input match
+    3 x 224 x 224 images (224 -> 112 -> 56 -> 28 -> 14 -> 7 -> 1 spatially).
+
+    Args:
+        aux_logits: If true, build the two auxiliary classifiers; their
+            outputs are returned alongside the main logits while the module
+            is in training mode.
+        num_classes: Number of output classes of the main and auxiliary
+            classifiers.
+
+    ``forward`` returns ``(aux1, aux2, logits)`` when ``aux_logits`` is set and
+    the module is in training mode, otherwise only ``logits``.
+    """
+
+    def __init__(self, aux_logits=True, num_classes=1000):
+        super().__init__()
+        assert aux_logits in (True, False), "aux_logits must be True or False"
+        self.aux_logits = aux_logits
+
+        # Write in_channels, etc, all explicit in self.conv1, rest will write to
+        # make everything as compact as possible, kernel_size=3 instead of (3,3)
+        self.conv1 = conv_block(
+            in_channels=3,
+            out_channels=64,
+            kernel_size=(7, 7),
+            stride=(2, 2),
+            padding=(3, 3),
+        )
+
+        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        self.conv2 = conv_block(64, 192, kernel_size=3, stride=1, padding=1)
+        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+        # In this order: in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool
+        self.inception3a = Inception_block(192, 64, 96, 128, 16, 32, 32)
+        self.inception3b = Inception_block(256, 128, 128, 192, 32, 96, 64)
+        self.maxpool3 = nn.MaxPool2d(kernel_size=(3, 3), stride=2, padding=1)
+
+        self.inception4a = Inception_block(480, 192, 96, 208, 16, 48, 64)
+        self.inception4b = Inception_block(512, 160, 112, 224, 24, 64, 64)
+        self.inception4c = Inception_block(512, 128, 128, 256, 24, 64, 64)
+        self.inception4d = Inception_block(512, 112, 144, 288, 32, 64, 64)
+        self.inception4e = Inception_block(528, 256, 160, 320, 32, 128, 128)
+        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+        self.inception5a = Inception_block(832, 256, 160, 320, 32, 128, 128)
+        self.inception5b = Inception_block(832, 384, 192, 384, 48, 128, 128)
+
+        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
+        self.dropout = nn.Dropout(p=0.4)
+        # The main head must honour num_classes, just like the auxiliary heads.
+        self.fc1 = nn.Linear(1024, num_classes)
+
+        if self.aux_logits:
+            # 512 / 528 are the channel counts produced by inception4a / inception4d.
+            self.aux1 = InceptionAux(512, num_classes)
+            self.aux2 = InceptionAux(528, num_classes)
+        else:
+            self.aux1 = self.aux2 = None
+
+    def forward(self, x):
+        """Compute the logits for a batch ``x`` (plus auxiliary logits when training)."""
+        # Auxiliary heads only contribute while training.
+        use_aux = self.aux_logits and self.training
+
+        x = self.conv1(x)
+        x = self.maxpool1(x)
+        x = self.conv2(x)
+        x = self.maxpool2(x)
+
+        x = self.inception3a(x)
+        x = self.inception3b(x)
+        x = self.maxpool3(x)
+
+        x = self.inception4a(x)
+
+        # Auxiliary Softmax classifier 1
+        if use_aux:
+            aux1 = self.aux1(x)
+
+        x = self.inception4b(x)
+        x = self.inception4c(x)
+        x = self.inception4d(x)
+
+        # Auxiliary Softmax classifier 2
+        if use_aux:
+            aux2 = self.aux2(x)
+
+        x = self.inception4e(x)
+        x = self.maxpool4(x)
+        x = self.inception5a(x)
+        x = self.inception5b(x)
+        x = self.avgpool(x)
+        # Flatten everything but the batch dimension before the classifier.
+        x = x.reshape(x.shape[0], -1)
+        x = self.dropout(x)
+        x = self.fc1(x)
+
+        if use_aux:
+            return aux1, aux2, x
+        return x
+
+
+class Inception_block(nn.Module):
+    """Inception module: four parallel branches concatenated along channels.
+
+    Args:
+        in_channels: Channels of the incoming feature map.
+        out_1x1: Output channels of the plain 1x1 branch.
+        red_3x3: Channels of the 1x1 reduction in front of the 3x3 convolution.
+        out_3x3: Output channels of the 3x3 branch.
+        red_5x5: Channels of the 1x1 reduction in front of the 5x5 convolution.
+        out_5x5: Output channels of the 5x5 branch.
+        out_1x1pool: Output channels of the 1x1 convolution after max pooling.
+
+    The output has ``out_1x1 + out_3x3 + out_5x5 + out_1x1pool`` channels.
+    """
+
+    def __init__(
+        self, in_channels, out_1x1, red_3x3, out_3x3, red_5x5, out_5x5, out_1x1pool
+    ):
+        super().__init__()
+
+        # Branch 1: 1x1 convolution only.
+        self.branch1 = conv_block(in_channels, out_1x1, kernel_size=1)
+
+        # Branch 2: 1x1 reduction, then a size-preserving 3x3 convolution.
+        reduce_3x3 = conv_block(in_channels, red_3x3, kernel_size=1)
+        expand_3x3 = conv_block(red_3x3, out_3x3, kernel_size=3, padding=1)
+        self.branch2 = nn.Sequential(reduce_3x3, expand_3x3)
+
+        # Branch 3: 1x1 reduction, then a size-preserving 5x5 convolution.
+        reduce_5x5 = conv_block(in_channels, red_5x5, kernel_size=1)
+        expand_5x5 = conv_block(red_5x5, out_5x5, kernel_size=5, padding=2)
+        self.branch3 = nn.Sequential(reduce_5x5, expand_5x5)
+
+        # Branch 4: size-preserving 3x3 max pool, then a 1x1 convolution.
+        self.branch4 = nn.Sequential(
+            nn.MaxPool2d(kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
+            conv_block(in_channels, out_1x1pool, kernel_size=1),
+        )
+
+    def forward(self, x):
+        """Apply all four branches to ``x`` and concatenate them on dimension 1."""
+        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
+        return torch.cat([branch(x) for branch in branches], dim=1)
+
+
+class InceptionAux(nn.Module):
+    """Auxiliary classifier head: pool -> 1x1 conv -> two linear layers.
+
+    ``fc1`` expects 2048 flattened features, i.e. the 128 channels produced by
+    the 1x1 convolution times 16 spatial positions after pooling.
+
+    Args:
+        in_channels: Channels of the feature map the head is attached to.
+        num_classes: Number of output logits.
+    """
+
+    def __init__(self, in_channels, num_classes):
+        super().__init__()
+        self.relu = nn.ReLU()
+        self.dropout = nn.Dropout(p=0.7)
+        self.pool = nn.AvgPool2d(kernel_size=5, stride=3)
+        self.conv = conv_block(in_channels, 128, kernel_size=1)
+        self.fc1 = nn.Linear(2048, 1024)
+        self.fc2 = nn.Linear(1024, num_classes)
+
+    def forward(self, x):
+        """Return the auxiliary logits for the feature map ``x``."""
+        features = self.conv(self.pool(x))
+        # Flatten everything but the batch dimension for the linear layers.
+        flat = features.reshape(features.shape[0], -1)
+        hidden = self.dropout(self.relu(self.fc1(flat)))
+        return self.fc2(hidden)
+
+
+class conv_block(nn.Module):
+    """Conv2d followed by BatchNorm2d and ReLU.
+
+    Args:
+        in_channels: Channels of the input feature map.
+        out_channels: Channels produced by the convolution.
+        **kwargs: Forwarded unchanged to ``nn.Conv2d`` (kernel_size, stride,
+            padding, ...).
+    """
+
+    def __init__(self, in_channels, out_channels, **kwargs):
+        super().__init__()
+        self.relu = nn.ReLU()
+        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
+        self.batchnorm = nn.BatchNorm2d(out_channels)
+
+    def forward(self, x):
+        """Apply convolution, batch normalisation and ReLU, in that order."""
+        convolved = self.conv(x)
+        normalised = self.batchnorm(convolved)
+        return self.relu(normalised)
+
+
+if __name__ == "__main__":
+    # Smoke test with a mini batch of 3 random RGB images of size 224x224.
+    batch_size = 3
+    images = torch.randn(batch_size, 3, 224, 224)
+    net = GoogLeNet(aux_logits=True, num_classes=1000)
+    # A new module is in training mode, so with aux_logits the output is the
+    # tuple (aux1, aux2, main logits); print the shape of the main logits.
+    _, _, main_logits = net(images)
+    print(main_logits.shape)
--- a/Show More
+++ b/Show More