import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # silence TF info/warning logs (must be set before importing tf)

import matplotlib.pyplot as plt  # optional: only needed to inspect figures from tfds.show_examples
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras import layers

# Let GPU memory grow on demand; guard the call so CPU-only machines still run.
physical_devices = tf.config.list_physical_devices("GPU")
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

# ---------------------------------------------------------------------------
# Part 1: image classification on MNIST
# ---------------------------------------------------------------------------
(ds_train, ds_test), ds_info = tfds.load(
    "mnist",
    split=["train", "test"],
    shuffle_files=True,
    as_supervised=True,  # returns (img, label) tuples instead of dicts
    with_info=True,  # also returns metadata about the dataset
)

# fig = tfds.show_examples(ds_train, ds_info, rows=4, cols=4)
# print(ds_info)


def normalize_img(image, label):
    """Normalizes images: uint8 [0, 255] -> float32 [0, 1]."""
    return tf.cast(image, tf.float32) / 255.0, label


AUTOTUNE = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 128

# Setup for train dataset
ds_train = ds_train.map(normalize_img, num_parallel_calls=AUTOTUNE)
ds_train = ds_train.cache()
ds_train = ds_train.shuffle(ds_info.splits["train"].num_examples)
ds_train = ds_train.batch(BATCH_SIZE)
ds_train = ds_train.prefetch(AUTOTUNE)

# Setup for test dataset (note: map/batch/prefetch on ds_test, not ds_train)
ds_test = ds_test.map(normalize_img, num_parallel_calls=AUTOTUNE)
ds_test = ds_test.batch(BATCH_SIZE)
ds_test = ds_test.prefetch(AUTOTUNE)

model = keras.Sequential(
    [
        keras.Input((28, 28, 1)),
        layers.Conv2D(32, 3, activation="relu"),
        layers.Flatten(),
        layers.Dense(10, activation="softmax"),
    ]
)

model.compile(
    optimizer=keras.optimizers.Adam(0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

model.fit(ds_train, epochs=5, verbose=2)
model.evaluate(ds_test)

# ---------------------------------------------------------------------------
# Part 2: binary sentiment classification on IMDB reviews
# ---------------------------------------------------------------------------
(ds_train, ds_test), ds_info = tfds.load(
    "imdb_reviews",
    split=["train", "test"],
    shuffle_files=True,
    as_supervised=True,  # returns (text, label) tuples instead of dicts
    with_info=True,  # also returns metadata about the dataset
)

# NOTE: in TFDS < 4 these classes live under tfds.features.text; from
# TFDS 4.x onward they were moved to tfds.deprecated.text.
tokenizer = tfds.deprecated.text.Tokenizer()


def build_vocabulary():
    """Collects the set of all lowercased tokens in the training split."""
    vocabulary = set()
    for text, _ in ds_train:
        vocabulary.update(tokenizer.tokenize(text.numpy().lower()))
    return vocabulary


vocabulary = build_vocabulary()

encoder = tfds.deprecated.text.TokenTextEncoder(
    list(vocabulary), oov_token="", lowercase=True, tokenizer=tokenizer
)


def my_enc(text_tensor, label):
    """Eager helper: encodes one text tensor into a list of token ids."""
    encoded_text = encoder.encode(text_tensor.numpy())
    return encoded_text, label


def encode_map_fn(text, label):
    # tf.py_function doesn't set the shape of the returned tensors.
    encoded_text, label = tf.py_function(
        my_enc, inp=[text, label], Tout=(tf.int64, tf.int64)
    )

    # `tf.data.Dataset`s work best if all components have a shape set,
    # so set the shapes manually:
    encoded_text.set_shape([None])
    label.set_shape([])

    return encoded_text, label


AUTOTUNE = tf.data.experimental.AUTOTUNE
ds_train = ds_train.map(encode_map_fn, num_parallel_calls=AUTOTUNE)
ds_train = ds_train.cache()
ds_train = ds_train.shuffle(1000)
ds_train = ds_train.padded_batch(32, padded_shapes=([None], ()))
ds_train = ds_train.prefetch(AUTOTUNE)

ds_test = ds_test.map(encode_map_fn)
ds_test = ds_test.padded_batch(32, padded_shapes=([None], ()))

model = keras.Sequential(
    [
        # mask_zero=True makes the Embedding layer emit a mask for padded
        # positions (id 0) that GlobalAveragePooling1D respects. A separate
        # Masking layer placed *before* the Embedding would not work here:
        # Embedding drops any incoming mask unless mask_zero is set.
        # input_dim is len(vocabulary) + 2 because TokenTextEncoder reserves
        # id 0 for padding and one extra id for out-of-vocabulary tokens.
        layers.Embedding(input_dim=len(vocabulary) + 2, output_dim=32, mask_zero=True),
        layers.GlobalAveragePooling1D(),
        layers.Dense(64, activation="relu"),
        layers.Dense(1),  # single logit; paired with from_logits=True below
    ]
)

model.compile(
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(3e-4, clipnorm=1),
    metrics=["accuracy"],
)

model.fit(ds_train, epochs=15, verbose=2)
model.evaluate(ds_test)
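
# ---------------------------------------------------------------------------
# Minimal inference sketch (an addition, not part of the training pipeline).
# It assumes the trained IMDB `model` and `encoder` above are still in scope;
# the sample review text is made up for illustration.
# ---------------------------------------------------------------------------
sample_review = "a surprisingly heartfelt film with terrific performances"

# Encode the raw string the same way the training pipeline did, then add a
# batch dimension so the model sees shape (1, seq_len).
token_ids = encoder.encode(sample_review)
batch = tf.constant([token_ids], dtype=tf.int64)

# The model outputs a single logit; a sigmoid turns it into P(positive).
logit = model.predict(batch)
probability = float(tf.sigmoid(logit)[0, 0])
print(f"P(positive sentiment) = {probability:.3f}")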
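
# ---------------------------------------------------------------------------
# Persistence sketch (also an addition): saving the vocabulary and model so
# they can be reloaded without rebuilding. Assumes the save_to_file /
# load_from_file helpers on TokenTextEncoder and the standard Keras
# model-saving API; the file names are illustrative.
# ---------------------------------------------------------------------------
encoder.save_to_file("imdb_vocab")  # writes the token list to imdb_vocab.tokens
model.save("sentiment_model")  # Keras SavedModel directory

# Later, e.g. in another process:
# restored_encoder = tfds.deprecated.text.TokenTextEncoder.load_from_file("imdb_vocab")
# restored_model = keras.models.load_model("sentiment_model")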