CSCN8010 Practical Lab 3: Vanilla CNN and Fine-Tuned VGG16 for Dogs and Cats Classification
Problem Framing
The aim of this lab is to predict whether an image shows a cat or a dog using a Vanilla CNN and a fine-tuned VGG16 model.
To compare the performance of both models, I will use the following metrics (a short computation sketch follows the list):
- Accuracy: The proportion of correct predictions made by the model.
- Precision: The proportion of true positive predictions made by the model out of all positive predictions.
- Recall: The proportion of true positive predictions made by the model out of all actual positive instances in the dataset.
- F1 Score: The harmonic mean of precision and recall, providing a balance between the two metrics.
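As a quick reference, here is a minimal sketch (using made-up toy labels, not the lab data) of how these four metrics can be computed with scikit-learn:
# Sketch with hypothetical toy labels: 0 = cat, 1 = dog
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

y_true_toy = [0, 0, 1, 1, 1, 0, 1, 0]
y_pred_toy = [0, 1, 1, 1, 0, 0, 1, 0]

print(f"Accuracy : {accuracy_score(y_true_toy, y_pred_toy):.2f}")
print(f"Precision: {precision_score(y_true_toy, y_pred_toy):.2f}")
print(f"Recall   : {recall_score(y_true_toy, y_pred_toy):.2f}")
print(f"F1 Score : {f1_score(y_true_toy, y_pred_toy):.2f}")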
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, precision_recall_curve
import numpy as np
import os, pathlib
from imutils import paths
import random
from pathlib import Path
from PIL import Image
from collections import Counter
from tensorflow.keras.models import load_model
from tensorflow.keras import models, layers
from tensorflow.keras.applications import VGG16
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
1. Get the data
The original dataset was downloaded from Kaggle, but it contains 25,000 photos. To create a smaller subset of the dataset, I executed the following Python script:
import os, shutil, pathlib
original_dir = pathlib.Path("../data/kaggle_dogs_vs_cats/train")
new_base_dir = pathlib.Path("../data/kaggle_dogs_vs_cats_small")
def make_subset(subset_name, start_index, end_index):
    for category in ("cat", "dog"):
        dir = new_base_dir / subset_name / category
        os.makedirs(dir)
        fnames = [f"{category}.{i}.jpg" for i in range(start_index, end_index)]
        for fname in fnames:
            shutil.copyfile(src=original_dir / fname,
                            dst=dir / fname)
make_subset("train", start_index=0, end_index=1000)
make_subset("validation", start_index=1000, end_index=1500)
make_subset("test", start_index=1500, end_index=2500)
The new dataset contains, per class:
- 1,000 training images
- 500 validation images
- 1,000 test images

This reduces the dataset from 25,000 images to 5,000. Additionally, I split the dataset into three subsets: train, validation, and test. The training set contains 1,000 images per class, the validation set contains 500 images per class, and the test set contains 1,000 images per class.
new_base_dir = pathlib.Path("./data/kaggle_dogs_vs_cats_small")
# Count the number of files in the new base directory
for subset in ["train", "validation", "test"]:
    num_files = len(list(paths.list_images(new_base_dir/subset)))
    print(f"Number of files in {subset}: {num_files}")
Number of files in train: 2000
Number of files in validation: 1000
Number of files in test: 2000
2. Data Exploration and Preprocessing
In this section, I will explore the dataset to understand its structure and content.
Show some random images from the dataset.
def show_random_images(base_path, subset, category, n=5):
    image_dir = Path(base_path) / subset / category
    images = list(image_dir.glob("*.jpg"))
    random_images = random.sample(images, n)

    plt.figure(figsize=(12, 5))
    for i, image_path in enumerate(random_images):
        img = Image.open(image_path)
        plt.subplot(1, n, i+1)
        plt.imshow(img)
        plt.axis("off")
        plt.title(image_path.name)
    plt.suptitle(f"Random {category} images from {subset} set", fontsize=12)
    plt.tight_layout()
    plt.show()
show_random_images(new_base_dir, 'train', 'cat')
show_random_images(new_base_dir, 'train', 'dog')
Validate the size of the images in the dataset.
It is important to ensure that all images have the same size, since a CNN expects a fixed input shape. I will check the size and colour mode of the images in all three subsets and print the frequency of each size.
sizes = []
modes = []
print(f"VALIDATING BALANCE OF DATASET...")
for subset in ["train", "validation", "test"]:
    print(f"\n--- {subset.upper()} ---")
    for category in ["cat", "dog"]:
        count = len(list((new_base_dir / subset / category).glob("*.jpg")))
        print(f"{category}: {count}")
        for path in (new_base_dir / subset / category).glob("*.jpg"):
            img = Image.open(path)
            sizes.append(img.size)
            modes.append(img.mode)  # 'RGB', 'L', 'RGBA', etc.

size_counts = Counter(sizes)
mode_counts = Counter(modes)

print(f"\nVALIDATING IMAGE SIZES (10 MOST COMMON)...")
for size, count in size_counts.most_common(10):
    print(f"{size}: {count} images")

print(f"\nVALIDATING IMAGE MODES...")
for mode, count in mode_counts.most_common():
    print(f"{mode}: {count} images")
VALIDATING BALANCE OF DATASET...

--- TRAIN ---
cat: 1000
dog: 1000

--- VALIDATION ---
cat: 500
dog: 500

--- TEST ---
cat: 1000
dog: 1000

VALIDATING IMAGE SIZES (10 MOST COMMON)...
(499, 375): 578 images
(500, 374): 576 images
(375, 499): 59 images
(319, 240): 48 images
(374, 500): 47 images
(320, 239): 44 images
(499, 333): 41 images
(500, 332): 37 images
(500, 331): 25 images
(499, 332): 23 images

VALIDATING IMAGE MODES...
RGB: 5000 images
widths = [w for w, h in sizes]
heights = [h for w, h in sizes]
# Plot
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.hist(widths, bins=30, color='skyblue', edgecolor='black')
plt.title("Distribution of widths")
plt.xlabel("widths (pixels)")
plt.ylabel("Frequency")
plt.subplot(1, 2, 2)
plt.hist(heights, bins=30, color='salmon', edgecolor='black')
plt.title("Distribution of heights")
plt.xlabel("heights (pixels)")
plt.ylabel("Frequency")
plt.tight_layout()
plt.show()
In the exploration I found:
- The images come in a variety of sizes.
- All images share the same mode (RGB).
- The classes (cats and dogs) are balanced in the training, validation, and test sets.

I will resize all images to a common size of 150x150 pixels. This is a common practice in image classification tasks to ensure that all inputs have the same dimensions.
Preprocessing the images
In this section, I will preprocess the images to ensure they are in the correct format for training a CNN model. This includes resizing the images to a common size of 150x150 pixels and normalizing the pixel values to the range 0 to 1.
# Mapping class name to numeric label
class_labels = {"cat": 0, "dog": 1}

def load_images_from_folder(folder_path, target_size=(150, 150)):
    x = []
    labels = []
    image_paths = list(paths.list_images(folder_path))
    random.shuffle(image_paths)

    for i, image_path in enumerate(image_paths):
        print(f"Loading image... {i} / {len(image_paths)}", end="\r")
        try:
            # Determine label from filename first, so image and label stay aligned
            file_name = os.path.basename(image_path)
            if file_name.startswith("cat"):
                label = class_labels["cat"]
            elif file_name.startswith("dog"):
                label = class_labels["dog"]
            else:
                print(f"Warning: {file_name} does not match known class... Skipping...")
                continue

            img = Image.open(image_path).resize(target_size)  # Resize image to target size
            img = np.array(img).astype("float32") / 255.0     # Normalize to [0, 1]
            x.append(img)
            labels.append(label)
        except Exception as e:
            print(f"Error loading image {image_path}: {e}")

    x = np.array(x)
    labels = np.array(labels)
    print(f"Loaded {len(x)} images from {folder_path.name}. Shape: {x.shape}")
    return x, labels
# Example usage:
x_train, y_train = load_images_from_folder(new_base_dir / "train", target_size=(150, 150))
x_val, y_val = load_images_from_folder(new_base_dir / "validation", target_size=(150, 150))
x_test, y_test = load_images_from_folder(new_base_dir / "test", target_size=(150, 150))
Loaded 2000 images from train. Shape: (2000, 150, 150, 3)
Loaded 1000 images from validation. Shape: (1000, 150, 150, 3)
Loaded 2000 images from test. Shape: (2000, 150, 150, 3)
Showing a few images from the training set to verify that the preprocessing was successful.
Each set is a 4D array with shape (batch_size, height, width, channels). When I access x_train[0], I retrieve one image with shape (height, width, 3), represented as a 3D NumPy array. All data is normalized to the range [0, 1], as the quick check below confirms.
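A small sanity-check sketch, using the arrays created above, to verify the shapes and value range directly:
# Sanity check (sketch): shapes, value range, and class balance of the preprocessed arrays
print("x_train shape:", x_train.shape)            # expected (2000, 150, 150, 3)
print("single image shape:", x_train[0].shape)    # expected (150, 150, 3)
print("pixel value range:", x_train.min(), "to", x_train.max())  # expected within [0.0, 1.0]
print("label counts (cat, dog):", np.bincount(y_train))          # expected [1000 1000]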
Showing a sample of images and their labels from the training set.
def show_image(x, label_batch, class_labels=class_labels, index=6):
    plt.figure(figsize=(6, 3))
    class_names = list(class_labels.keys())  # Class names in label order
    idxs = np.random.choice(len(x), size=index, replace=False)
    for i, idx in enumerate(idxs):
        ax = plt.subplot(2, 3, i + 1)  # Arrange in a 2x3 grid
        plt.imshow(x[idx])
        plt.title(class_names[label_batch[idx]].title())  # Display the label
        plt.axis('off')  # Hide axis ticks
    plt.tight_layout()
    plt.show()
show_image(x_train, y_train)
3. Architecture Design
In this section I will design the architecture of the CNN model.
CNN Sequential Model
Layers:
- Conv2D (32 filters, 3x3 kernel, ReLU activation)
- MaxPooling2D (2x2 pooling)
- Conv2D (64 filters, 3x3 kernel, ReLU activation)
- MaxPooling2D (2x2 pooling)
- Flatten
- Dense (64 hidden neurons, ReLU activation)
- Dropout (0.6)
- Final Dense with sigmoid (binary output)
model = models.Sequential() # Create a Sequential model, piled layer by layer
# FIRST LAYER: Conv2D (32)
model.add(layers.Conv2D(
filters=32, # filter or feature detection size
kernel_size=(3, 3), # size of each filter (3x3 pixels)
activation='relu', # Relu activation Function to introduce non-linearity
input_shape=(150, 150, 3) # input shape: 150*150 pixels with 3 channels (RGB)
))
# SECOND LAYER: MaxPooling2D (reducing image size)
model.add(layers.MaxPooling2D(
pool_size=(2, 2) # Only keep the maximum value in each 2x2 region
))
# THIRD LAYER: Conv2D (64)
model.add(layers.Conv2D(
filters=64, # Increasing the number of filters to learn more complex features
kernel_size=(3, 3),
activation='relu',
))
# FOURTH LAYER: MaxPooling2D (reducing image size)
model.add(layers.MaxPooling2D(
pool_size=(2, 2) # Only keep the maximum value in each 2x2 region
))
# FIFTH LAYER: FLATTEN TO CONVERT 3D TO 1D
model.add(layers.Flatten()) # Output 1D to feed into Dense layers
# SIXTH LAYER: Dense (64) Fully Connected
model.add(layers.Dense(
units=64, # Number of neurons in this layer
activation='relu'
))
# SEVENTH LAYER: Dropout (0.6)
model.add(layers.Dropout(rate=0.6)) # Dropout to prevent overfitting
# FINAL LAYER: Dense (1) Output Layer
model.add(layers.Dense(
units=1, # Single neuron for binary output (0 or 1)
activation='sigmoid' # Outputs a probability between 0 and 1
))
c:\Users\paula\github-classroom\CSCN8010 - Foundations ML Frameworks\CSCN8010-PR-Lab3\venvPR\Lib\site-packages\keras\src\layers\convolutional\base_conv.py:113: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(activity_regularizer=activity_regularizer, **kwargs)
Summary
model.summary()
Model: "sequential"
Layer (type)                      Output Shape             Param #
conv2d (Conv2D)                   (None, 148, 148, 32)     896
max_pooling2d (MaxPooling2D)      (None, 74, 74, 32)       0
conv2d_1 (Conv2D)                 (None, 72, 72, 64)       18,496
max_pooling2d_1 (MaxPooling2D)    (None, 36, 36, 64)       0
flatten (Flatten)                 (None, 82944)            0
dense (Dense)                     (None, 64)               5,308,480
dropout (Dropout)                 (None, 64)               0
dense_1 (Dense)                   (None, 1)                65
Total params: 5,327,937 (20.32 MB)
Trainable params: 5,327,937 (20.32 MB)
Non-trainable params: 0 (0.00 B)
# Compile the model with appropriate loss function and optimizer
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
Setting the Callbacks and EarlyStopping
In this part of the lab, I set up callbacks to monitor the training process and save the best model based on validation accuracy.
I also used the EarlyStopping callback to automatically stop training when the model stops improving, based on the patience parameter.
This helps prevent overfitting and ensures that the best-performing version of the model is retained.
# Create a callback to save the best version of the model (based on validation accuracy)
checkpoint_cb = ModelCheckpoint(
filepath='best_model.h5', # File where the best model will be saved
monitor='val_accuracy', # Metric to monitor
save_best_only=True, # Only save the model if it's the best so far
mode='max', # We want to maximize validation accuracy
verbose=1 # Print a message each time the model is saved
)
earlystop_cb = EarlyStopping(
monitor='val_accuracy',
patience=3, # Stop after 3 epochs without improvement
restore_best_weights=True
)
Training the model
# Train the model using training data, validate on validation data
history = model.fit(
x_train, y_train, # Training data and labels
epochs=20, # Number of times the model will see the full dataset
batch_size=32, # Number of samples per gradient update
validation_data=(x_val, y_val),# Validation data to evaluate on after each epoch
callbacks=[checkpoint_cb, earlystop_cb] # Save best model + stop if no improvement
)
Epoch 1/20 - accuracy: 0.5185 - loss: 0.8091 - val_accuracy: 0.5620 - val_loss: 0.6880
  val_accuracy improved from None to 0.56200, saving model to best_model.h5
Epoch 2/20 - accuracy: 0.5920 - loss: 0.6707 - val_accuracy: 0.5930 - val_loss: 0.6616
  val_accuracy improved from 0.56200 to 0.59300, saving model to best_model.h5
Epoch 3/20 - accuracy: 0.6215 - loss: 0.6591 - val_accuracy: 0.6360 - val_loss: 0.6568
  val_accuracy improved from 0.59300 to 0.63600, saving model to best_model.h5
Epoch 4/20 - accuracy: 0.6900 - loss: 0.5955 - val_accuracy: 0.6590 - val_loss: 0.6394
  val_accuracy improved from 0.63600 to 0.65900, saving model to best_model.h5
Epoch 5/20 - accuracy: 0.7435 - loss: 0.5307 - val_accuracy: 0.6680 - val_loss: 0.6374
  val_accuracy improved from 0.65900 to 0.66800, saving model to best_model.h5
Epoch 6/20 - accuracy: 0.7825 - loss: 0.4572 - val_accuracy: 0.6360 - val_loss: 0.6985
  val_accuracy did not improve from 0.66800
Epoch 7/20 - accuracy: 0.8445 - loss: 0.3658 - val_accuracy: 0.6560 - val_loss: 0.7023
  val_accuracy did not improve from 0.66800
Epoch 8/20 - accuracy: 0.8745 - loss: 0.3025 - val_accuracy: 0.6590 - val_loss: 0.7930
  val_accuracy did not improve from 0.66800

(Keras also printed a WARNING:absl message at each checkpoint save: the HDF5 .h5 format is legacy, and the native .keras format is recommended.)
# Accuracy plot
plt.figure(figsize=(8, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Accuracy over epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
# Loss plot
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Loss over epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.tight_layout()
plt.show()
The graphs above show the training and validation accuracy and loss over the epochs.
- The accuracy graph shows that training accuracy keeps increasing while validation accuracy plateaus and then declines, indicating overfitting.
- In the loss graph, the training loss decreases while the validation loss increases, which is also a sign of overfitting.

The Custom CNN model completed its training in 8 epochs, stopped by the EarlyStopping callback monitoring validation accuracy (best value at epoch 5, followed by 3 epochs without improvement). This indicates that the model reached its best performance well before the maximum number of epochs; the sketch below confirms this from the training history.
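A quick sketch, using the history object returned by model.fit above, to locate the best epoch:
# Sketch: find the best epoch in the training history.
# With restore_best_weights=True, the weights kept by EarlyStopping come from this epoch.
val_acc = history.history['val_accuracy']
best_epoch = int(np.argmax(val_acc)) + 1  # +1 because epochs are 1-indexed in the log
print(f"Best validation accuracy: {max(val_acc):.4f} at epoch {best_epoch}")
print(f"Epochs actually run: {len(val_acc)}")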
Fine-Tune VGG16
In this step, I will use a pre-trained VGG16 model as a base and fine-tune it for the dog vs cat classification task. The VGG16 model is a well-known convolutional neural network architecture that has been pre-trained on the ImageNet dataset.
Then, I am going to add my own layers on top of the VGG16 model to adapt it to the dog vs cat classification task.
# Load the VGG16 base model without the top classifier layers
vgg_base = VGG16(
weights='imagenet', # Load pre-trained weights
include_top=False,
input_shape=(150, 150, 3) # Match the shape of your input images
)
To avoid changing the weights of the VGG16 model, I will freeze the layers of the base model. This means that the weights of the VGG16 model will not be updated during training.
vgg_base.trainable = False # Freeze all convolutional layers
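A small verification sketch (using the vgg_base object defined above) to confirm that freezing worked before building the full model:
# Sketch: after setting trainable = False, the base should expose no trainable weights
print("Trainable weight tensors in vgg_base:", len(vgg_base.trainable_weights))        # expected 0
print("Non-trainable weight tensors in vgg_base:", len(vgg_base.non_trainable_weights))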
Creating my top layers
- Flatten
- Dense (hidden neurons)
- Dropout (0.5)
- Final Dense with sigmoid (binary output)
# Create a new model on top of the frozen VGG16 base
model_vgg = models.Sequential()
# Add the VGG16 convolutional base
model_vgg.add(vgg_base)
# Add custom classifier on top
# FIRST LAYER: Flatten the output of the conv base
model_vgg.add(layers.Flatten())
# SECOND LAYER: Fully connected layer
model_vgg.add(layers.Dense(64, activation='relu'))
# THIRD LAYER: Dropout for regularization
model_vgg.add(layers.Dropout(0.5))
#FOURTH LAYER: Final Dense layer for binary classification (Output layer)
model_vgg.add(layers.Dense(1, activation='sigmoid'))
# Compile the model with appropriate loss function and optimizer
model_vgg.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
Setting the callbacks and earlystop for VGG16
# Define callbacks
checkpoint_cb = ModelCheckpoint(
filepath='best_vgg_model.h5', # File to save best version of VGG16 model
monitor='val_accuracy', # Watch validation accuracy
save_best_only=True, # Save only the best model
mode='max',
verbose=1
)
earlystop_cb = EarlyStopping(
monitor='val_accuracy', # Stop if val accuracy stops improving
patience=3, # Wait 3 epochs before stopping
restore_best_weights=True
)
Training the model with VGG16
# Train the model using training data, validate on validation data
history_vgg = model_vgg.fit(
x_train, y_train,
epochs=20,
batch_size=32,
validation_data=(x_val, y_val),
callbacks=[checkpoint_cb, earlystop_cb]
)
Epoch 1/20 - accuracy: 0.7950 - loss: 0.4427 - val_accuracy: 0.8620 - val_loss: 0.2879
  val_accuracy improved from None to 0.86200, saving model to best_vgg_model.h5
Epoch 2/20 - accuracy: 0.8825 - loss: 0.2798 - val_accuracy: 0.8950 - val_loss: 0.2659
  val_accuracy improved from 0.86200 to 0.89500, saving model to best_vgg_model.h5
Epoch 3/20 - accuracy: 0.9100 - loss: 0.2298 - val_accuracy: 0.9020 - val_loss: 0.2342
  val_accuracy improved from 0.89500 to 0.90200, saving model to best_vgg_model.h5
Epoch 4/20 - accuracy: 0.9180 - loss: 0.2068 - val_accuracy: 0.8980 - val_loss: 0.2361
  val_accuracy did not improve from 0.90200
Epoch 5/20 - accuracy: 0.9355 - loss: 0.1576 - val_accuracy: 0.9030 - val_loss: 0.2320
  val_accuracy improved from 0.90200 to 0.90300, saving model to best_vgg_model.h5
Epoch 6/20 - accuracy: 0.9475 - loss: 0.1414 - val_accuracy: 0.8960 - val_loss: 0.2591
  val_accuracy did not improve from 0.90300
Epoch 7/20 - accuracy: 0.9530 - loss: 0.1115 - val_accuracy: 0.9070 - val_loss: 0.2334
  val_accuracy improved from 0.90300 to 0.90700, saving model to best_vgg_model.h5
Epoch 8/20 - accuracy: 0.9620 - loss: 0.1008 - val_accuracy: 0.9070 - val_loss: 0.2454
  val_accuracy did not improve from 0.90700
Epoch 9/20 - accuracy: 0.9645 - loss: 0.0871 - val_accuracy: 0.9080 - val_loss: 0.2428
  val_accuracy improved from 0.90700 to 0.90800, saving model to best_vgg_model.h5
Epoch 10/20 - accuracy: 0.9745 - loss: 0.0678 - val_accuracy: 0.9030 - val_loss: 0.2726
  val_accuracy did not improve from 0.90800
Epoch 11/20 - accuracy: 0.9815 - loss: 0.0603 - val_accuracy: 0.9060 - val_loss: 0.2581
  val_accuracy did not improve from 0.90800
Epoch 12/20 - accuracy: 0.9865 - loss: 0.0448 - val_accuracy: 0.9150 - val_loss: 0.2626
  val_accuracy improved from 0.90800 to 0.91500, saving model to best_vgg_model.h5
Epoch 13/20 - accuracy: 0.9865 - loss: 0.0462 - val_accuracy: 0.9070 - val_loss: 0.2761
  val_accuracy did not improve from 0.91500
Epoch 14/20 - accuracy: 0.9875 - loss: 0.0408 - val_accuracy: 0.8990 - val_loss: 0.3166
  val_accuracy did not improve from 0.91500
Epoch 15/20 - accuracy: 0.9865 - loss: 0.0422 - val_accuracy: 0.9090 - val_loss: 0.2819
  val_accuracy did not improve from 0.91500

(As with the custom CNN, Keras printed a WARNING:absl message at each checkpoint save: the HDF5 .h5 format is legacy, and the native .keras format is recommended.)
# Accuracy plot
plt.figure(figsize=(8, 5))
plt.subplot(1, 2, 1)
plt.plot(history_vgg.history['accuracy'], label='Train Accuracy')
plt.plot(history_vgg.history['val_accuracy'], label='Validation Accuracy')
plt.title('Accuracy over epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
# Loss plot
plt.subplot(1, 2, 2)
plt.plot(history_vgg.history['loss'], label='Train Loss')
plt.plot(history_vgg.history['val_loss'], label='Validation Loss')
plt.title('Loss over epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.tight_layout()
plt.show()
The accuracy graph shows that while training accuracy steadily increases, validation accuracy begins to fluctuate after a few epochs. The EarlyStopping function effectively halted training to prevent overfitting.
The VGG16-based model completed its training in 15 epochs, stopped by EarlyStopping after the best validation accuracy was reached at epoch 12. This indicates that the model reached its best performance before the maximum number of epochs was reached.
4. Evaluating the Models
Loading the best saved version of each model. To automate this, I am going to create a function and then evaluate both the custom CNN and the VGG16-based model.
Creating a function to evaluate models
def evaluate_models(name, model_path):
    best_model = load_model(model_path)

    # Predictions
    y_probs = best_model.predict(x_test)
    y_preds = (y_probs > 0.5).astype("int32")  # Convert probabilities to class labels

    # Accuracy and loss
    test_loss, test_accuracy = best_model.evaluate(x_test, y_test)
    print(f"\nEvaluation for {name}")
    print(f"Test Accuracy: {test_accuracy:.4f}")
    print(f"Test Loss: {test_loss:.4f}")

    # Confusion matrix - true and false positives/negatives
    cm = confusion_matrix(y_test, y_preds)
    plt.figure(figsize=(5, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(f"Confusion Matrix - {name}")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.show()

    # Classification report - precision, recall and F1-score
    print("Classification Report:")
    print(classification_report(y_test, y_preds, target_names=["Cat", "Dog"]))

    # Precision-recall curve - trade-off across decision thresholds
    precisions, recalls, thresholds = precision_recall_curve(y_test, y_probs)
    plt.figure(figsize=(6, 5))
    plt.plot(recalls, precisions, marker='.')
    plt.title(f"Precision-Recall Curve - {name}")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.grid(True)
    plt.show()
Test Set Evaluation - Custom Convolutional Model
evaluate_models('Custom CNN','best_model.h5')
WARNING:absl:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.
63/63 - 3s 48ms/step
63/63 - 3s 47ms/step - accuracy: 0.6785 - loss: 1.0055

Evaluation for Custom CNN
Test Accuracy: 0.6785
Test Loss: 1.0055

Classification Report:
              precision    recall  f1-score   support

         Cat       0.69      0.65      0.67      1000
         Dog       0.67      0.70      0.69      1000

    accuracy                           0.68      2000
   macro avg       0.68      0.68      0.68      2000
weighted avg       0.68      0.68      0.68      2000
The accuracy on the test set is about 0.68, which indicates that the model does not perform well on unseen data. This is likely due to overfitting: the model performs well on the training set but considerably worse on the validation and test sets.
Test Set Evaluation - VGG16 Fine-Tuned Model
evaluate_models("VGG16 Fine-Tuned", "best_vgg_model.h5")
WARNING:absl:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.
63/63 - 51s 799ms/step
63/63 - 52s 814ms/step - accuracy: 0.8930 - loss: 0.2997

Evaluation for VGG16 Fine-Tuned
Test Accuracy: 0.8930
Test Loss: 0.2997

Classification Report:
              precision    recall  f1-score   support

         Cat       0.89      0.89      0.89      1000
         Dog       0.89      0.89      0.89      1000

    accuracy                           0.89      2000
   macro avg       0.89      0.89      0.89      2000
weighted avg       0.89      0.89      0.89      2000
The accuracy with VGG16 reached 0.89, a clear improvement over the custom CNN's 0.68. Its validation performance was more stable, and the use of EarlyStopping helped avoid overfitting. Overall, the model generalizes well and is expected to perform effectively on new, unseen data.
Conclusions - Comparing Models
The VGG16 Fine-Tuned model maintained high precision even at high levels of recall, as the precision-recall curves show. In contrast, the custom CNN showed an imbalance between precision and recall. The F1 score was significantly better for the VGG16 Fine-Tuned model, reaching 0.89 for both classes, compared to roughly 0.68 for the custom CNN. A visual summary of this comparison is sketched below:
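The sketch below reuses the two History objects from the training cells above (history and history_vgg) to overlay the validation-accuracy curves of both models on one plot:
# Sketch: overlay the validation accuracy of both models for a side-by-side comparison
plt.figure(figsize=(7, 4))
plt.plot(history.history['val_accuracy'], label='Custom CNN - validation accuracy')
plt.plot(history_vgg.history['val_accuracy'], label='VGG16 Fine-Tuned - validation accuracy')
plt.title('Validation accuracy per epoch: Custom CNN vs VGG16 Fine-Tuned')
plt.xlabel('Epoch')
plt.ylabel('Validation accuracy')
plt.legend()
plt.grid(True)
plt.show()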
Showing Misclassified Images
To show some examples from the test set that each model misclassified, I am going to create a function as well.
def show_misclassified_images(model_path, name, num_images=9):
    model = load_model(model_path)
    y_probs = model.predict(x_test)
    y_preds = (y_probs > 0.5).astype("int32").flatten()

    misclassified_idxs = np.where(y_preds != y_test)[0]
    if len(misclassified_idxs) == 0:
        print(f"No misclassified images found for {name}.")
        return

    sample_idxs = np.random.choice(misclassified_idxs, size=min(num_images, len(misclassified_idxs)), replace=False)

    plt.figure(figsize=(12, 6))
    for i, idx in enumerate(sample_idxs):
        plt.subplot(3, 3, i + 1)
        plt.imshow(x_test[idx])
        true_label = "Dog" if y_test[idx] == 1 else "Cat"
        pred_label = "Dog" if y_preds[idx] == 1 else "Cat"
        plt.title(f"True: {true_label}, Pred: {pred_label}")
        plt.axis('off')
    plt.suptitle(f"Misclassified Examples - {name}", fontsize=14)
    plt.tight_layout()
    plt.show()
The images below show the true label vs. the predicted label for each failure case.
show_misclassified_images("best_model.h5", "Custom CNN")
WARNING:absl:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.
63/63 - 3s 49ms/step
The Custom CNN model reveals specific types of images where it tends to struggle:
- Images containing more than one pet
- Black and white dogs, which are sometimes confused with cats
- Blurry backgrounds that make the subject harder to identify
- Unusual angles or rotations, as well as non-standard backgrounds
show_misclassified_images("best_vgg_model.h5", "VGG16 Fine-Tuned")
WARNING:absl:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.
63/63 - 50s 786ms/step
The VGG16 Fine-Tuned model misclassified some images showing:
- Dogs with unusual poses or faces
- Small pets, or pets partially covered by objects (e.g., toys, blankets, or hands)
- Pictures with overlaid text
These situations may obscure key visual features used by the model to differentiate classes.
5. Final Conclusions
As demonstrated in the previous sections, and after comparing both models, the VGG16 Fine-Tuned model clearly outperformed the custom CNN across all key evaluation metrics: accuracy, precision, recall, F1-score, and overall stability on the test set.
In addition, it made fewer classification errors, as shown in the confusion matrix and visual analysis of misclassified examples.
Although the VGG16-based model takes more than 15 minutes to train, the improvement in accuracy and generalization makes the trade-off worthwhile.
Therefore, the VGG16 Fine-Tuned model is more suitable for this binary image classification task.