Open In Colab

Example: Predicting House Prices#

In this section, we will attempt to predict the median price of homes in a given Boston suburb in the mid-1970s, given data points about the suburb at the time, such as the crime rate, the local property tax rate, and so on.

See https://fairlearn.org/v0.12/user_guide/datasets/boston_housing_data.html for more information about this dataset, especially some important concerns about the fairness of the dataset.

from tensorflow.keras.datasets import boston_housing 

# Load the pre-split Boston Housing data: per the prints below, 404 training
# and 102 test samples, each with 13 numeric features; targets are median
# home prices (presumably in $1000s — e.g. 15.2 for the first sample).
(train_data, train_targets), (test_data, test_targets) = ( boston_housing.load_data())
2025-05-08 15:04:38.153818: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-08 15:04:38.156997: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-08 15:04:38.165428: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
E0000 00:00:1746716678.179268   54146 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746716678.183964   54146 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1746716678.195553   54146 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746716678.195563   54146 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746716678.195565   54146 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746716678.195566   54146 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
2025-05-08 15:04:38.199583: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/boston_housing.npz
    0/57026 ━━━━━━━━━━━━━━━━━━━━ 0s 0s/step

57026/57026 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
# Report the dimensions of every split: 404 train / 102 test rows, 13 features.
for split_label, split_array in (
    ("Training data", train_data),
    ("Training targets", train_targets),
    ("Testing data", test_data),
    ("Testing targets", test_targets),
):
    print(f"{split_label} shape:", split_array.shape)
Training data shape: (404, 13)
Training targets shape: (404,)
Testing data shape: (102, 13)
Testing targets shape: (102,)
# Peek at one raw example: its 13 feature values and the price target.
sample_idx = 0
print(train_data[sample_idx])
print(train_targets[sample_idx])
[  1.23247   0.        8.14      0.        0.538     6.142    91.7
   3.9769    4.      307.       21.      396.9      18.72   ]
15.2
import pandas as pd

# Reload the training split for tabular inspection.
(train_data, train_targets), _ = boston_housing.load_data()

# Column name -> short description for each Boston Housing feature.
_feature_descriptions = {
    'CRIM': 'Crime rate per capita',
    'ZN': 'Residential zoning',
    'INDUS': 'Industrial areas proportion',
    'CHAS': 'Charles River adjacency',
    'NOX': 'Nitric oxides concentration',
    'RM': 'Average rooms per dwelling',
    'AGE': 'Units built before 1940',
    'DIS': 'Distance to employment centers',
    'RAD': 'Accessibility to highways',
    'TAX': 'Property-tax rate',
    'PTRATIO': 'Pupil-teacher ratio',
    'B': 'Proportion of Black residents',
    'LSTAT': 'Lower socio-economic status (%)',
}
feature_names = list(_feature_descriptions)

# Wrap the raw array in a labeled DataFrame and preview it.
df_train_data = pd.DataFrame(train_data, columns=feature_names)
print(df_train_data.head())
      CRIM    ZN  INDUS  CHAS    NOX     RM    AGE     DIS   RAD    TAX  \
0  1.23247   0.0   8.14   0.0  0.538  6.142   91.7  3.9769   4.0  307.0   
1  0.02177  82.5   2.03   0.0  0.415  7.610   15.7  6.2700   2.0  348.0   
2  4.89822   0.0  18.10   0.0  0.631  4.970  100.0  1.3325  24.0  666.0   
3  0.03961   0.0   5.19   0.0  0.515  6.037   34.5  5.9853   5.0  224.0   
4  3.69311   0.0  18.10   0.0  0.713  6.376   88.4  2.5671  24.0  666.0   

   PTRATIO       B  LSTAT  
0     21.0  396.90  18.72  
1     14.7  395.38   3.11  
2     20.2  375.52   3.26  
3     20.2  396.90   8.01  
4     20.2  391.43  14.65  
import matplotlib.pyplot as plt
import seaborn as sns

# Horizontal boxplots of every raw feature.  The wildly different scales
# (TAX in the hundreds, NOX below 1) are what motivate normalization below.
plt.figure(figsize=(14, 8))
ax = sns.boxplot(data=df_train_data, orient='h')
ax.set_title('Distribution of Boston Housing Dataset Features')
ax.set_xlabel('Feature Values')
sns.despine(trim=True)
plt.tight_layout()
plt.show()
../_images/25ba9bf505d7fcab755fa772b5a9e9d17c9010eb0f1480d2d428307d05a1041a.png

Prepare the Data#

# Standardize each feature to zero mean / unit variance.  Crucially, the
# test set is scaled with statistics computed from the TRAINING data only —
# test data must be treated as unseen.
mean = train_data.mean(axis=0)
print("Mean:", mean)
centered_train = train_data - mean
std = centered_train.std(axis=0)
print("Standard deviation:", std)
norm_train_data = centered_train / std
norm_test_data = (test_data - mean) / std
Mean: [3.74511057e+00 1.14801980e+01 1.11044307e+01 6.18811881e-02
 5.57355941e-01 6.26708168e+00 6.90106436e+01 3.74027079e+00
 9.44059406e+00 4.05898515e+02 1.84759901e+01 3.54783168e+02
 1.27408168e+01]
Standard deviation: [9.22929073e+00 2.37382770e+01 6.80287253e+00 2.40939633e-01
 1.17147847e-01 7.08908627e-01 2.79060634e+01 2.02770050e+00
 8.68758849e+00 1.66168506e+02 2.19765689e+00 9.39946015e+01
 7.24556085e+00]
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Reload both splits fresh for this visualization.
(train_data, _), (test_data, _) = boston_housing.load_data()

# Combine train and test data for visualization purposes only (not for training)
data = np.concatenate([train_data, test_data], axis=0)

# Show one feature before/after normalization.
# Index 7 is DIS (weighted distance to employment centers).
feature_idx = 7

fig, (ax_raw, ax_norm) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: raw values.
ax_raw.hist(data[:, feature_idx], bins=30, color='skyblue', edgecolor='black')
ax_raw.set_title('Feature before normalization')
ax_raw.set_xlabel(f'Value of feature {feature_idx}')
ax_raw.set_ylabel('Frequency')

# Statistics come from the training split only, as in the real pipeline.
mean = train_data.mean(axis=0)
std = train_data.std(axis=0)

data_norm = (data - mean) / std

# Right panel: standardized values.
ax_norm.hist(data_norm[:, feature_idx], bins=30, color='lightgreen', edgecolor='black')
ax_norm.set_title(f'Feature after normalization: mean={mean[feature_idx]:.2f}, std={std[feature_idx]:.2f}')
ax_norm.set_xlabel(f'Normalized value of feature {feature_idx}')
ax_norm.set_ylabel('Frequency')
sns.despine()
plt.tight_layout()
plt.show()
../_images/23abd3a05e959ea82529cda2801e04f8fe3cd7da2f11cf51746ee7998e859f21.png
# Sanity-check the normalization: every feature should now have mean ~0, std ~1.
norm_mean = norm_train_data.mean(axis=0)
norm_std = norm_train_data.std(axis=0)
for i, (m, s, nm, ns) in enumerate(zip(mean, std, norm_mean, norm_std)):
    print("Feature", i)
    print("Original:   ", f"mean={m:.3f},  std={s:.3f}")    
    print("Normalized: ", f"mean={nm:.3f},  std={ns:.3f}")    
    print()
Feature 0
Original:    mean=3.745,  std=9.229
Normalized:  mean=-0.000,  std=1.000

Feature 1
Original:    mean=11.480,  std=23.738
Normalized:  mean=0.000,  std=1.000

Feature 2
Original:    mean=11.104,  std=6.803
Normalized:  mean=0.000,  std=1.000

Feature 3
Original:    mean=0.062,  std=0.241
Normalized:  mean=-0.000,  std=1.000

Feature 4
Original:    mean=0.557,  std=0.117
Normalized:  mean=-0.000,  std=1.000

Feature 5
Original:    mean=6.267,  std=0.709
Normalized:  mean=0.000,  std=1.000

Feature 6
Original:    mean=69.011,  std=27.906
Normalized:  mean=0.000,  std=1.000

Feature 7
Original:    mean=3.740,  std=2.028
Normalized:  mean=0.000,  std=1.000

Feature 8
Original:    mean=9.441,  std=8.688
Normalized:  mean=0.000,  std=1.000

Feature 9
Original:    mean=405.899,  std=166.169
Normalized:  mean=-0.000,  std=1.000

Feature 10
Original:    mean=18.476,  std=2.198
Normalized:  mean=0.000,  std=1.000

Feature 11
Original:    mean=354.783,  std=93.995
Normalized:  mean=0.000,  std=1.000

Feature 12
Original:    mean=12.741,  std=7.246
Normalized:  mean=0.000,  std=1.000

Building the Model#

from tensorflow import keras
from tensorflow.keras.layers import Dense

def build_model():
    """Return a freshly compiled regression network.

    Architecture: two hidden Dense layers of 64 ReLU units feeding a single
    linear output unit (a scalar price prediction).  Compiled with RMSprop
    on MSE loss, tracking MAE as a more interpretable metric.  A builder
    function is used because K-fold cross-validation needs a brand-new,
    untrained model for every fold.
    """
    model = keras.Sequential()
    model.add(Dense(64, activation="relu"))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(1))

    model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])

    return model

Mean Absolute Error (MAE) vs. Mean Squared Error (MSE)#

The Mean Absolute Error (MAE) and Mean Squared Error (MSE) both measure prediction errors but differ significantly in how they handle those errors: Suppose that \(\hat{y}\) are the predicted values and \(y\) are the actual values.

  • Mean Absolute Error (MAE) measures the average absolute difference:

\[ \text{MAE} = \frac{1}{n}\sum_{i=1}^{n}\left|y_i - \hat{y}_i\right| \]
  • Mean Squared Error (MSE) measures the average squared difference:

\[ \text{MSE} = \frac{1}{n}\sum_{i=1}^{n}(y_i - \hat{y}_i)^2 \]

When to use MAE or MSE?#

Use MAE when:#

  • You want a metric that’s easy to interpret (“On average, predictions are off by \(x\) units”).

  • You prefer robustness against outliers (large, rare errors don’t disproportionately affect results).

  • You need stable behavior regardless of occasional large errors.

Use MSE when:#

  • You specifically want to penalize large errors heavily.

  • Large prediction errors carry significant consequences, and small errors matter less.

  • Your optimization method benefits from smoother, more sensitive loss functions.

import numpy as np
import matplotlib.pyplot as plt

# Toy example contrasting MAE and MSE: squaring blows up the two large
# errors (points 3 and 4) while barely touching the small ones.
actual = np.array([0, 1, 2, 3, 4, 5])
predicted = np.array([0, 1.2, 1.8, 3.5, 2.0, 6])

diffs = actual - predicted
absolute_errors = np.abs(diffs)
squared_errors = diffs ** 2

fig, (ax_mae, ax_mse) = plt.subplots(1, 2, figsize=(14, 6))

# Draw both panels with a shared loop; annotate each bar with its value.
panels = (
    (ax_mae, absolute_errors, 'Absolute Errors (MAE)', 'Absolute Error', 'skyblue'),
    (ax_mse, squared_errors, 'Squared Errors (MSE)', 'Squared Error', 'salmon'),
)
for ax, errors, title, ylabel, color in panels:
    ax.bar(range(len(actual)), errors, color=color, edgecolor='black')
    ax.set_title(title)
    ax.set_xlabel('Data Point')
    ax.set_ylabel(ylabel)
    for i, err in enumerate(errors):
        ax.text(i, err + 0.05, f"{err:.2f}", ha='center')

plt.tight_layout()
sns.despine()
plt.show()
../_images/736d365cdcaed860ba65e7088d19275384ff616e5b985d56f29f4672a10d8080.png

K-Fold Cross Validation#

K-Fold Cross-Validation is a robust method to evaluate the performance of machine learning models. The main idea is to repeatedly split the dataset into training and validation subsets in different ways.

This process helps ensure that the model is not just memorizing the training data but is genuinely learning to generalize to unseen data.

How K-Fold Cross-Validation Works#

  1. Split the Data:
    The dataset is divided into k equally sized folds (subsets).

  2. Training and Validation: For each fold:

    • The model is trained on k-1 folds (the training set).

    • The remaining fold is held out and used for validation (the validation set).

  3. Repeat: This process is repeated k times, with each fold being used as the validation set once.

  4. Average the Results: The performance metric (e.g., accuracy, MAE, MSE) is averaged across all k iterations to provide a more reliable estimate of the model’s performance.

Reasons to Use K-Fold Cross-Validation#

  • Better Estimate of Model Performance:
    It provides a more reliable estimate by averaging results across multiple splits, reducing variability caused by a single random train-test split.

  • Efficient Use of Data:
    Each data point is used exactly once for validation, ensuring that the entire dataset contributes to the evaluation of the model’s performance.

  • Reduces Overfitting and Bias:
    It helps detect whether the model generalizes well to unseen data, avoiding overly optimistic (biased) estimates of accuracy.

  • Tuning and Comparing Models:
    Facilitates comparison of different models or hyperparameters, giving you confidence in selecting the best model for your task.

In short, K-Fold Cross-Validation enhances the trustworthiness and stability of your model evaluation results.

k = 4 # number of folds

def demonstrate_kfold_split(data, targets, k):
    """Print and return the k cross-validation folds of (data, targets).

    For fold i, the validation slice is rows [i*n : (i+1)*n] where
    n = len(data) // k, and the training portion is everything else,
    rejoined with np.concatenate.  Returns a list of
    (val_data, val_targets, partial_train_data, partial_train_targets)
    tuples, one per fold.

    The original cell spelled out folds 0..3 as four verbatim copies of
    the same slicing code; this loop produces identical output once.
    """
    num_val_samples = len(data) // k
    print("Number of validation samples:", num_val_samples)

    folds = []
    for i in range(k):
        # Validation slice for this fold.
        val_data = data[i * num_val_samples: (i + 1) * num_val_samples]
        print("Fold: ", i)
        print("data: \n", val_data.shape)
        print(val_data)
        val_targets = targets[i * num_val_samples: (i + 1) * num_val_samples]
        print("targets: \n", val_targets.shape)
        print(val_targets)

        # Training portion: everything before plus everything after the slice.
        partial_train_data = np.concatenate(
            [data[:i * num_val_samples],
             data[(i + 1) * num_val_samples:]], axis=0)
        partial_train_targets = np.concatenate(
            [targets[:i * num_val_samples],
             targets[(i + 1) * num_val_samples:]], axis=0)
        print("Partial train data shape:", partial_train_data.shape)
        print(partial_train_data)
        print("Partial train targets shape:", partial_train_targets.shape)
        print(partial_train_targets)
        print("\n------\n")

        folds.append(
            (val_data, val_targets, partial_train_data, partial_train_targets))
    return folds

# Small random dataset purely to illustrate the mechanics of the split.
sample_train_data = np.random.uniform(size=(10, 3))
sample_target_data = np.random.randint(0, 45, size=(10,))
print("Sample train data shape:", sample_train_data.shape)
print(sample_train_data)
print("Sample train target shape:", sample_target_data.shape)
print(sample_target_data)

print()
fold_splits = demonstrate_kfold_split(sample_train_data, sample_target_data, k)
Sample train data shape: (10, 3)
[[0.30278832 0.5607239  0.35313692]
 [0.99026643 0.14895865 0.61142836]
 [0.78007374 0.4567382  0.00175286]
 [0.68070212 0.49413964 0.35861974]
 [0.31136232 0.22244844 0.94865185]
 [0.81898485 0.87513562 0.04812138]
 [0.12995803 0.50614451 0.07533182]
 [0.48893732 0.66953574 0.39752912]
 [0.94194884 0.74030068 0.71780485]
 [0.01307903 0.58069034 0.96185538]]
Sample train target shape: (10,)
[15 15 37  7  0 12  3  5 35 36]

Number of validation samples: 2
Fold:  0
data: 
 (2, 3)
[[0.30278832 0.5607239  0.35313692]
 [0.99026643 0.14895865 0.61142836]]
targets: 
 (2,)
[15 15]
Partial train data shape: (8, 3)
[[0.78007374 0.4567382  0.00175286]
 [0.68070212 0.49413964 0.35861974]
 [0.31136232 0.22244844 0.94865185]
 [0.81898485 0.87513562 0.04812138]
 [0.12995803 0.50614451 0.07533182]
 [0.48893732 0.66953574 0.39752912]
 [0.94194884 0.74030068 0.71780485]
 [0.01307903 0.58069034 0.96185538]]
Partial train targets shape: (8,)
[37  7  0 12  3  5 35 36]

------

Fold:  1
data: 
 (2, 3)
[[0.78007374 0.4567382  0.00175286]
 [0.68070212 0.49413964 0.35861974]]
targets: 
 (2,)
[37  7]
Partial train data shape: (8, 3)
[[0.30278832 0.5607239  0.35313692]
 [0.99026643 0.14895865 0.61142836]
 [0.31136232 0.22244844 0.94865185]
 [0.81898485 0.87513562 0.04812138]
 [0.12995803 0.50614451 0.07533182]
 [0.48893732 0.66953574 0.39752912]
 [0.94194884 0.74030068 0.71780485]
 [0.01307903 0.58069034 0.96185538]]
Partial train targets shape: (8,)
[15 15  0 12  3  5 35 36]

------

Fold:  2
data: 
 (2, 3)
[[0.31136232 0.22244844 0.94865185]
 [0.81898485 0.87513562 0.04812138]]
targets: 
 (2,)
[ 0 12]
Partial train data shape: (8, 3)
[[0.30278832 0.5607239  0.35313692]
 [0.99026643 0.14895865 0.61142836]
 [0.78007374 0.4567382  0.00175286]
 [0.68070212 0.49413964 0.35861974]
 [0.12995803 0.50614451 0.07533182]
 [0.48893732 0.66953574 0.39752912]
 [0.94194884 0.74030068 0.71780485]
 [0.01307903 0.58069034 0.96185538]]
Partial train targets shape: (8,)
[15 15 37  7  3  5 35 36]

------

Fold:  3
data: 
 (2, 3)
[[0.12995803 0.50614451 0.07533182]
 [0.48893732 0.66953574 0.39752912]]
targets: 
 (2,)
[3 5]
Partial train data shape: (8, 3)
[[0.30278832 0.5607239  0.35313692]
 [0.99026643 0.14895865 0.61142836]
 [0.78007374 0.4567382  0.00175286]
 [0.68070212 0.49413964 0.35861974]
 [0.31136232 0.22244844 0.94865185]
 [0.81898485 0.87513562 0.04812138]
 [0.94194884 0.74030068 0.71780485]
 [0.01307903 0.58069034 0.96185538]]
Partial train targets shape: (8,)
[15 15 37  7  0 12 35 36]

------
# K-fold cross-validation over the normalized training data: train a fresh
# model on k-1 folds, score MAE on the held-out fold, collect the k scores.
k = 4
num_val_samples = len(norm_train_data) // k 
num_epochs = 100  
all_scores = []

for fold in range(k):
    print(f"Processing fold #{fold}")
    start = fold * num_val_samples
    stop = start + num_val_samples

    # Held-out slice for this fold.
    val_data = norm_train_data[start:stop]
    val_targets = train_targets[start:stop]

    # Everything outside [start, stop) becomes the training portion.
    partial_train_data = np.concatenate(
        [norm_train_data[:start], norm_train_data[stop:]], axis=0)
    partial_train_targets = np.concatenate(
        [train_targets[:start], train_targets[stop:]], axis=0)

    # A brand-new untrained model per fold.
    model = build_model()
    model.fit(
        partial_train_data,
        partial_train_targets,
        epochs=num_epochs,
        batch_size=16,
        verbose=0)

    # evaluate returns [loss (MSE), MAE] for the compiled metrics.
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mae)
Processing fold #0
2025-05-08 15:04:41.801110: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
Processing fold #1
Processing fold #2
Processing fold #3
# Per-fold validation MAEs and their average (the CV estimate of test error).
mean_cv_mae = np.mean(all_scores)
print(all_scores)
print(mean_cv_mae)
[2.150498151779175, 2.6223154067993164, 2.4442193508148193, 2.3675527572631836]
2.3961464166641235
# Re-run K-fold CV, this time recording the per-epoch validation MAE curve
# of every fold so we can plot how validation error evolves with training.
k = 4
num_val_samples = len(norm_train_data) // k 
num_epochs = 500  
all_mae_histories = []
for fold in range(k):
    print(f"Processing fold #{fold}")
    start = fold * num_val_samples
    stop = start + num_val_samples

    val_data = norm_train_data[start:stop]
    val_targets = train_targets[start:stop]

    partial_train_data = np.concatenate(
        [norm_train_data[:start], norm_train_data[stop:]], axis=0)
    partial_train_targets = np.concatenate(
        [train_targets[:start], train_targets[stop:]], axis=0)

    model = build_model()
    # validation_data makes Keras log "val_mae" after every epoch.
    history = model.fit(
        partial_train_data,
        partial_train_targets,
        validation_data=(val_data, val_targets),
        epochs=num_epochs, batch_size=16, verbose=0)

    all_mae_histories.append(history.history["val_mae"])
Processing fold #0
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[14], line 19
     14 partial_train_targets = np.concatenate(
     15     [train_targets[:i * num_val_samples], 
     16      train_targets[(i + 1) * num_val_samples:]], axis=0) 
     18 model = build_model() 
---> 19 history = model.fit(
     20     partial_train_data, 
     21     partial_train_targets, 
     22     validation_data=(val_data, val_targets),
     23     epochs=num_epochs, batch_size=16, verbose=0)
     25 mae_history = history.history["val_mae"]
     27 all_mae_histories.append(mae_history)

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py:117, in filter_traceback.<locals>.error_handler(*args, **kwargs)
    115 filtered_tb = None
    116 try:
--> 117     return fn(*args, **kwargs)
    118 except Exception as e:
    119     filtered_tb = _process_traceback_frames(e.__traceback__)

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py:371, in TensorFlowTrainer.fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq)
    369 for step, iterator in epoch_iterator:
    370     callbacks.on_train_batch_begin(step)
--> 371     logs = self.train_function(iterator)
    372     callbacks.on_train_batch_end(step, logs)
    373     if self.stop_training:

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py:219, in TensorFlowTrainer._make_function.<locals>.function(iterator)
    215 def function(iterator):
    216     if isinstance(
    217         iterator, (tf.data.Iterator, tf.distribute.DistributedIterator)
    218     ):
--> 219         opt_outputs = multi_step_on_iterator(iterator)
    220         if not opt_outputs.has_value():
    221             raise StopIteration

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/tensorflow/python/util/traceback_utils.py:150, in filter_traceback.<locals>.error_handler(*args, **kwargs)
    148 filtered_tb = None
    149 try:
--> 150   return fn(*args, **kwargs)
    151 except Exception as e:
    152   filtered_tb = _process_traceback_frames(e.__traceback__)

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py:833, in Function.__call__(self, *args, **kwds)
    830 compiler = "xla" if self._jit_compile else "nonXla"
    832 with OptionalXlaContext(self._jit_compile):
--> 833   result = self._call(*args, **kwds)
    835 new_tracing_count = self.experimental_get_tracing_count()
    836 without_tracing = (tracing_count == new_tracing_count)

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py:878, in Function._call(self, *args, **kwds)
    875 self._lock.release()
    876 # In this case we have not created variables on the first call. So we can
    877 # run the first trace but we should fail if variables are created.
--> 878 results = tracing_compilation.call_function(
    879     args, kwds, self._variable_creation_config
    880 )
    881 if self._created_variables:
    882   raise ValueError("Creating variables on a non-first call to a function"
    883                    " decorated with tf.function.")

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/tensorflow/python/eager/polymorphic_function/tracing_compilation.py:139, in call_function(args, kwargs, tracing_options)
    137 bound_args = function.function_type.bind(*args, **kwargs)
    138 flat_inputs = function.function_type.unpack_inputs(bound_args)
--> 139 return function._call_flat(  # pylint: disable=protected-access
    140     flat_inputs, captured_inputs=function.captured_inputs
    141 )

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/tensorflow/python/eager/polymorphic_function/concrete_function.py:1322, in ConcreteFunction._call_flat(self, tensor_inputs, captured_inputs)
   1318 possible_gradient_type = gradients_util.PossibleTapeGradientTypes(args)
   1319 if (possible_gradient_type == gradients_util.POSSIBLE_GRADIENT_TYPES_NONE
   1320     and executing_eagerly):
   1321   # No tape is watching; skip to running the function.
-> 1322   return self._inference_function.call_preflattened(args)
   1323 forward_backward = self._select_forward_and_backward_functions(
   1324     args,
   1325     possible_gradient_type,
   1326     executing_eagerly)
   1327 forward_function, args_with_tangents = forward_backward.forward()

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/tensorflow/python/eager/polymorphic_function/atomic_function.py:216, in AtomicFunction.call_preflattened(self, args)
    214 def call_preflattened(self, args: Sequence[core.Tensor]) -> Any:
    215   """Calls with flattened tensor inputs and returns the structured output."""
--> 216   flat_outputs = self.call_flat(*args)
    217   return self.function_type.pack_output(flat_outputs)

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/tensorflow/python/eager/polymorphic_function/atomic_function.py:251, in AtomicFunction.call_flat(self, *args)
    249 with record.stop_recording():
    250   if self._bound_context.executing_eagerly():
--> 251     outputs = self._bound_context.call_function(
    252         self.name,
    253         list(args),
    254         len(self.function_type.flat_outputs),
    255     )
    256   else:
    257     outputs = make_call_op_in_graph(
    258         self,
    259         list(args),
    260         self._bound_context.function_call_options.as_attrs(),
    261     )

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/tensorflow/python/eager/context.py:1688, in Context.call_function(self, name, tensor_inputs, num_outputs)
   1686 cancellation_context = cancellation.context()
   1687 if cancellation_context is None:
-> 1688   outputs = execute.execute(
   1689       name.decode("utf-8"),
   1690       num_outputs=num_outputs,
   1691       inputs=tensor_inputs,
   1692       attrs=attrs,
   1693       ctx=self,
   1694   )
   1695 else:
   1696   outputs = execute.execute_with_cancellation(
   1697       name.decode("utf-8"),
   1698       num_outputs=num_outputs,
   (...)   1702       cancellation_manager=cancellation_context,
   1703   )

File /opt/hostedtoolcache/Python/3.11.12/x64/lib/python3.11/site-packages/tensorflow/python/eager/execute.py:53, in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     51 try:
     52   ctx.ensure_initialized()
---> 53   tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
     54                                       inputs, attrs, num_outputs)
     55 except core._NotOkStatusException as e:
     56   if name is not None:

KeyboardInterrupt: 
# Average the k per-fold curves epoch-wise: zip(*histories) transposes the
# list of per-fold histories into per-epoch tuples of k scores.
average_mae_history = [
    np.mean(epoch_scores) for epoch_scores in zip(*all_mae_histories)]
plt.plot(range(1, len(average_mae_history) + 1), average_mae_history) 
plt.xlabel("Epochs") 
plt.ylabel("Validation MAE") 
plt.show()
../_images/56f4277fcf6ff2d189098e65bd3ac0e51493fd2579da459bb98c437a5aa32c72.png
# Drop the first 10 epochs, whose large initial errors dominate the y-axis.
# Label the x-axis with the ACTUAL epoch numbers (11 onward) — the original
# restarted the axis at 1, misrepresenting where the curve begins.
truncated_mae_history = average_mae_history[10:]

plt.plot(range(11, len(average_mae_history) + 1), truncated_mae_history)
plt.xlabel("Epochs")
plt.ylabel("Validation MAE")
plt.show()
../_images/342c8176b015d371f23922cf5c859111bc597b964670e80e54e7ee453477adee.png
model = build_model() 

# Train the final model on the NORMALIZED training data.  The original cell
# fit on the raw train_data even though all the cross-validation above used
# norm_train_data — an inconsistency that degrades the final test MAE
# (4.62 here vs ~2.40 in CV).
model.fit(
    norm_train_data,
    train_targets,
    epochs=130,   # epoch count chosen from the CV validation-MAE curve
    batch_size=16,
    verbose=0)

# Evaluate on the test set scaled with the training-set mean/std.
test_mse_score, test_mae_score = model.evaluate(norm_test_data, test_targets)

print(test_mae_score)
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 36.7568 - mae: 4.4139 
4.623236656188965
# Predict on the normalized test features to match the normalized training
# inputs used throughout this section.  NOTE(review): the final-model cell
# above must likewise fit on norm_train_data for these predictions to be
# meaningful — confirm both cells agree.
predictions = model.predict(norm_test_data)

# Show one test example: its raw (human-readable) features and the
# model's predicted price.
test_idx = 40
print(test_data[test_idx])
print(predictions[test_idx])
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step 
[1.9510e-02 1.7500e+01 1.3800e+00 0.0000e+00 4.1610e-01 7.1040e+00
 5.9500e+01 9.2229e+00 3.0000e+00 2.1600e+02 1.8600e+01 3.9324e+02
 8.0500e+00]
[22.461094]
# Plot the distribution of predicted vs. actual prices.

import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(12, 6))

# model.predict returns shape (n, 1); a 2-D array makes seaborn treat the
# input as wide-form data and silently drop the `label` from the legend,
# so flatten it to a 1-D vector first.
sns.histplot(predictions.ravel(), bins=30, color='skyblue',
             edgecolor='black', label='Predictions', kde=True)

# Actual targets are already 1-D.
sns.histplot(test_targets, bins=30, color='salmon', edgecolor='black',
             alpha=0.6, label='Actual Targets', kde=True)

plt.xlabel('Housing Prices')
plt.ylabel('Frequency')
plt.title('Comparison of Predicted and Actual Housing Prices')
plt.legend()
sns.despine()
plt.show()
../_images/904e792f12c948f3d1d85520dfcd6e05d823fc0782ffde4e57d4fae62739d594.png