Real-world use case

Below, we use the mini-gpr package to fit a model to the airfoil self-noise dataset.

[1]:
import pandas as pd

# Remote location of the raw airfoil self-noise data file.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00291/airfoil_self_noise.dat"

# Names of the five input features and of the single regression target.
inputs = [
    "frequency",
    "angle_of_attack",
    "chord_length",
    "free_stream_velocity",
    "suction_side_displacement_thickness",
]
output = "scaled_sound_pressure_level"

# Full column list: inputs first, target last. The names are assigned
# positionally to the columns of the file by read_csv below.
cols = [*inputs, output]

# The file is read without a header row (header=None), so column names are
# supplied explicitly. The separator is a regular expression (a tab, or a run
# of whitespace), which is why the python parsing engine is requested.
df = pd.read_csv(
    url,
    sep=r"\t|\s+",
    header=None,
    names=cols,
    engine="python",
)
df
[1]:
frequency angle_of_attack chord_length free_stream_velocity suction_side_displacement_thickness scaled_sound_pressure_level
0 800 0.0 0.3048 71.3 0.002663 126.201
1 1000 0.0 0.3048 71.3 0.002663 125.201
2 1250 0.0 0.3048 71.3 0.002663 125.951
3 1600 0.0 0.3048 71.3 0.002663 127.591
4 2000 0.0 0.3048 71.3 0.002663 127.461
... ... ... ... ... ... ...
1498 2500 15.6 0.1016 39.6 0.052849 110.264
1499 3150 15.6 0.1016 39.6 0.052849 109.254
1500 4000 15.6 0.1016 39.6 0.052849 106.604
1501 5000 15.6 0.1016 39.6 0.052849 106.224
1502 6300 15.6 0.1016 39.6 0.052849 104.204

1503 rows × 6 columns

[2]:
# Take 500 randomly chosen rows for training and use the remaining rows for
# testing. A fixed random_state makes the split - and therefore every model
# fit, optimised hyperparameter and figure that follows - reproducible when
# the notebook is re-run from a fresh kernel.
train = df.sample(n=500, random_state=0)
# Dropping the sampled index labels leaves exactly the rows not used for training.
test = df.drop(train.index)
len(train), len(test)
[2]:
(500, 1003)
[3]:
from sklearn.preprocessing import StandardScaler

# Fit one scaler for the input features and one for the target, using the
# training rows only; the same fitted scalers are reused later to transform
# the test inputs and to map predictions back to the original units.
input_scaler = StandardScaler().fit(train[inputs])
# Double brackets select a single-column DataFrame (2-D), not a Series.
output_scaler = StandardScaler().fit(train[[output]])

# Standardised training inputs and targets used to fit the model.
X = input_scaler.transform(train[inputs])
y = output_scaler.transform(train[[output]])
[4]:
from mini_gpr.kernels import RBF
from mini_gpr.models import GPR

# Initial (un-optimised) model: an RBF kernel with one `sigma` entry per input
# feature, all starting at 0.05, and a noise value of 0.1. The number of sigma
# entries is derived from `inputs` so it cannot drift out of sync with the
# feature list.
# NOTE(review): sigma is presumably the per-dimension length scale - confirm
# against the mini_gpr documentation.
model = GPR(RBF(sigma=[0.05] * len(inputs)), noise=0.1)
model.fit(X, y)
[5]:
import matplotlib.pyplot as plt

%config InlineBackend.figure_format = 'svg'

# Predictions are made in the standardised space. inverse_transform expects a
# 2-D column, hence the reshape, and the result is flattened back to 1-D for
# plotting in the original units.
raw_pred_train = model.predict(X)
pred_train = output_scaler.inverse_transform(
    raw_pred_train.reshape(-1, 1)
).flatten()

# Test inputs are scaled with the scaler that was fitted on the training rows.
raw_pred_test = model.predict(input_scaler.transform(test[inputs]))
pred_test = output_scaler.inverse_transform(
    raw_pred_test.reshape(-1, 1)
).flatten()

# Parity plot: predicted vs. true values for both the test and training sets.
fig, ax = plt.subplots(figsize=(3, 3))
ax.plot(pred_test, test[output], ".r", label="test set")
ax.plot(pred_train, train[output], ".k", label="training set")
# Dashed y = x reference line: anchored at a point with equal coordinates and
# drawn with slope 1.
ax.axline((pred_train[0], pred_train[0]), slope=1, color="k", ls="--", lw=1)
ax.set_xlabel("Predicted (dB)")
ax.set_ylabel("True (dB)")
# Legend is placed outside the axes, to the right; the trailing semicolon
# suppresses the Legend repr in the cell output.
ax.legend(bbox_to_anchor=(1.05, 0.5), loc="center left", markerscale=2);
../_images/tutorials_real-world-use-case_5_0.svg
[6]:
from mini_gpr.opt import maximise_log_likelihood, optimise_model

# Optimise the model on the training data, using maximise_log_likelihood as
# the objective passed to optimise_model.
# NOTE(review): optimise_noise=True presumably lets the noise value be tuned
# alongside the kernel parameters - confirm against the mini_gpr documentation.
best_model = optimise_model(
    model,
    maximise_log_likelihood,
    X,
    y,
    optimise_noise=True,
)

# Display the kernel parameters of the optimised model.
best_model.kernel.params
[6]:
{'sigma': [0.3196713784422319,
  1.6818896267717998,
  1.3924475424814566,
  0.05,
  0.7961265272752749],
 'scale': 1.246932649911396}
[7]:
# Same parity plot as for the initial model, now using the optimised model.
# Predictions are made in the standardised space. inverse_transform expects a
# 2-D column, hence the reshape, and the result is flattened back to 1-D for
# plotting in the original units.
raw_pred_train = best_model.predict(X)
pred_train = output_scaler.inverse_transform(
    raw_pred_train.reshape(-1, 1)
).flatten()

# Test inputs are scaled with the scaler that was fitted on the training rows.
raw_pred_test = best_model.predict(input_scaler.transform(test[inputs]))
pred_test = output_scaler.inverse_transform(
    raw_pred_test.reshape(-1, 1)
).flatten()

plt.figure(figsize=(3, 3))
plt.plot(pred_test, test[output], ".r", label="test set")
plt.plot(pred_train, train[output], ".k", label="training set")
# Dashed y = x reference line: anchored at a point with equal coordinates and
# drawn with slope 1.
plt.axline((pred_train[0], pred_train[0]), slope=1, color="k", ls="--", lw=1)
plt.xlabel("Predicted (dB)")
plt.ylabel("True (dB)")
# Trailing semicolon suppresses the `<matplotlib.legend.Legend at ...>` repr
# that would otherwise be shown as the cell's output.
plt.legend(bbox_to_anchor=(1.05, 0.5), loc="center left", markerscale=2);
[7]:
<matplotlib.legend.Legend at 0x12b6ff090>
../_images/tutorials_real-world-use-case_7_1.svg