In [1]:
get_ipython().ast_node_interactivity = 'all'
import matplotlib.pyplot as plt
import numpy as np
In [2]:
vals = np.random.normal(0, 1, 10_000_000)

np.mean(vals), np.std(vals)
Out [2]:
(0.00010162346483672586, 0.9999363897125003)
In [3]:
# PDF

hist = np.histogram(vals, bins=100)

Xs = hist[1][:-1]
Ys = hist[0]

plt.plot(hist[1][:-1], hist[0])
Out [3]:
[]
Out:
<Figure size 640x480 with 1 Axes>
In [4]:
# Approximate the PDF with a polynomial

pdf_poly = np.polyfit(Xs, Ys, 20)

pdf = lambda x: np.polyval(pdf_poly, x)

plt.plot(Xs, pdf(Xs))
Out [4]:
[]
Out:
<Figure size 640x480 with 1 Axes>
In [5]:
# CDF
cdf_vals = []
s = 0.0

for x in Xs:
    s += pdf(x)
    cdf_vals.append(s)

cdf_vals = np.array(cdf_vals) / s

plt.plot(Xs, cdf_vals)
Out [5]:
[]
Out:
<Figure size 640x480 with 1 Axes>
In [17]:
# Approximate the CDF with a polynomial

cdf_poly = np.polyfit(Xs, cdf_vals, 20)

cdf = lambda x: np.polyval(cdf_poly, x)

plt.plot(Xs, cdf(Xs))
Out [17]:
[]
Out:
<Figure size 640x480 with 1 Axes>
In [48]:
# Approximate the inverse CDF with a polynomial
y = cdf(Xs)

# Learn the inverse function
import sklearn.neural_network
from sklearn.preprocessing import MinMaxScaler

inputscaler = MinMaxScaler()
outputscaler = MinMaxScaler()

m = sklearn.neural_network.MLPRegressor(hidden_layer_sizes=(100, 100, 100), max_iter=1000000, activation='logistic')

m.fit(inputscaler.fit_transform(y.reshape(-1, 1)), outputscaler.fit_transform(Xs.reshape(-1, 1)))

def inv_cdf(x):
    inp = inputscaler.transform(x.reshape(-1, 1))
    res = m.predict(inp).reshape(-1, 1)
    return outputscaler.inverse_transform(res)

plt.plot(cdf(Xs), Xs)

xs = np.linspace(0, 1, 500)

_ = plt.plot(xs, inv_cdf(xs))
Out:
/usr/lib/python3.11/site-packages/sklearn/neural_network/_multilayer_perceptron.py:1624: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
Out [48]:
MLPRegressor(activation='logistic', hidden_layer_sizes=(100, 100, 100),
             max_iter=1000000)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Out [48]:
[]
Out:
<Figure size 640x480 with 1 Axes>
In [45]:
# Sample from the distribution
xs = np.random.uniform(0, 1, 5000)
samples = inv_cdf(xs)

_ = plt.hist(samples, bins=100)
Out:
<Figure size 640x480 with 1 Axes>