BGPLVM updates and debug helper

This commit is contained in:
Max Zwiessele 2013-04-25 14:57:23 +01:00
parent e5b61030c3
commit e0f94d6d9c
3 changed files with 234 additions and 61 deletions

View file

@ -7,6 +7,7 @@ from matplotlib import pyplot as plt, pyplot
import GPy
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
from GPy.util.datasets import simulation_BGPLVM
# np.random.seed() returns None, so binding its result would leave
# default_seed as None. Keep the seed value itself and seed the global
# generator in a separate step.
default_seed = 123344
np.random.seed(default_seed)
@ -129,9 +130,9 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
Y3 = S3.dot(np.random.randn(S3.shape[1], D3))
Y1 += .3 * np.random.randn(*Y1.shape)
Y2 += .3 * np.random.randn(*Y2.shape)
Y3 += .3 * np.random.randn(*Y3.shape)
Y1 += .2 * np.random.randn(*Y1.shape)
Y2 += .2 * np.random.randn(*Y2.shape)
Y3 += .2 * np.random.randn(*Y3.shape)
Y1 -= Y1.mean(0)
Y2 -= Y2.mean(0)
@ -162,11 +163,31 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
return slist, [S1, S2, S3], Ylist
def bgplvm_simulation_matlab_compare():
    """Build a debug-enabled Bayesian GPLVM on the MATLAB comparison data.

    Loads the data set through ``simulation_BGPLVM``, builds a
    linear (ARD) + bias + white kernel, constructs a ``Bayesian_GPLVM``
    with ``init="PCA"``, ``M=20`` and ``_debug=True``, applies the default
    constraints and sets starting values for the ``noise`` and
    ``linear_variance`` parameters. No optimisation is run here.

    :returns: the constructed ``Bayesian_GPLVM`` model.
    """
    data = simulation_BGPLVM()
    Y, S, mu = data['Y'], data['S'], data['mu']
    M = 20
    _, Q = mu.shape

    from GPy.models import mrd
    from GPy import kern
    # Re-import so that in-session edits to these modules are picked up.
    reload(mrd)
    reload(kern)

    start_variance = np.exp(-2)
    kernel = (kern.linear(Q, ARD=True)
              + kern.bias(Q, start_variance)
              + kern.white(Q, start_variance))

    # The MATLAB initialisation (X=mu, X_variance=S) is loaded above but
    # deliberately not passed to the model.
    model = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=kernel, _debug=True)
    model.ensure_default_constraints()
    model['noise'] = .01  # alternative starting value: Y.var() / 100.
    model['linear_variance'] = .01
    return model
def bgplvm_simulation(burnin='scg', plot_sim=False,
max_burnin=100, true_X=False,
do_opt=True,
max_f_eval=1000):
D1, D2, D3, N, M, Q = 10, 8, 8, 50, 30, 5
D1, D2, D3, N, M, Q = 10, 8, 8, 250, 10, 6
slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
from GPy.models import mrd
@ -176,11 +197,13 @@ def bgplvm_simulation(burnin='scg', plot_sim=False,
Y = Ylist[0]
k = kern.linear(Q, ARD=True) + kern.white(Q, .00001) # + kern.bias(Q)
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q)
# k = kern.white(Q, .00001) + kern.bias(Q)
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
# m.set('noise',)
m.ensure_default_constraints()
m['noise'] = Y.var() / 100.
m['linear_variance'] = .001
# m.auto_scale_factor = True
# m.scale_factor = 1.
@ -207,7 +230,7 @@ def bgplvm_simulation(burnin='scg', plot_sim=False,
# cstr = 'X_variance'
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-3, 1.)
m['X_var'] = np.ones(N * Q) * .5 + np.random.randn(N * Q) * .01
# m['X_var'] = np.ones(N * Q) * .5 + np.random.randn(N * Q) * .01
# cstr = "iip"
# m.unconstrain(cstr); m.constrain_fixed(cstr)

View file

@ -11,6 +11,8 @@ from ..likelihoods import Gaussian
from .. import kern
from numpy.linalg.linalg import LinAlgError
import itertools
from matplotlib.colors import colorConverter
from matplotlib.figure import SubplotParams
class Bayesian_GPLVM(sparse_GP, GPLVM):
"""
@ -31,7 +33,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
X = self.initialise_latent(init, Q, Y)
if X_variance is None:
X_variance = np.ones_like(X) * 0.5
X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0, 1)
if Z is None:
Z = np.random.permutation(X.copy())[:M]
@ -45,10 +47,13 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
self._debug = _debug
if self._debug:
self.fcall = 0
self._count = itertools.count()
self._savedklll = []
self._savedparams = []
self._savedgradients = []
self._savederrors = []
self._savedpsiKmm = []
sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)
@property
@ -88,6 +93,8 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
self.oldps = x
except (LinAlgError, FloatingPointError, ZeroDivisionError):
print "\rWARNING: Caught LinAlgError, continueing without setting "
if self._debug:
self._savederrors.append(self.fcall)
# if save_count > 10:
# raise
# self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)
@ -121,12 +128,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
# kl = 5E4 + np.random.randn()
if self._debug:
f_call = self._count.next()
self._savedklll.append([f_call, ll, kl])
if f_call % 1 == 0:
self._savedparams.append([f_call, self._get_params()])
self.f_call = self._count.next()
if self.f_call % 1 == 0:
self._savedklll.append([self.f_call, ll, kl])
self._savedparams.append([self.f_call, self._get_params()])
self._savedgradients.append([self.f_call, self._log_likelihood_gradients()])
self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]])
# print "\nkl:", kl, "ll:", ll
return ll - kl
@ -212,16 +219,27 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
theta = x[start:]
return X, X_v, Z, theta
def _debug_get_axis(self, figs):
    """Return a blank axis on the last figure in ``figs``.

    When the last figure already has axes, its first axis is cleared and
    reused; otherwise a single ``111`` subplot is added to it.

    :param figs: non-empty sequence of figures; only the last one is used.
    :returns: the cleared or newly created axis.
    """
    current = figs[-1]
    existing = current.axes
    if not existing:
        return current.add_subplot(111)
    axis = existing[0]
    axis.cla()
    return axis
def _debug_plot(self):
assert self._debug, "must enable _debug, to debug-plot"
import pylab
from mpl_toolkits.mplot3d import Axes3D
fig = pylab.figure('BGPLVM DEBUG', figsize=(12, 10))
fig.clf()
# from mpl_toolkits.mplot3d import Axes3D
figs = [pylab.figure('BGPLVM DEBUG', figsize=(12, 4),
tight_layout=True)]
# fig.clf()
# log like
splotshape = (6, 4)
ax1 = pylab.subplot2grid(splotshape, (0, 0), 1, 4)
# splotshape = (6, 4)
# ax1 = pylab.subplot2grid(splotshape, (0, 0), 1, 4)
ax1 = self._debug_get_axis(figs)
ax1.text(.5, .5, "Optimization", alpha=.3, transform=ax1.transAxes,
ha='center', va='center')
kllls = np.array(self._savedklll)
@ -229,52 +247,141 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
KL, = ax1.plot(kllls[:, 0], kllls[:, 2], label=r'$\mathcal{KL}(p||q)$', mew=1.5)
L, = ax1.plot(kllls[:, 0], kllls[:, 1], label=r'$L$', mew=1.5) # \mathds{E}_{q(\mathbf{X})}[p(\mathbf{Y|X})\frac{p(\mathbf{X})}{q(\mathbf{X})}]
drawn = dict(self._savedparams)
iters = np.array(drawn.keys())
param_dict = dict(self._savedparams)
gradient_dict = dict(self._savedgradients)
kmm_dict = dict(self._savedpsiKmm)
iters = np.array(param_dict.keys())
self.showing = 0
ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4)
# ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4)
figs.append(pylab.figure("BGPLVM DEBUG X", figsize=(12, 4)))
ax2 = self._debug_get_axis(figs)
ax2.text(.5, .5, r"$\mathbf{X}$", alpha=.5, transform=ax2.transAxes,
ha='center', va='center')
ax3 = pylab.subplot2grid(splotshape, (3, 0), 2, 4, sharex=ax2)
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(0, 0, 1, .9))
# ax3 = pylab.subplot2grid(splotshape, (3, 0), 2, 4, sharex=ax2)
figs.append(pylab.figure("BGPLVM DEBUG S", figsize=(12, 4)))
ax3 = self._debug_get_axis(figs)
ax3.text(.5, .5, r"$\mathbf{S}$", alpha=.5, transform=ax3.transAxes,
ha='center', va='center')
ax4 = pylab.subplot2grid(splotshape, (5, 0), 2, 2)
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(0, 0, 1, .9))
# ax4 = pylab.subplot2grid(splotshape, (5, 0), 2, 2)
figs.append(pylab.figure("BGPLVM DEBUG Z", figsize=(6, 4)))
ax4 = self._debug_get_axis(figs)
ax4.text(.5, .5, r"$\mathbf{Z}$", alpha=.5, transform=ax4.transAxes,
ha='center', va='center')
ax5 = pylab.subplot2grid(splotshape, (5, 2), 2, 2)
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(0, 0, 1, .9))
# ax5 = pylab.subplot2grid(splotshape, (5, 2), 2, 2)
figs.append(pylab.figure("BGPLVM DEBUG theta", figsize=(6, 4)))
ax5 = self._debug_get_axis(figs)
ax5.text(.5, .5, r"${\theta}$", alpha=.5, transform=ax5.transAxes,
ha='center', va='center')
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(0, 0, 1, .9))
figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6)))
fig = figs[-1]
ax6 = fig.add_subplot(121)
ax6.text(.5, .5, r"${\mathbf{K}_{mm}}$", color='magenta', alpha=.5, transform=ax6.transAxes,
ha='center', va='center')
ax7 = fig.add_subplot(122)
ax7.text(.5, .5, r"${\frac{dL}{dK_{mm}}}$", color='magenta', alpha=.5, transform=ax7.transAxes,
ha='center', va='center')
X, S, Z, theta = self._debug_filter_params(drawn[self.showing])
X, S, Z, theta = self._debug_filter_params(param_dict[self.showing])
Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing])
# Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag
quiver_units = 'xy'
quiver_scale = 1
quiver_scale_units = 'xy'
Xlatentplts = ax2.plot(X, ls="-", marker="x")
colors = colorConverter.to_rgba_array([p.get_color() for p in Xlatentplts], .4)
Ulatent = np.zeros_like(X)
xlatent = np.tile(np.arange(0, X.shape[0])[:, None], X.shape[1])
Xlatentgrads = ax2.quiver(xlatent, X, Ulatent, Xg, color=colors,
units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale)
Slatentplts = ax3.plot(S, ls="-", marker="x")
Slatentgrads = ax3.quiver(xlatent, S, Ulatent, Sg, color=colors,
units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale)
xZ = np.tile(np.arange(0, Z.shape[0])[:, None], Z.shape[1])
UZ = np.zeros_like(Z)
Zplts = ax4.plot(Z, ls="-", marker="x")
thetaplts = ax5.bar(np.arange(len(theta)) - .4, theta)
Zgrads = ax4.quiver(xZ, Z, UZ, Zg, color=colors,
units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale)
xtheta = np.arange(len(theta))
Utheta = np.zeros_like(theta)
thetaplts = ax5.bar(xtheta - .4, theta, color=colors)
thetagrads = ax5.quiver(xtheta, theta, Utheta, thetag, color=colors,
units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale,
edgecolors=('k',), linewidths=[1])
pylab.setp(thetaplts, zorder=0)
pylab.setp(thetagrads, zorder=10)
ax5.set_xticks(np.arange(len(theta)))
ax5.set_xticklabels(self._get_param_names()[-len(theta):], rotation=17)
Qleg = ax1.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.15, 1, 1.15),
imkmm = ax6.imshow(kmm_dict[self.showing][0])
from mpl_toolkits.axes_grid1 import make_axes_locatable
divider = make_axes_locatable(ax6)
caxkmm = divider.append_axes("right", "5%", pad="1%")
cbarkmm = pylab.colorbar(imkmm, cax=caxkmm)
imkmmdl = ax7.imshow(kmm_dict[self.showing][1])
divider = make_axes_locatable(ax7)
caxkmmdl = divider.append_axes("right", "5%", pad="1%")
cbarkmmdl = pylab.colorbar(imkmmdl, cax=caxkmmdl)
# Qleg = ax1.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
# loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.15, 1, 1.15),
# borderaxespad=0, mode="expand")
ax2.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
borderaxespad=0, mode="expand")
ax3.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
borderaxespad=0, mode="expand")
ax4.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
borderaxespad=0, mode="expand")
ax5.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
borderaxespad=0, mode="expand")
Lleg = ax1.legend()
Lleg.draggable()
ax1.add_artist(Qleg)
# ax1.add_artist(Qleg)
indicatorKL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 2], 'o', c=KL.get_color())
indicatorLL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1] - kllls[self.showing, 2], 'o', c=LL.get_color())
indicatorL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1], 'o', c=L.get_color())
for err in self._savederrors:
ax1.plot(kllls[err, 0], kllls[err, 2], "*", c=KL.get_color())
ax1.plot(kllls[err, 0], kllls[err, 1] - kllls[err, 2], "*", c=LL.get_color())
ax1.plot(kllls[err, 0], kllls[err, 1], "*", c=L.get_color())
try:
pylab.draw()
pylab.tight_layout(box=(0, .1, 1, .9))
except:
pass
# try:
# for f in figs:
# f.canvas.draw()
# f.tight_layout(box=(0, .15, 1, .9))
# # pylab.draw()
# # pylab.tight_layout(box=(0, .1, 1, .9))
# except:
# pass
# parameter changes
# ax2 = pylab.subplot2grid((4, 1), (1, 0), 3, 1, projection='3d')
def onclick(event):
if event.inaxes is ax1 and event.button == 1:
button_options = [0, 0] # [0]: clicked -- [1]: dragged
def update_plots(event):
if button_options[0] and not button_options[1]:
# event.button, event.x, event.y, event.xdata, event.ydata)
tmp = np.abs(iters - event.xdata)
closest_hit = iters[tmp == tmp.min()][0]
@ -287,15 +394,37 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
indicatorKL.set_data(self.showing, kllls[self.showing, 2])
indicatorL.set_data(self.showing, kllls[self.showing, 1])
X, S, Z, theta = self._debug_filter_params(drawn[self.showing])
X, S, Z, theta = self._debug_filter_params(param_dict[self.showing])
Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing])
# Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag
for i, Xlatent in enumerate(Xlatentplts):
Xlatent.set_ydata(X[:, i])
Xlatentgrads.set_offsets(np.array([xlatent.ravel(), X.ravel()]).T)
Xlatentgrads.set_UVC(Ulatent, Xg)
for i, Slatent in enumerate(Slatentplts):
Slatent.set_ydata(S[:, i])
Slatentgrads.set_offsets(np.array([xlatent.ravel(), S.ravel()]).T)
Slatentgrads.set_UVC(Ulatent, Sg)
for i, Zlatent in enumerate(Zplts):
Zlatent.set_ydata(Z[:, i])
Zgrads.set_offsets(np.array([xZ.ravel(), Z.ravel()]).T)
Zgrads.set_UVC(UZ, Zg)
for p, t in zip(thetaplts, theta):
p.set_height(t)
thetagrads.set_offsets(np.array([xtheta.ravel(), theta.ravel()]).T)
thetagrads.set_UVC(Utheta, thetag)
imkmm.set_data(kmm_dict[self.showing][0])
imkmm.autoscale()
cbarkmm.update_normal(imkmm)
imkmmdl.set_data(kmm_dict[self.showing][1])
imkmmdl.autoscale()
cbarkmmdl.update_normal(imkmmdl)
ax2.relim()
ax3.relim()
@ -305,8 +434,20 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
ax3.autoscale()
ax4.autoscale()
ax5.autoscale()
fig.canvas.draw()
cid = fig.canvas.mpl_connect('button_press_event', onclick)
[fig.canvas.draw() for fig in figs]
button_options[0] = 0
button_options[1] = 0
return ax1, ax2, ax3, ax4, ax5
def onclick(event):
if event.inaxes is ax1 and event.button == 1:
button_options[0] = 1
def motion(event):
if button_options[0]:
button_options[1] = 1
cidr = figs[0].canvas.mpl_connect('button_release_event', update_plots)
cidp = figs[0].canvas.mpl_connect('button_press_event', onclick)
cidd = figs[0].canvas.mpl_connect('motion_notify_event', motion)
return ax1, ax2, ax3, ax4, ax5, ax6, ax7

View file

@ -4,14 +4,14 @@ import numpy as np
import GPy
import scipy.sparse
import scipy.io
data_path = os.path.join(os.path.dirname(__file__),'datasets')
default_seed =10000
data_path = os.path.join(os.path.dirname(__file__), 'datasets')
default_seed = 10000
# Some general utilities.
def sample_class(f):
    """Draw random class labels in {-1, +1} from latent function values.

    Each value of ``f`` is passed through the logistic function to give
    the probability of the positive class, and one Bernoulli draw per
    value is made from numpy's global random state.

    :param f: latent function value(s), scalar or array.
    :returns: labels with the same shape as ``f``: +1 where the draw
        succeeded, -1 otherwise.
    """
    # A single probability computation and a single draw: the body
    # previously repeated these statements, consuming the random stream
    # twice and discarding the first result.
    p = 1. / (1. + np.exp(-f))
    c = np.random.binomial(1, p)
    return np.where(c, 1, -1)
def della_gatta_TRP63_gene_expression(gene_number=None):
@ -25,6 +25,15 @@ def della_gatta_TRP63_gene_expression(gene_number=None):
Y = Y[:, None]
return {'X': X, 'Y': Y, 'info': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA."}
def simulation_BGPLVM():
    """Load the MATLAB-generated BGPLVM simulation data set.

    Reads ``BGPLVMSimulation.mat`` from ``data_path`` and converts its
    ``Y``, ``initS`` and ``initMu`` entries to float arrays.

    :returns: dict with keys ``'Y'``, ``'S'``, ``'mu'`` and ``'info'``.
    """
    mat_file = os.path.join(data_path, 'BGPLVMSimulation.mat')
    contents = scipy.io.loadmat(mat_file)

    def as_float(key):
        # Copy the named .mat entry into a float ndarray.
        return np.array(contents[key], dtype=float)

    return {
        'Y': as_float('Y'),
        'S': as_float('initS'),
        'mu': as_float('initMu'),
        'info': "Simulated test dataset generated in MATLAB to compare BGPLVM between python and MATLAB",
    }
# The data sets
def oil():
@ -32,7 +41,7 @@ def oil():
X = np.fromfile(fid, sep='\t').reshape((-1, 12))
fid.close()
fid = open(os.path.join(data_path, 'oil', 'DataTrnLbls.txt'))
Y = np.fromfile(fid, sep='\t').reshape((-1, 3))*2.-1.
Y = np.fromfile(fid, sep='\t').reshape((-1, 3)) * 2. - 1.
fid.close()
return {'X': X, 'Y': Y, 'info': "The oil data from Bishop and James (1993)."}
@ -74,9 +83,9 @@ def silhouette():
inMean = np.mean(mat_data['Y'])
inScales = np.sqrt(np.var(mat_data['Y']))
X = mat_data['Y'] - inMean
X = X/inScales
X = X / inScales
Xtest = mat_data['Y_test'] - inMean
Xtest = Xtest/inScales
Xtest = Xtest / inScales
Y = mat_data['Z']
Ytest = mat_data['Z_test']
return {'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Artificial silhouette simulation data developed from Agarwal and Triggs (2004)."}
@ -102,13 +111,13 @@ def toy_rbf_1d(seed=default_seed):
np.random.seed(seed=seed)
numIn = 1
N = 500
X = np.random.uniform(low=-1.0, high=1.0, size=(N, numIn))
X = np.random.uniform(low= -1.0, high=1.0, size=(N, numIn))
X.sort(axis=0)
rbf = GPy.kern.rbf(numIn, variance=1., lengthscale=np.array((0.25,)))
white = GPy.kern.white(numIn, variance=1e-2)
kernel = rbf + white
K = kernel.K(X)
y = np.reshape(np.random.multivariate_normal(np.zeros(N), K), (N,1))
y = np.reshape(np.random.multivariate_normal(np.zeros(N), K), (N, 1))
return {'X':X, 'Y':y, 'info': "Samples 500 values of a function from an RBF covariance with very small noise for inputs uniformly distributed between -1 and 1."}
def toy_rbf_1d_50(seed=default_seed):
@ -124,15 +133,15 @@ def toy_rbf_1d_50(seed=default_seed):
def toy_linear_1d_classification(seed=default_seed):
    """Generate a toy one-dimensional binary classification data set.

    Seeds numpy's global random state, draws 20 inputs from a normal
    distribution centred at -3 and 20 from one centred at +3 (both with
    scale 5), stacks them into a 40 x 1 column and samples labels from
    the latent function ``2 * X`` via ``sample_class``.

    :param seed: value passed to ``np.random.seed``.
    :returns: dict with inputs ``'X'``, sampled labels ``'Y'`` and the
        latent function values ``'F'``.
    """
    np.random.seed(seed=seed)
    # Draw each cluster exactly once: the sampling statements were
    # previously repeated, so the first set of seeded draws was thrown
    # away before X was built.
    x1 = np.random.normal(-3, 5, 20)
    x2 = np.random.normal(3, 5, 20)
    X = (np.r_[x1, x2])[:, None]
    return {'X': X, 'Y': sample_class(2.*X), 'F': 2.*X}
def rogers_girolami_olympics():
    """Load the men's 100 m Olympic sprint data set.

    Reads the ``male100`` entry of ``olympics.mat`` in ``data_path``.
    Column 0 is returned as ``'X'`` and column 1 as ``'Y'``, each as a
    column array.

    :returns: dict with keys ``'X'``, ``'Y'`` and ``'info'``.
    """
    mat_file = os.path.join(data_path, 'olympics.mat')
    male100 = scipy.io.loadmat(mat_file)['male100']
    inputs = male100[:, 0][:, None]
    targets = male100[:, 1][:, None]
    return {
        'X': inputs,
        'Y': targets,
        'info': "Olympic sprint times for 100 m men from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning.",
    }
# def movielens_small(partNo=1,seed=default_seed):
# np.random.seed(seed=seed)
@ -169,7 +178,7 @@ def rogers_girolami_olympics():
def crescent_data(num_data=200,seed=default_seed):
def crescent_data(num_data=200, seed=default_seed):
"""Data set formed from a mixture of four Gaussians. In each class two of the Gaussians are elongated at right angles to each other and offset to form an approximation to the crescent data that is popular in semi-supervised learning as a toy problem.
:param num_data: number of data to be sampled (default is 200).
:type num_data: int
@ -178,7 +187,7 @@ def crescent_data(num_data=200,seed=default_seed):
np.random.seed(seed=seed)
sqrt2 = np.sqrt(2)
# Rotation matrix
R = np.array([[sqrt2/2, -sqrt2/2], [sqrt2/2, sqrt2/2]])
R = np.array([[sqrt2 / 2, -sqrt2 / 2], [sqrt2 / 2, sqrt2 / 2]])
# Scaling matrices
scales = []
scales.append(np.array([[3, 0], [0, 1]]))
@ -195,9 +204,9 @@ def crescent_data(num_data=200,seed=default_seed):
num_data_part = []
num_data_total = 0
for i in range(0, 4):
num_data_part.append(round(((i+1)*num_data)/4.))
num_data_part.append(round(((i + 1) * num_data) / 4.))
num_data_part[i] -= num_data_total
#print num_data_part[i]
# print num_data_part[i]
part = np.random.normal(size=(num_data_part[i], 2))
part = np.dot(np.dot(part, scales[i]), R) + means[i]
Xparts.append(part)
@ -205,7 +214,7 @@ def crescent_data(num_data=200,seed=default_seed):
X = np.vstack((Xparts[0], Xparts[1], Xparts[2], Xparts[3]))
Y = np.vstack((np.ones((num_data_part[0]+num_data_part[1], 1)), -np.ones((num_data_part[2]+num_data_part[3], 1))))
Y = np.vstack((np.ones((num_data_part[0] + num_data_part[1], 1)), -np.ones((num_data_part[2] + num_data_part[3], 1))))
return {'X':X, 'Y':Y, 'info': "Two separate classes of data formed approximately in the shape of two crescents."}
@ -214,6 +223,6 @@ def creep_data():
y = all_data[:, 1:2].copy()
features = [0]
features.extend(range(2, 31))
X = all_data[:,features].copy()
X = all_data[:, features].copy()
return {'X': X, 'y' : y}