From 6945ad7aa14d498d8e6ba4d39029f4cc21a88d89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Fusi?= Date: Fri, 11 Oct 2013 16:19:27 -0700 Subject: [PATCH 1/7] Seems to work on windows now not everything works yet, but I've identified the main issues. Still TODO: handle missing OMP libraries gracefully --- GPy/util/linalg.py | 4 +++- GPy/util/misc.py | 20 +++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index 4e7f7fff..213cd047 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -325,6 +325,7 @@ def symmetrify(A, upper=False): """ N, M = A.shape assert N == M + c_contig_code = """ int iN; for (int i=1; i + // #include #include """ @@ -107,15 +107,17 @@ def fast_array_equal(A, B): return False elif A.shape == B.shape: if A.ndim == 2: - N, D = A.shape - value = weave.inline(code2, support_code=support_code, libraries=['gomp'], + N, D = [int(i) for i in A.shape] + value = weave.inline(code2, support_code=support_code, arg_names=['A', 'B', 'N', 'D'], - type_converters=weave.converters.blitz,**weave_options) + type_converters=weave.converters.blitz) + # libraries=['gomp'], **weave_options) elif A.ndim == 3: - N, D, Q = A.shape - value = weave.inline(code3, support_code=support_code, libraries=['gomp'], + N, D, Q = [int(i) for i in A.shape] + value = weave.inline(code3, support_code=support_code, arg_names=['A', 'B', 'N', 'D', 'Q'], - type_converters=weave.converters.blitz,**weave_options) + type_converters=weave.converters.blitz) + #libraries=['gomp'], **weave_options) else: value = np.array_equal(A,B) From a92780cb89cfea5ff2fb57d97356b6889079e9cc Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Mon, 14 Oct 2013 05:59:15 +0100 Subject: [PATCH 2/7] Added olivetti faces data set. It required adding netpbmfile.py a bsd licensed pgm file reader from Christoph Gohlke, which doesn't seem to have a spearate installer. Also modified image_show to assume by default that array ordering is python instead of fortran. Modified brendan_faces demo to explicilty force fortran ordering. Notified Teo of change. --- GPy/examples/dimensionality_reduction.py | 31 ++- GPy/util/__init__.py | 2 + GPy/util/datasets.py | 87 ++++-- GPy/util/netpbmfile.py | 331 +++++++++++++++++++++++ GPy/util/visualize.py | 61 +++-- 5 files changed, 458 insertions(+), 54 deletions(-) create mode 100644 GPy/util/netpbmfile.py diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 005b131f..8aaeb4ae 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -327,31 +327,52 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): m.plot_scales("MRD Scales") return m + + def brendan_faces(): from GPy import kern data = GPy.util.datasets.brendan_faces() Q = 2 - Y = data['Y'][0:-1:10, :] - # Y = data['Y'] + Y = data['Y'] Yn = Y - Y.mean() Yn /= Yn.std() m = GPy.models.GPLVM(Yn, Q) - # m = GPy.models.BayesianGPLVM(Yn, Q, num_inducing=100) # optimize m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped()) - m.optimize('scg', messages=1, max_f_eval=10000) + m.optimize('scg', messages=1, max_iters=10) ax = m.plot_latent(which_indices=(0, 1)) y = m.likelihood.Y[0, :] - data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False) + data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False) lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') return m + +def olivetti_faces(): + from GPy import kern + data = GPy.util.datasets.olivetti_faces() + Q = 2 + Y = data['Y'] + Yn = Y - Y.mean() + Yn /= Yn.std() + + m = GPy.models.GPLVM(Yn, Q) + m.optimize('scg', messages=1, max_iters=1000) + + ax = m.plot_latent(which_indices=(0, 1)) + y = m.likelihood.Y[0, :] + data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False) + lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + raw_input('Press enter to finish') + + return m + def stick_play(range=None, frame_rate=15): + data = GPy.util.datasets.osu_run1() # optimize if range == None: diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py index 99548268..db9b7362 100644 --- a/GPy/util/__init__.py +++ b/GPy/util/__init__.py @@ -14,3 +14,5 @@ import visualize import decorators import classification import latent_space_visualizations + +import netpbmfile diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 2ff168b3..45ed694c 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -8,17 +8,12 @@ import zipfile import tarfile import datetime -ipython_notebook = False -if ipython_notebook: - import IPython.core.display - def ipynb_input(varname, prompt=''): - """Prompt user for input and assign string val to given variable name.""" - js_code = (""" - var value = prompt("{prompt}",""); - var py_code = "{varname} = '" + value + "'"; - IPython.notebook.kernel.execute(py_code); - """).format(prompt=prompt, varname=varname) - return IPython.core.display.Javascript(js_code) +ipython_available=True +try: + import IPython +except ImportError: + ipython_available=False + import sys, urllib @@ -34,8 +29,11 @@ data_path = os.path.join(os.path.dirname(__file__), 'datasets') default_seed = 10000 overide_manual_authorize=False neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/' +sam_url = 'http://www.cs.nyu.edu/~roweis/data/' cmu_url = 'http://mocap.cs.cmu.edu/subjects/' -# Note: there may be a better way of storing data resources. One of the pythonistas will need to take a look. + +# Note: there may be a better way of storing data resources, for the +# moment we are storing them in a dictionary. data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'], 'files' : [['ankurDataPoseSilhouette.mat']], 'license' : None, @@ -49,7 +47,7 @@ data_resources = {'ankur_pose_data' : {'urls' : [neil_url + 'ankur_pose_data/'], 'license' : None, 'size' : 51276 }, - 'brendan_faces' : {'urls' : ['http://www.cs.nyu.edu/~roweis/data/'], + 'brendan_faces' : {'urls' : [sam_url], 'files': [['frey_rawface.mat']], 'citation' : 'Frey, B. J., Colmenarez, A and Huang, T. S. Mixtures of Local Linear Subspaces for Face Recognition. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition 1998, 32-37, June 1998. Computer Society Press, Los Alamitos, CA.', 'details' : """A video of Brendan Frey's face popularized as a benchmark for visualization by the Locally Linear Embedding.""", @@ -93,6 +91,12 @@ The database was created with funding from NSF EIA-0196217.""", 'details' : """Data from the textbook 'A First Course in Machine Learning'. Available from http://www.dcs.gla.ac.uk/~srogers/firstcourseml/.""", 'license' : None, 'size' : 21949154}, + 'olivetti_faces' : {'urls' : [neil_url + 'olivetti_faces/', sam_url], + 'files' : [['att_faces.zip'], ['olivettifaces.mat']], + 'citation' : 'Ferdinando Samaria and Andy Harter, Parameterisation of a Stochastic Model for Human Face Identification. Proceedings of 2nd IEEE Workshop on Applications of Computer Vision, Sarasota FL, December 1994', + 'details' : """Olivetti Research Labs Face data base, acquired between December 1992 and December 1994 in the Olivetti Research Lab, Cambridge (which later became AT&T Laboratories, Cambridge). When using these images please give credit to AT&T Laboratories, Cambridge. """, + 'license': None, + 'size' : 8561331}, 'olympic_marathon_men' : {'urls' : [neil_url + 'olympic_marathon_men/'], 'files' : [['olympicMarathonTimes.csv']], 'citation' : None, @@ -144,23 +148,32 @@ The database was created with funding from NSF EIA-0196217.""", } -def prompt_user(): +def prompt_user(prompt): """Ask user for agreeing to data set licenses.""" # raw_input returns the empty string for "enter" yes = set(['yes', 'y']) no = set(['no','n']) - choice = '' - if ipython_notebook: - ipynb_input(choice, prompt='provide your answer here') - else: + + try: + print(prompt) choice = raw_input().lower() + # would like to test for exception here, but not sure if we can do that without importing IPython + except: + print('Stdin is not implemented.') + print('You need to set') + print('overide_manual_authorize=True') + print('to proceed with the download. Please set that variable and continue.') + raise + + if choice in yes: return True elif choice in no: return False else: - sys.stdout.write("Please respond with 'yes', 'y' or 'no', 'n'") - return prompt_user() + print("Your response was a " + choice) + print("Please respond with 'yes', 'y' or 'no', 'n'") + #return prompt_user() def data_available(dataset_name=None): @@ -212,15 +225,14 @@ def authorize_download(dataset_name=None): print('You must also agree to the following license:') print(dr['license']) print('') - print('Do you wish to proceed with the download? [yes/no]') - return prompt_user() + return prompt_user('Do you wish to proceed with the download? [yes/no]') def download_data(dataset_name=None): """Check with the user that the are happy with terms and conditions for the data set, then download it.""" dr = data_resources[dataset_name] if not authorize_download(dataset_name): - return False + raise Exception("Permission to download data set denied.") if dr.has_key('suffices'): for url, files, suffices in zip(dr['urls'], dr['files'], dr['suffices']): @@ -489,12 +501,12 @@ def ripley_synth(data_set='ripley_prnn_data'): return data_details_return({'X': X, 'y': y, 'Xtest': Xtest, 'ytest': ytest, 'info': 'Synthetic data generated by Ripley for a two class classification problem.'}, data_set) def osu_run1(data_set='osu_run1', sample_every=4): + path = os.path.join(data_path, data_set) if not data_available(data_set): download_data(data_set) - zip = zipfile.ZipFile(os.path.join(data_path, data_set, 'run1TXT.ZIP'), 'r') - path = os.path.join(data_path, data_set) - for name in zip.namelist(): - zip.extract(name, path) + zip = zipfile.ZipFile(os.path.join(data_path, data_set, 'run1TXT.ZIP'), 'r') + for name in zip.namelist(): + zip.extract(name, path) Y, connect = GPy.util.mocap.load_text_data('Aug210106', path) Y = Y[0:-1:sample_every, :] return data_details_return({'Y': Y, 'connect' : connect}, data_set) @@ -579,6 +591,24 @@ def toy_linear_1d_classification(seed=default_seed): X = (np.r_[x1, x2])[:, None] return {'X': X, 'Y': sample_class(2.*X), 'F': 2.*X, 'seed' : seed} +def olivetti_faces(data_set='olivetti_faces'): + path = os.path.join(data_path, data_set) + if not data_available(data_set): + download_data(data_set) + zip = zipfile.ZipFile(os.path.join(path, 'att_faces.zip'), 'r') + for name in zip.namelist(): + zip.extract(name, path) + Y = [] + lbls = [] + for subject in range(40): + for image in range(10): + image_path = os.path.join(path, 'orl_faces', 's'+str(subject+1), str(image+1) + '.pgm') + Y.append(GPy.util.netpbmfile.imread(image_path).flatten()) + lbls.append(subject) + Y = np.asarray(Y) + lbls = np.asarray(lbls)[:, None] + return data_details_return({'Y': Y, 'lbls' : lbls, 'info': "ORL Faces processed to 64x64 images."}, data_set) + def olympic_100m_men(data_set='rogers_girolami_data'): if not data_available(data_set): download_data(data_set) @@ -586,7 +616,8 @@ def olympic_100m_men(data_set='rogers_girolami_data'): tar_file = os.path.join(path, 'firstcoursemldata.tar.gz') tar = tarfile.open(tar_file) print('Extracting file.') - tar.extractall(path=path) + tar.extractall(path=path) + tar.close() olympic_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'data', 'olympics.mat'))['male100'] diff --git a/GPy/util/netpbmfile.py b/GPy/util/netpbmfile.py new file mode 100644 index 00000000..030bd574 --- /dev/null +++ b/GPy/util/netpbmfile.py @@ -0,0 +1,331 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# netpbmfile.py + +# Copyright (c) 2011-2013, Christoph Gohlke +# Copyright (c) 2011-2013, The Regents of the University of California +# Produced at the Laboratory for Fluorescence Dynamics. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the copyright holders nor the names of any +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +"""Read and write image data from respectively to Netpbm files. + +This implementation follows the Netpbm format specifications at +http://netpbm.sourceforge.net/doc/. No gamma correction is performed. + +The following image formats are supported: PBM (bi-level), PGM (grayscale), +PPM (color), PAM (arbitrary), XV thumbnail (RGB332, read-only). + +:Author: + `Christoph Gohlke `_ + +:Organization: + Laboratory for Fluorescence Dynamics, University of California, Irvine + +:Version: 2013.01.18 + +Requirements +------------ +* `CPython 2.7, 3.2 or 3.3 `_ +* `Numpy 1.7 `_ +* `Matplotlib 1.2 `_ (optional for plotting) + +Examples +-------- +>>> im1 = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16) +>>> imsave('_tmp.pgm', im1) +>>> im2 = imread('_tmp.pgm') +>>> assert numpy.all(im1 == im2) + +""" + +from __future__ import division, print_function + +import sys +import re +import math +from copy import deepcopy + +import numpy + +__version__ = '2013.01.18' +__docformat__ = 'restructuredtext en' +__all__ = ['imread', 'imsave', 'NetpbmFile'] + + +def imread(filename, *args, **kwargs): + """Return image data from Netpbm file as numpy array. + + `args` and `kwargs` are arguments to NetpbmFile.asarray(). + + Examples + -------- + >>> image = imread('_tmp.pgm') + + """ + try: + netpbm = NetpbmFile(filename) + image = netpbm.asarray() + finally: + netpbm.close() + return image + + +def imsave(filename, data, maxval=None, pam=False): + """Write image data to Netpbm file. + + Examples + -------- + >>> image = numpy.array([[0, 1],[65534, 65535]], dtype=numpy.uint16) + >>> imsave('_tmp.pgm', image) + + """ + try: + netpbm = NetpbmFile(data, maxval=maxval) + netpbm.write(filename, pam=pam) + finally: + netpbm.close() + + +class NetpbmFile(object): + """Read and write Netpbm PAM, PBM, PGM, PPM, files.""" + + _types = {b'P1': b'BLACKANDWHITE', b'P2': b'GRAYSCALE', b'P3': b'RGB', + b'P4': b'BLACKANDWHITE', b'P5': b'GRAYSCALE', b'P6': b'RGB', + b'P7 332': b'RGB', b'P7': b'RGB_ALPHA'} + + def __init__(self, arg=None, **kwargs): + """Initialize instance from filename, open file, or numpy array.""" + for attr in ('header', 'magicnum', 'width', 'height', 'maxval', + 'depth', 'tupltypes', '_filename', '_fh', '_data'): + setattr(self, attr, None) + if arg is None: + self._fromdata([], **kwargs) + elif isinstance(arg, basestring): + self._fh = open(arg, 'rb') + self._filename = arg + self._fromfile(self._fh, **kwargs) + elif hasattr(arg, 'seek'): + self._fromfile(arg, **kwargs) + self._fh = arg + else: + self._fromdata(arg, **kwargs) + + def asarray(self, copy=True, cache=False, **kwargs): + """Return image data from file as numpy array.""" + data = self._data + if data is None: + data = self._read_data(self._fh, **kwargs) + if cache: + self._data = data + else: + return data + return deepcopy(data) if copy else data + + def write(self, arg, **kwargs): + """Write instance to file.""" + if hasattr(arg, 'seek'): + self._tofile(arg, **kwargs) + else: + with open(arg, 'wb') as fid: + self._tofile(fid, **kwargs) + + def close(self): + """Close open file. Future asarray calls might fail.""" + if self._filename and self._fh: + self._fh.close() + self._fh = None + + def __del__(self): + self.close() + + def _fromfile(self, fh): + """Initialize instance from open file.""" + fh.seek(0) + data = fh.read(4096) + if (len(data) < 7) or not (b'0' < data[1:2] < b'8'): + raise ValueError("Not a Netpbm file:\n%s" % data[:32]) + try: + self._read_pam_header(data) + except Exception: + try: + self._read_pnm_header(data) + except Exception: + raise ValueError("Not a Netpbm file:\n%s" % data[:32]) + + def _read_pam_header(self, data): + """Read PAM header and initialize instance.""" + regroups = re.search( + b"(^P7[\n\r]+(?:(?:[\n\r]+)|(?:#.*)|" + b"(HEIGHT\s+\d+)|(WIDTH\s+\d+)|(DEPTH\s+\d+)|(MAXVAL\s+\d+)|" + b"(?:TUPLTYPE\s+\w+))*ENDHDR\n)", data).groups() + self.header = regroups[0] + self.magicnum = b'P7' + for group in regroups[1:]: + key, value = group.split() + setattr(self, unicode(key).lower(), int(value)) + matches = re.findall(b"(TUPLTYPE\s+\w+)", self.header) + self.tupltypes = [s.split(None, 1)[1] for s in matches] + + def _read_pnm_header(self, data): + """Read PNM header and initialize instance.""" + bpm = data[1:2] in b"14" + regroups = re.search(b"".join(( + b"(^(P[123456]|P7 332)\s+(?:#.*[\r\n])*", + b"\s*(\d+)\s+(?:#.*[\r\n])*", + b"\s*(\d+)\s+(?:#.*[\r\n])*" * (not bpm), + b"\s*(\d+)\s(?:\s*#.*[\r\n]\s)*)")), data).groups() + (1, ) * bpm + self.header = regroups[0] + self.magicnum = regroups[1] + self.width = int(regroups[2]) + self.height = int(regroups[3]) + self.maxval = int(regroups[4]) + self.depth = 3 if self.magicnum in b"P3P6P7 332" else 1 + self.tupltypes = [self._types[self.magicnum]] + + def _read_data(self, fh, byteorder='>'): + """Return image data from open file as numpy array.""" + fh.seek(len(self.header)) + data = fh.read() + dtype = 'u1' if self.maxval < 256 else byteorder + 'u2' + depth = 1 if self.magicnum == b"P7 332" else self.depth + shape = [-1, self.height, self.width, depth] + size = numpy.prod(shape[1:]) + if self.magicnum in b"P1P2P3": + data = numpy.array(data.split(None, size)[:size], dtype) + data = data.reshape(shape) + elif self.maxval == 1: + shape[2] = int(math.ceil(self.width / 8)) + data = numpy.frombuffer(data, dtype).reshape(shape) + data = numpy.unpackbits(data, axis=-2)[:, :, :self.width, :] + else: + data = numpy.frombuffer(data, dtype) + data = data[:size * (data.size // size)].reshape(shape) + if data.shape[0] < 2: + data = data.reshape(data.shape[1:]) + if data.shape[-1] < 2: + data = data.reshape(data.shape[:-1]) + if self.magicnum == b"P7 332": + rgb332 = numpy.array(list(numpy.ndindex(8, 8, 4)), numpy.uint8) + rgb332 *= [36, 36, 85] + data = numpy.take(rgb332, data, axis=0) + return data + + def _fromdata(self, data, maxval=None): + """Initialize instance from numpy array.""" + data = numpy.array(data, ndmin=2, copy=True) + if data.dtype.kind not in "uib": + raise ValueError("not an integer type: %s" % data.dtype) + if data.dtype.kind == 'i' and numpy.min(data) < 0: + raise ValueError("data out of range: %i" % numpy.min(data)) + if maxval is None: + maxval = numpy.max(data) + maxval = 255 if maxval < 256 else 65535 + if maxval < 0 or maxval > 65535: + raise ValueError("data out of range: %i" % maxval) + data = data.astype('u1' if maxval < 256 else '>u2') + self._data = data + if data.ndim > 2 and data.shape[-1] in (3, 4): + self.depth = data.shape[-1] + self.width = data.shape[-2] + self.height = data.shape[-3] + self.magicnum = b'P7' if self.depth == 4 else b'P6' + else: + self.depth = 1 + self.width = data.shape[-1] + self.height = data.shape[-2] + self.magicnum = b'P5' if maxval > 1 else b'P4' + self.maxval = maxval + self.tupltypes = [self._types[self.magicnum]] + self.header = self._header() + + def _tofile(self, fh, pam=False): + """Write Netbm file.""" + fh.seek(0) + fh.write(self._header(pam)) + data = self.asarray(copy=False) + if self.maxval == 1: + data = numpy.packbits(data, axis=-1) + data.tofile(fh) + + def _header(self, pam=False): + """Return file header as byte string.""" + if pam or self.magicnum == b'P7': + header = "\n".join(( + "P7", + "HEIGHT %i" % self.height, + "WIDTH %i" % self.width, + "DEPTH %i" % self.depth, + "MAXVAL %i" % self.maxval, + "\n".join("TUPLTYPE %s" % unicode(i) for i in self.tupltypes), + "ENDHDR\n")) + elif self.maxval == 1: + header = "P4 %i %i\n" % (self.width, self.height) + elif self.depth == 1: + header = "P5 %i %i %i\n" % (self.width, self.height, self.maxval) + else: + header = "P6 %i %i %i\n" % (self.width, self.height, self.maxval) + if sys.version_info[0] > 2: + header = bytes(header, 'ascii') + return header + + def __str__(self): + """Return information about instance.""" + return unicode(self.header) + + +if sys.version_info[0] > 2: + basestring = str + unicode = lambda x: str(x, 'ascii') + +if __name__ == "__main__": + # Show images specified on command line or all images in current directory + from glob import glob + from matplotlib import pyplot + files = sys.argv[1:] if len(sys.argv) > 1 else glob('*.p*m') + for fname in files: + try: + pam = NetpbmFile(fname) + img = pam.asarray(copy=False) + if False: + pam.write('_tmp.pgm.out', pam=True) + img2 = imread('_tmp.pgm.out') + assert numpy.all(img == img2) + imsave('_tmp.pgm.out', img) + img2 = imread('_tmp.pgm.out') + assert numpy.all(img == img2) + pam.close() + except ValueError as e: + print(fname, e) + continue + _shape = img.shape + if img.ndim > 3 or (img.ndim > 2 and img.shape[-1] not in (3, 4)): + img = img[0] + cmap = 'gray' if pam.maxval > 1 else 'binary' + pyplot.imshow(img, cmap, interpolation='nearest') + pyplot.title("%s %s %s %s" % (fname, unicode(pam.magicnum), + _shape, img.dtype)) + pyplot.show() diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py index 7a519555..ecdf78ce 100644 --- a/GPy/util/visualize.py +++ b/GPy/util/visualize.py @@ -246,17 +246,36 @@ class lvm_dimselect(lvm): class image_show(matplotlib_show): - """Show a data vector as an image.""" - def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, invert=False, scale=False, palette=[], presetMean = 0., presetSTD = -1., selectImage=0): + """Show a data vector as an image. This visualizer rehapes the output vector and displays it as an image. + + :param vals: the values of the output to display. + :type vals: ndarray + :param axes: the axes to show the output on. + :type vals: axes handle + :param dimensions: the dimensions that the image needs to be transposed to for display. + :type dimensions: tuple + :param transpose: whether to transpose the image before display. + :type bool: default is False. + :param order: whether array is in Fortan ordering ('F') or Python ordering ('C'). Default is python ('C'). + :type order: string + :param invert: whether to invert the pixels or not (default False). + :type invert: bool + :param palette: a palette to use for the image. + :param preset_mean: the preset mean of a scaled image. + :type preset_mean: double + :param preset_std: the preset standard deviation of a scaled image. + :type preset_std: double""" + def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, order='C', invert=False, scale=False, palette=[], preset_mean = 0., preset_std = -1., select_image=0): matplotlib_show.__init__(self, vals, axes) self.dimensions = dimensions self.transpose = transpose + self.order = order self.invert = invert self.scale = scale self.palette = palette - self.presetMean = presetMean - self.presetSTD = presetSTD - self.selectImage = selectImage # This is used when the y vector contains multiple images concatenated. + self.preset_mean = preset_mean + self.preset_std = preset_std + self.select_image = select_image # This is used when the y vector contains multiple images concatenated. self.set_image(self.vals) if not self.palette == []: # Can just show the image (self.set_image() took care of setting the palette) @@ -272,22 +291,22 @@ class image_show(matplotlib_show): def set_image(self, vals): dim = self.dimensions[0] * self.dimensions[1] - nImg = np.sqrt(vals[0,].size/dim) - if nImg > 1 and nImg.is_integer(): # Show a mosaic of images - nImg = np.int(nImg) - self.vals = np.zeros((self.dimensions[0]*nImg, self.dimensions[1]*nImg)) - for iR in range(nImg): - for iC in range(nImg): - currImgId = iR*nImg + iC - currImg = np.reshape(vals[0,dim*currImgId+np.array(range(dim))], self.dimensions, order='F') - firstRow = iR*self.dimensions[0] - lastRow = (iR+1)*self.dimensions[0] - firstCol = iC*self.dimensions[1] - lastCol = (iC+1)*self.dimensions[1] - self.vals[firstRow:lastRow, firstCol:lastCol] = currImg + num_images = np.sqrt(vals[0,].size/dim) + if num_images > 1 and num_images.is_integer(): # Show a mosaic of images + num_images = np.int(num_images) + self.vals = np.zeros((self.dimensions[0]*num_images, self.dimensions[1]*num_images)) + for iR in range(num_images): + for iC in range(num_images): + cur_img_id = iR*num_images + iC + cur_img = np.reshape(vals[0,dim*cur_img_id+np.array(range(dim))], self.dimensions, order=self.order) + first_row = iR*self.dimensions[0] + last_row = (iR+1)*self.dimensions[0] + first_col = iC*self.dimensions[1] + last_col = (iC+1)*self.dimensions[1] + self.vals[first_row:last_row, first_col:last_col] = cur_img else: - self.vals = np.reshape(vals[0,dim*self.selectImage+np.array(range(dim))], self.dimensions, order='F') + self.vals = np.reshape(vals[0,dim*self.select_image+np.array(range(dim))], self.dimensions, order=self.order) if self.transpose: self.vals = self.vals.T # if not self.scale: @@ -296,8 +315,8 @@ class image_show(matplotlib_show): self.vals = -self.vals # un-normalizing, for visualisation purposes: - if self.presetSTD >= 0: # The Mean is assumed to be in the range (0,255) - self.vals = self.vals*self.presetSTD + self.presetMean + if self.preset_std >= 0: # The Mean is assumed to be in the range (0,255) + self.vals = self.vals*self.preset_std + self.preset_mean # Clipping the values: self.vals[self.vals < 0] = 0 self.vals[self.vals > 255] = 255 From fe30db1331cd5f4ac20b5e36de0cdf68ba867bfa Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Mon, 14 Oct 2013 09:37:35 +0100 Subject: [PATCH 3/7] Updated sympy code, multioutput grad checks pass apart from wrt X. Similar problems with prediction as to sinc covariance, needs investigation. --- GPy/examples/dimensionality_reduction.py | 4 +- GPy/kern/constructors.py | 8 ++- GPy/kern/parts/sympykern.py | 81 +++++++++++++++-------- GPy/util/datasets.py | 83 +++++++++++++++++++----- 4 files changed, 124 insertions(+), 52 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 8aaeb4ae..298607b6 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -327,8 +327,6 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): m.plot_scales("MRD Scales") return m - - def brendan_faces(): from GPy import kern data = GPy.util.datasets.brendan_faces() @@ -342,7 +340,7 @@ def brendan_faces(): # optimize m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped()) - m.optimize('scg', messages=1, max_iters=10) + m.optimize('scg', messages=1, max_iters=1000) ax = m.plot_latent(which_indices=(0, 1)) y = m.likelihood.Y[0, :] diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index 62c29744..c6a6672f 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -322,17 +322,19 @@ if sympy_available: real_input_dim -= 1 X = sp.symbols('x_:' + str(real_input_dim)) Z = sp.symbols('z_:' + str(real_input_dim)) - variance = sp.var('variance',positive=True) + scale = sp.var('scale_i scale_j',positive=True) if ARD: lengthscales = [sp.var('lengthscale%i_i lengthscale%i_j' % i, positive=True) for i in range(real_input_dim)] - dist_string = ' + '.join(['(x_%i-z_%i)**2/(lengthscale%i_i*lengthscale%i_j)' % (i, i, i) for i in range(real_input_dim)]) + shared_lengthscales = [sp.var('shared_lengthscale%i' % i, positive=True) for i in range(real_input_dim)] + dist_string = ' + '.join(['(x_%i-z_%i)**2/(shared_lengthscale%i**2 + lengthscale%i_i*lengthscale%i_j)' % (i, i, i) for i in range(real_input_dim)]) dist = parse_expr(dist_string) f = variance*sp.exp(-dist/2.) else: lengthscale = sp.var('lengthscale_i lengthscale_j',positive=True) + shared_lengthscale = sp.var('shared_lengthscale',positive=True) dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(real_input_dim)]) dist = parse_expr(dist_string) - f = variance*sp.exp(-dist/(2*lengthscale_i*lengthscale_j)) + f = scale_i*scale_j*sp.exp(-dist/(2*(shared_lengthscale**2 + lengthscale_i*lengthscale_j))) return kern(input_dim, [spkern(input_dim, f, output_dim=output_dim, name='eq_sympy')]) def sinc(input_dim, ARD=False, variance=1., lengthscale=1.): diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/parts/sympykern.py index 09ab9934..ea603eab 100644 --- a/GPy/kern/parts/sympykern.py +++ b/GPy/kern/parts/sympykern.py @@ -43,9 +43,9 @@ class spkern(Kernpart): assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)]) assert len(self._sp_x)==len(self._sp_z) self.input_dim = len(self._sp_x) + self._real_input_dim = self.input_dim if output_dim > 1: self.input_dim += 1 - assert self.input_dim == input_dim self.output_dim = output_dim # extract parameter names @@ -139,8 +139,10 @@ class spkern(Kernpart): self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code) # This is the basic argument construction for the C code. - arg_list = (["X[i*input_dim+%s]"%x.name[2:] for x in self._sp_x] - + ["Z[j*input_dim+%s]"%z.name[2:] for z in self._sp_z]) + #arg_list = (["X[i*input_dim+%s]"%x.name[2:] for x in self._sp_x] + # + ["Z[j*input_dim+%s]"%z.name[2:] for z in self._sp_z]) + arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x] + + ["Z2(j, %s)"%z.name[2:] for z in self._sp_z]) if self.output_dim>1: reverse_arg_list = list(arg_list) reverse_arg_list.reverse() @@ -151,17 +153,21 @@ class spkern(Kernpart): precompute_list=[] if self.output_dim > 1: reverse_arg_list+=list(param_arg_list) - split_param_arg_list = ["%s[%s]"%(theta.name[:-2],index) for index in ['ii', 'jj'] for theta in self._sp_theta_i] - split_param_reverse_arg_list = ["%s[%s]"%(theta.name[:-2],index) for index in ['jj', 'ii'] for theta in self._sp_theta_i] + split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i] + split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i] arg_list += split_param_arg_list reverse_arg_list += split_param_reverse_arg_list - precompute_list += [' '*16+"int %s=(int)%s[%s*input_dim+output_dim];"%(index, var, index2) for index, var, index2 in zip(['ii', 'jj'], ['X', 'Z'], ['i', 'j'])] + # Extract the right output indices from the inputs. + c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])] + precompute_list += c_define_output_indices reverse_arg_string = ", ".join(reverse_arg_list) arg_string = ", ".join(arg_list) precompute_string = "\n".join(precompute_list) # Here's the code to do the looping for K self._K_code =\ """ + // _K_code + // Code for computing the covariance function. int i; int j; int N = target_array->dimensions[0]; @@ -171,7 +177,8 @@ class spkern(Kernpart): for (i=0;idimensions[0]; int input_dim = X_array->dimensions[1]; //#pragma omp parallel for for (i=0;i1: - func_list += [' '*16 + "int %s=(int)%s[%s*input_dim+output_dim];"%(index, var, index2) for index, var, index2 in zip(['ii', 'jj'], ['X', 'Z'], ['i', 'j'])] - func_list += [' '*16 + 'target[%i+ii] += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)] - func_list += [' '*16 + 'target[%i+jj] += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)] - func_list += ([' '*16 + 'target[%i] += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)]) - func_string = '\n'.join(func_list) + grad_func_list += c_define_output_indices + grad_func_list += [' '*16 + 'TARGET1(%i+ii) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)] + grad_func_list += [' '*16 + 'TARGET1(%i+jj) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)] + grad_func_list += ([' '*16 + 'TARGET1(%i) += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)]) + grad_func_string = '\n'.join(grad_func_list) self._dK_dtheta_code =\ """ + // _dK_dtheta_code + // Code for computing gradient of covariance with respect to parameters. int i; int j; int N = partial_array->dimensions[0]; @@ -222,16 +234,18 @@ class spkern(Kernpart): } } %s - """%(func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed + """%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed # Code to compute gradients for Kdiag TODO: needs clean up - diag_func_string = re.sub('Z','X',func_string,count=0) - diag_func_string = re.sub('int jj','//int jj',diag_func_string) - diag_func_string = re.sub('j','i',diag_func_string) - diag_func_string = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_func_string) + diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0) + diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string) + diag_grad_func_string = re.sub('j','i',diag_grad_func_string) + diag_grad_func_string = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_grad_func_string) self._dKdiag_dtheta_code =\ """ + // _dKdiag_dtheta_code + // Code for computing gradient of diagonal with respect to parameters. int i; int N = partial_array->dimensions[0]; int input_dim = X_array->dimensions[1]; @@ -239,13 +253,19 @@ class spkern(Kernpart): %s } %s - """%(diag_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed + """%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed - # Code for gradients wrt X - gradient_funcs = "\n".join(["target[i*input_dim+%i] += partial[i*num_inducing+j]*dk_dx%i(%s);"%(q,q,arg_string) for q in range(self.input_dim)]) + # Code for gradients wrt X, TODO: may need to deal with special case where one input is actually an output. + gradX_func_list = [] + if self.output_dim>1: + gradX_func_list += c_define_output_indices + gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)] + gradX_func_string = "\n".join(gradX_func_list) self._dK_dX_code = \ """ + // _dK_dX_code + // Code for computing gradient of covariance with respect to inputs. int i; int j; int N = partial_array->dimensions[0]; @@ -258,24 +278,26 @@ class spkern(Kernpart): } } %s - """%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed + """%(gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed - diag_gradient_funcs = re.sub('Z','X',gradient_funcs,count=0) - diag_gradient_funcs = re.sub('int jj','//int jj',diag_gradient_funcs) - diag_gradient_funcs = re.sub('j','i',diag_gradient_funcs) - diag_gradient_funcs = re.sub('partial\[i\*num_inducing\+i\]','2*partial[i]',diag_gradient_funcs) + diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0) + diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string) + diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string) + diag_gradX_func_string = re.sub('partial\[i\*num_inducing\+i\]','2*partial[i]',diag_gradX_func_string) # Code for gradients of Kdiag wrt X self._dKdiag_dX_code= \ """ + // _dKdiag_dX_code + // Code for computing gradient of diagonal with respect to inputs. int N = partial_array->dimensions[0]; int input_dim = X_array->dimensions[1]; for (int i=0;i Date: Mon, 14 Oct 2013 17:11:39 +0100 Subject: [PATCH 4/7] docstrinfs in kern.py --- GPy/kern/kern.py | 53 ++++++++++++++++++++++++---------- GPy/kern/parts/hierarchical.py | 2 +- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 08f36109..805c6b43 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -79,15 +79,14 @@ class kern(Parameterized): def plot_ARD(self, fignum=None, ax=None, title='', legend=False): - """If an ARD kernel is present, it bar-plots the ARD parameters. + """If an ARD kernel is present, plot a bar representation using matplotlib :param fignum: figure number of the plot :param ax: matplotlib axis to plot on - :param title: - title of the plot, + :param title: + title of the plot, pass '' to not print a title pass None for a generic title - """ if ax is None: fig = pb.figure(fignum) @@ -152,6 +151,13 @@ class kern(Parameterized): return ax def _transform_gradients(self, g): + """ + Apply the transformations of the kernel so that the returned vector + represents the gradient in the transformed space (i.e. that given by + get_params_transformed()) + + :param g: the gradient vector for the current model, usually created by dK_dtheta + """ x = self._get_params() [np.put(x, i, x * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)] [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]] @@ -162,7 +168,9 @@ class kern(Parameterized): return g def compute_param_slices(self): - """create a set of slices that can index the parameters of each part.""" + """ + Create a set of slices that can index the parameters of each part. + """ self.param_slices = [] count = 0 for p in self.parts: @@ -170,14 +178,19 @@ class kern(Parameterized): count += p.num_params def __add__(self, other): - """ - Shortcut for `add`. - """ + """ Overloading of the '+' operator. for more control, see self.add """ return self.add(other) def add(self, other, tensor=False): """ - Add another kernel to this one. Both kernels are defined on the same _space_ + Add another kernel to this one. + + If Tensor is False, both kernels are defined on the same _space_. then + the created kernel will have the same number of inputs as self and + other (which must be the same). + + If Tensor is True, then the dimensions are stacked 'horizontally', so + that the resulting kernel has self.input_dim + other.input_dim :param other: the other kernel to be added :type other: GPy.kern @@ -210,9 +223,7 @@ class kern(Parameterized): return newkern def __mul__(self, other): - """ - Shortcut for `prod`. - """ + """ Here we overload the '*' operator. See self.prod for more information""" return self.prod(other) def __pow__(self, other, tensor=False): @@ -228,7 +239,7 @@ class kern(Parameterized): :param other: the other kernel to be added :type other: GPy.kern :param tensor: whether or not to use the tensor space (default is false). - :type tensor: bool + :type tensor: bool """ K1 = self.copy() @@ -307,6 +318,17 @@ class kern(Parameterized): return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self.parts)], []) def K(self, X, X2=None, which_parts='all'): + """ + Compute the kernel function. + + :param X: the first set of inputs to the kernel + :param X2: (optional) the second set of arguments to the kernel. If X2 + is None, this is passed throgh to the 'part' object, which + handles this as X2 == X. + :param which_parts: a list of booleans detailing whether to include + each of the part functions. By default, 'all' + indicates [True]*self.num_parts + """ if which_parts == 'all': which_parts = [True] * self.num_parts assert X.shape[1] == self.input_dim @@ -321,7 +343,7 @@ class kern(Parameterized): def dK_dtheta(self, dL_dK, X, X2=None): """ Compute the gradient of the covariance function with respect to the parameters. - + :param dL_dK: An array of gradients of the objective function with respect to the covariance function. :type dL_dK: Np.ndarray (num_samples x num_inducing) :param X: Observed data inputs @@ -329,6 +351,7 @@ class kern(Parameterized): :param X2: Observed data inputs (optional, defaults to X) :type X2: np.ndarray (num_inducing x input_dim) + returns: dL_dtheta """ assert X.shape[1] == self.input_dim target = np.zeros(self.num_params) @@ -340,7 +363,7 @@ class kern(Parameterized): return self._transform_gradients(target) def dK_dX(self, dL_dK, X, X2=None): - """Compute the gradient of the covariance function with respect to X. + """Compute the gradient of the objective function with respect to X. :param dL_dK: An array of gradients of the objective function with respect to the covariance function. :type dL_dK: np.ndarray (num_samples x num_inducing) diff --git a/GPy/kern/parts/hierarchical.py b/GPy/kern/parts/hierarchical.py index ab96fdd7..c629f6b9 100644 --- a/GPy/kern/parts/hierarchical.py +++ b/GPy/kern/parts/hierarchical.py @@ -7,7 +7,7 @@ from independent_outputs import index_to_slices class Hierarchical(Kernpart): """ - A kernel part which can reopresent a hierarchy of indepencnce: a gerenalisation of independent_outputs + A kernel part which can reopresent a hierarchy of indepencnce: a generalisation of independent_outputs """ def __init__(self,parts): From da2a88826d670f4284d466dd291d539b9428cf47 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Mon, 14 Oct 2013 22:09:41 +0100 Subject: [PATCH 5/7] Basic sim code functional. --- GPy/core/model.py | 2 +- GPy/kern/constructors.py | 4 +-- GPy/kern/parts/sympykern.py | 67 ++++++++++++++++++++++++++----------- GPy/util/symbolic.py | 12 ++++++- 4 files changed, 62 insertions(+), 23 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 7aff8f4d..c1ab7b6a 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -259,7 +259,7 @@ class Model(Parameterized): these terms are present in the name the parameter is constrained positive. """ - positive_strings = ['variance', 'lengthscale', 'precision', 'kappa'] + positive_strings = ['variance', 'lengthscale', 'precision', 'decay', 'kappa'] # param_names = self._get_param_names() currently_constrained = self.all_constrained_indices() to_make_positive = [] diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index c6a6672f..392f43ba 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -330,11 +330,11 @@ if sympy_available: dist = parse_expr(dist_string) f = variance*sp.exp(-dist/2.) else: - lengthscale = sp.var('lengthscale_i lengthscale_j',positive=True) + lengthscales = sp.var('lengthscale_i lengthscale_j',positive=True) shared_lengthscale = sp.var('shared_lengthscale',positive=True) dist_string = ' + '.join(['(x_%i-z_%i)**2' % (i, i) for i in range(real_input_dim)]) dist = parse_expr(dist_string) - f = scale_i*scale_j*sp.exp(-dist/(2*(shared_lengthscale**2 + lengthscale_i*lengthscale_j))) + f = scale_i*scale_j*sp.exp(-dist/(2*(lengthscale_i**2 + lengthscale_j**2 + shared_lengthscale**2))) return kern(input_dim, [spkern(input_dim, f, output_dim=output_dim, name='eq_sympy')]) def sinc(input_dim, ARD=False, variance=1., lengthscale=1.): diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/parts/sympykern.py index ea603eab..88c179aa 100644 --- a/GPy/kern/parts/sympykern.py +++ b/GPy/kern/parts/sympykern.py @@ -117,6 +117,9 @@ class spkern(Kernpart): return spkern(self._sp_k+other._sp_k) def _gen_code(self): + """Generates the C functions necessary for computing the covariance function using the sympy objects as input.""" + #TODO: maybe generate one C function only to save compile time? Also easier to take that as a basis and hand craft other covariances?? + #generate c functions from sympy objects argument_sequence = self._sp_x+self._sp_z+self._sp_theta code_list = [('k',self._sp_k)] @@ -138,15 +141,20 @@ class spkern(Kernpart): # Substitute any known derivatives which sympy doesn't compute self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code) - # This is the basic argument construction for the C code. - #arg_list = (["X[i*input_dim+%s]"%x.name[2:] for x in self._sp_x] - # + ["Z[j*input_dim+%s]"%z.name[2:] for z in self._sp_z]) + + ############################################################ + # This is the basic argument construction for the C code. # + ############################################################ + arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x] + ["Z2(j, %s)"%z.name[2:] for z in self._sp_z]) + + # for multiple outputs need to also provide these arguments reversed. if self.output_dim>1: reverse_arg_list = list(arg_list) reverse_arg_list.reverse() + # Add in any 'shared' parameters to the list. param_arg_list = [shared_params.name for shared_params in self._sp_theta] arg_list += param_arg_list @@ -163,6 +171,15 @@ class spkern(Kernpart): reverse_arg_string = ", ".join(reverse_arg_list) arg_string = ", ".join(arg_list) precompute_string = "\n".join(precompute_list) + + # Code to compute argments string needed when only X is provided. + X_arg_string = re.sub('Z','X',arg_string) + # Code to compute argument string when only diagonal is required. + diag_arg_string = re.sub('int jj','//int jj',X_arg_string) + diag_arg_string = re.sub('j','i',diag_arg_string) + diag_precompute_string = precompute_list[0] + + # Here's the code to do the looping for K self._K_code =\ """ @@ -184,14 +201,28 @@ class spkern(Kernpart): %s """%(precompute_string,arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed - - # Code to compute diagonal of covariance. - diag_arg_string = re.sub('Z','X',arg_string) - diag_arg_string = re.sub('int jj','//int jj',diag_arg_string) - diag_arg_string = re.sub('j','i',diag_arg_string) - diag_precompute_string = re.sub('int jj','//int jj',precompute_string) - diag_precompute_string = re.sub('Z','X',diag_precompute_string) - diag_precompute_string = re.sub('j','i',diag_precompute_string) + self._K_code_X = """ + // _K_code_X + // Code for computing the covariance function. + int i; + int j; + int N = target_array->dimensions[0]; + int num_inducing = target_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for private(j) + for (i=0;i1: grad_func_list += c_define_output_indices - grad_func_list += [' '*16 + 'TARGET1(%i+ii) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)] - grad_func_list += [' '*16 + 'TARGET1(%i+jj) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)] - grad_func_list += ([' '*16 + 'TARGET1(%i) += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)]) + grad_func_list += [' '*16 + 'TARGET1(%i+ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)] + grad_func_list += [' '*16 + 'TARGET1(%i+jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)] + grad_func_list += ([' '*16 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)]) grad_func_string = '\n'.join(grad_func_list) self._dK_dtheta_code =\ @@ -241,7 +272,7 @@ class spkern(Kernpart): diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0) diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string) diag_grad_func_string = re.sub('j','i',diag_grad_func_string) - diag_grad_func_string = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_grad_func_string) + diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL1(i)',diag_grad_func_string) self._dKdiag_dtheta_code =\ """ // _dKdiag_dtheta_code @@ -259,7 +290,7 @@ class spkern(Kernpart): gradX_func_list = [] if self.output_dim>1: gradX_func_list += c_define_output_indices - gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)] + gradX_func_list += ["TARGET2(i, %i) += PARTIAL2(i, j)*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)] gradX_func_string = "\n".join(gradX_func_list) self._dK_dX_code = \ @@ -284,7 +315,7 @@ class spkern(Kernpart): diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0) diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string) diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string) - diag_gradX_func_string = re.sub('partial\[i\*num_inducing\+i\]','2*partial[i]',diag_gradX_func_string) + diag_gradX_func_string = re.sub('PARTIAL2\(i, i\)','2*PARTIAL1(i)',diag_gradX_func_string) # Code for gradients of Kdiag wrt X self._dKdiag_dX_code= \ @@ -304,10 +335,8 @@ class spkern(Kernpart): #self._dKdiag_dX_code = self._dKdiag_dX_code.replace('Z[j', 'X[i') # Code to use when only X is provided. - self._K_code_X = self._K_code.replace('Z[', 'X[') self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[') self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= partial[', '+= 2*partial[') - self._K_code_X = self._K_code.replace('Z2(', 'X2(') self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z2(', 'X2(') self._dK_dX_code_X = self._dK_dX_code.replace('Z2(', 'X2(') diff --git a/GPy/util/symbolic.py b/GPy/util/symbolic.py index 8b368a77..10c59a5e 100644 --- a/GPy/util/symbolic.py +++ b/GPy/util/symbolic.py @@ -22,9 +22,19 @@ class ln_diff_erf(Function): class sim_h(Function): nargs = 5 + def fdiff(self, argindex=1): + pass + @classmethod def eval(cls, t, tprime, d_i, d_j, l): - return exp((d_j/2*l)**2)/(d_i+d_j)*(exp(-d_j*(tprime - t))*(erf((tprime-t)/l - d_j/2*l) + erf(t/l + d_j/2*l)) - exp(-(d_j*tprime + d_i))*(erf(tprime/l - d_j/2*l) + erf(d_j/2*l))) + # putting in the is_Number stuff forces it to look for a fdiff method for derivative. + return (exp((d_j/2*l)**2)/(d_i+d_j) + *(exp(-d_j*(tprime - t)) + *(erf((tprime-t)/l - d_j/2*l) + + erf(t/l + d_j/2*l)) + - exp(-(d_j*tprime + d_i)) + *(erf(tprime/l - d_j/2*l) + + erf(d_j/2*l)))) class erfc(Function): nargs = 1 From 491eb7243a5ea35b08dc2ba827703ac7f869f188 Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Tue, 15 Oct 2013 05:49:11 +0100 Subject: [PATCH 6/7] Added xw_pen data. --- GPy/util/datasets.py | 14 ++++++++++++++ GPy/util/symbolic.py | 26 +++++++++++++++++++------- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index a6a97457..d13e9f6c 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -145,6 +145,12 @@ The database was created with funding from NSF EIA-0196217.""", 'citation' : 'A Global Geometric Framework for Nonlinear Dimensionality Reduction, J. B. Tenenbaum, V. de Silva and J. C. Langford, Science 290 (5500): 2319-2323, 22 December 2000', 'license' : None, 'size' : 24229368}, + 'xw_pen' : {'urls' : [neil_url + 'xw_pen/'], + 'files' : [['xw_pen_15.csv']], + 'details' : """Accelerometer pen data used for robust regression by Tipping and Lawrence.""", + 'citation' : 'Michael E. Tipping and Neil D. Lawrence. Variational inference for Student-t models: Robust Bayesian interpolation and generalised component analysis. Neurocomputing, 69:123--141, 2005', + 'license' : None, + 'size' : 3410} } @@ -608,6 +614,14 @@ def olivetti_faces(data_set='olivetti_faces'): Y = np.asarray(Y) lbls = np.asarray(lbls)[:, None] return data_details_return({'Y': Y, 'lbls' : lbls, 'info': "ORL Faces processed to 64x64 images."}, data_set) + +def xw_pen(data_set='xw_pen'): + if not data_available(data_set): + download_data(data_set) + Y = np.loadtxt(os.path.join(data_path, data_set, 'xw_pen_15.csv'), delimiter=',') + X = np.arange(485)[:, None] + return data_details_return({'Y': Y, 'X': X, 'info': "Tilt data from a personalized digital assistant pen."}, data_set) + def download_rogers_girolami_data(): if not data_available('rogers_girolami_data'): diff --git a/GPy/util/symbolic.py b/GPy/util/symbolic.py index 10c59a5e..0b5ca381 100644 --- a/GPy/util/symbolic.py +++ b/GPy/util/symbolic.py @@ -28,13 +28,25 @@ class sim_h(Function): @classmethod def eval(cls, t, tprime, d_i, d_j, l): # putting in the is_Number stuff forces it to look for a fdiff method for derivative. - return (exp((d_j/2*l)**2)/(d_i+d_j) - *(exp(-d_j*(tprime - t)) - *(erf((tprime-t)/l - d_j/2*l) - + erf(t/l + d_j/2*l)) - - exp(-(d_j*tprime + d_i)) - *(erf(tprime/l - d_j/2*l) - + erf(d_j/2*l)))) + if (t.is_Number + and tprime.is_Number + and d_i.is_Number + and d_j.is_Number + and l.is_Number): + if (t is S.NaN + or tprime is S.NaN + or d_i is S.NaN + or d_j is S.NaN + or l is S.NaN): + return S.NaN + else: + return (exp((d_j/2*l)**2)/(d_i+d_j) + *(exp(-d_j*(tprime - t)) + *(erf((tprime-t)/l - d_j/2*l) + + erf(t/l + d_j/2*l)) + - exp(-(d_j*tprime + d_i)) + *(erf(tprime/l - d_j/2*l) + + erf(d_j/2*l)))) class erfc(Function): nargs = 1 From a4c0a941becf8f7818a525ecd6915bf008a3cf0d Mon Sep 17 00:00:00 2001 From: Neil Lawrence Date: Tue, 15 Oct 2013 05:53:39 +0100 Subject: [PATCH 7/7] Added xw_pen data. --- GPy/util/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index d13e9f6c..f5947179 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -620,7 +620,7 @@ def xw_pen(data_set='xw_pen'): download_data(data_set) Y = np.loadtxt(os.path.join(data_path, data_set, 'xw_pen_15.csv'), delimiter=',') X = np.arange(485)[:, None] - return data_details_return({'Y': Y, 'X': X, 'info': "Tilt data from a personalized digital assistant pen."}, data_set) + return data_details_return({'Y': Y, 'X': X, 'info': "Tilt data from a personalized digital assistant pen. Plot in original paper showed regression between time steps 175 and 275."}, data_set) def download_rogers_girolami_data():