# Sabermetrics scripts for some old blog posts analyzing the 2014 Oakland A's.
from numpy import *
from numpy import linalg
"""
Regression Classes
These classes are base classes for linear and log regression.
Currently does linear and polynomial fits.
+ OneDRegression
++ OneDLinearRegression
+++ OneDLogRegression
++++ OneDLog10Regression
+++ OneDSemilogyRegression
++ OneDLinearPolyfit
+++ OneDLogPolyfit
"""
class OneDRegression(object):
    """
    Base class for 1D regression operations.

    Subclasses are expected to set the instance attributes
    ``self.x`` and ``self.y`` (the original data) and, where
    applicable, ``self.coeffs`` (fit coefficients).
    """
    # BUG FIX: these accessors were previously named x/y/coeffs,
    # which the instance attributes of the same name shadowed --
    # obj.x() raised TypeError.  Renamed get_* for consistency with
    # get_slope/get_intercept in the subclasses.
    def get_x(self):
        """Return original x data."""
        return self.x
    def get_y(self):
        """Return original y data."""
        return self.y
    def get_coeffs(self):
        """Return the fit coefficients set by the subclass."""
        return self.coeffs
class OneDLinearRegression(OneDRegression):
    """
    Compute slope and intercept of the least-squares fit
    of a 1D function y = f(x).

    This is a barebones wrapper for the Numpy lstsq function.
    After construction the fit is available as ``self.slope``,
    ``self.intercept`` and ``self.r2`` (coefficient of determination).
    """
    def __init__(self, x, y):
        """Perform linear regression of y(x).

        x, y : equal-shape 1D numpy arrays (y must support .var()).
        """
        assert shape(x) == shape(y)
        self.x = x
        self.y = y
        # Design matrix [x, 1] so lstsq solves for [slope, intercept].
        A = vstack([ x, ones(shape(x)) ]).T
        w, resid = linalg.lstsq(A, y)[:2]
        self.slope = w[0]
        self.intercept = w[1]
        # BUG FIX: lstsq returns an EMPTY residual array for an exact
        # or rank-deficient fit; the old expression then produced an
        # empty array instead of a number.  Treat it as a perfect fit.
        if size(resid) == 0:
            self.r2 = 1.0
        else:
            self.r2 = 1.0 - float(resid[0]) / (len(y) * y.var())
    def get_slope(self):
        """Return slope of regressed line."""
        return self.slope
    def get_intercept(self):
        """Return intercept of regressed line."""
        return self.intercept
    def get_r2(self):
        """Return the coefficient of determination of the fit.

        BUG FIX: was named r2(), which the instance attribute
        self.r2 shadowed, so the method was uncallable.
        """
        return self.r2
    def get_point(self, x1, x2, y1):
        """
        Return y coordinate of a point y2 given
        x1, x2, and y1, by sliding along the fitted line.
        """
        y2 = y1 + self.slope * (x2 - x1)
        return y2
    def f(self, x):
        """Given some x, return the fitted y = slope*x + intercept."""
        return self.slope * x + self.intercept
class OneDLogRegression(OneDLinearRegression):
    """
    Power-law fit: linear regression of log(y) vs log(x)
    (natural log).  This is a barebones wrapper for the Numpy
    least squares function.  You pass it your y(x) and it takes
    care of the log transform; points whose log(y) is infinite
    (i.e. y == 0) are dropped before fitting.
    """
    def __init__(self, x, y):
        """Perform linear regression of log(y)(log(x))."""
        assert shape(x) == shape(y)
        self.x = x
        self.y = y
        # Log-transform, skipping points where log(y) is infinite.
        # NOTE(review): x is assumed strictly positive -- log(x) is
        # not checked here; confirm against callers.
        logx_vals = []
        logy_vals = []
        for xi, yi in zip(x, y):
            logyi = log(yi)
            if not isinf(logyi):
                logx_vals.append(log(xi))
                logy_vals.append(logyi)
        # BUG FIX: the old code preallocated zero arrays of full
        # length, so every skipped point left a spurious (0, 0)
        # data point in the regression.  Only kept points are used.
        logx = array(logx_vals)
        logy = array(logy_vals)
        self.logx = logx
        self.logy = logy
        # Fit the line log(y) = slope*log(x) + intercept.
        OneDLinearRegression.__init__(self, logx, logy)
    def get_logx(self):
        """Return logarithm of original x data.

        BUG FIX: was named logx(), shadowed by self.logx.
        """
        return self.logx
    def get_logy(self):
        """Return logarithm of original y data.

        BUG FIX: was named logy(), shadowed by self.logy.
        """
        return self.logy
    def get_log_point(self, logx1, logx2, logy1):
        """
        Return y coordinate of a point y2 given
        log(x1), log(x2), and log(y1).
        This returns y2, it does NOT return log(y2).
        """
        logy2 = logy1 + self.slope * (logx2 - logx1)
        return pow(e, logy2)
    def f(self, x):
        """Given some x, give some y (for plots)."""
        logy = self.slope * log(asarray(x, dtype=float)) + self.intercept
        return pow(e, logy)
class OneDLog10Regression(OneDLinearRegression):
    """
    Power-law fit in base 10: linear regression of log10(y)
    vs log10(x).  This is a barebones wrapper for the Numpy
    least squares function.  You pass it your y(x) and it takes
    care of the log transform; points whose log10(y) is infinite
    (i.e. y == 0) are dropped before fitting.
    """
    def __init__(self, x, y):
        """Perform linear regression of log10(y)(log10(x))."""
        assert shape(x) == shape(y)
        self.x = x
        self.y = y
        # Log-transform, skipping points where log10(y) is infinite.
        # NOTE(review): x is assumed strictly positive -- log10(x) is
        # not checked here; confirm against callers.
        logx_vals = []
        logy_vals = []
        for xi, yi in zip(x, y):
            logyi = log10(yi)
            if not isinf(logyi):
                logx_vals.append(log10(xi))
                logy_vals.append(logyi)
        # BUG FIX: the old code preallocated zero arrays of full
        # length, so every skipped point left a spurious (0, 0)
        # data point in the regression.  Only kept points are used.
        logx = array(logx_vals)
        logy = array(logy_vals)
        self.logx = logx
        self.logy = logy
        # Fit the line log10(y) = slope*log10(x) + intercept.
        OneDLinearRegression.__init__(self, logx, logy)
    def get_logx(self):
        """Return base-10 logarithm of original x data."""
        return self.logx
    def get_logy(self):
        """Return base-10 logarithm of original y data."""
        return self.logy
    def get_log_point(self, logx1, logx2, logy1):
        """
        Return y coordinate of a point y2 given
        log10(x1), log10(x2), and log10(y1).
        This returns y2, it does NOT return log10(y2).
        """
        logy2 = logy1 + self.slope * (logx2 - logx1)
        return pow(10, logy2)
    def f(self, x):
        """Given some x, give some y (for plots)."""
        logy = self.slope * log10(asarray(x, dtype=float)) + self.intercept
        return pow(10, logy)
class OneDSemilogyRegression(OneDLinearRegression):
    """
    Exponential fit: regression of a series log(y) = m*x + b
    (natural log).  Points with y <= SomeVerySmallNumber are
    dropped so the log is always finite.
    """
    def __init__(self, x, y, SomeVerySmallNumber=1e-10):
        """
        Transform y into logy, then perform
        regression for logy = mx + b.

        x, y : equal-shape numpy arrays (pandas Series also accepted).
        """
        assert shape(x) == shape(y)
        # Keep only points whose y is large enough to take a log of.
        mask = (y > SomeVerySmallNumber)
        loggable = len(mask[mask == True])
        # BUG FIX: the bare except here previously swallowed every
        # error; only the pandas ".values" probe should be caught.
        try:
            # pandas Series: unwrap to the underlying numpy array
            ymask = y[mask].values
            xmask = x[mask].values
        except AttributeError:
            ymask = y[mask]
            xmask = x[mask]
        logy = zeros(loggable,)
        for i in range(loggable):
            logy[i] = log(ymask[i])
        self.x = xmask
        self.y = ymask
        self.logy = logy
        # Fit the line log(y) = slope*x + intercept.
        OneDLinearRegression.__init__(self, xmask, logy)
    def f(self, x):
        """Given some x, return the fitted y = exp(m*x + b)."""
        return exp(self.slope * x + self.intercept)
    def logf(self, x):
        """Return the log of the fitted y, i.e. m*x + b."""
        return self.slope * x + self.intercept
    def get_logy(self):
        """Return the log-transformed y data used in the fit.

        BUG FIX: was named logy() (and x()/y(), now inherited as
        get_x()/get_y() accessors), shadowed by the attributes.
        """
        return self.logy
class OneDLinearPolyfit(OneDRegression):
    """
    Least-squares polynomial fit of a 1D function y = f(x).

    This is a barebones wrapper around numpy's polyfit/poly1d.
    With deg=1 this is a straight-line fit and the slope and
    intercept are exposed as attributes.
    """
    def __init__(self, x, y, deg=1):
        """Fit a degree-`deg` polynomial to y(x)."""
        assert shape(x) == shape(y)
        self.x = x
        self.y = y
        # Run the polynomial least-squares fit.
        fit_coeffs = polyfit(x, y, deg)
        # Only a straight line has a well-defined slope/intercept.
        if deg == 1:
            self.slope, self.intercept = fit_coeffs[0], fit_coeffs[1]
        self.coeffs = fit_coeffs
        self.p1d = poly1d(fit_coeffs)
    def f(self, x):
        """Evaluate the fitted polynomial at x."""
        return self.p1d(x)
class OneDLogPolyfit(OneDLinearPolyfit, OneDLogRegression):
    """
    Polynomial fit in log-log space: fits log(y) as a polynomial
    in log(x) (natural log).  You pass it your y(x) and it takes
    care of the log transform; points whose log(y) is infinite
    (i.e. y == 0) are dropped before fitting.
    """
    def __init__(self, x, y, deg=1):
        """Fit a degree-`deg` polynomial to log(y)(log(x))."""
        assert shape(x) == shape(y)
        self.x = x
        self.y = y
        self.deg = deg
        # Log-transform, skipping points where log(y) is infinite.
        # NOTE(review): x is assumed strictly positive -- log(x) is
        # not checked here; confirm against callers.
        logx_vals = []
        logy_vals = []
        for xi, yi in zip(x, y):
            logyi = log(yi)
            if not isinf(logyi):
                logx_vals.append(log(xi))
                logy_vals.append(logyi)
        # BUG FIX: the old code preallocated zero arrays of full
        # length, so every skipped point left a spurious (0, 0)
        # data point in the fit.  Only kept points are used.
        logx = array(logx_vals)
        logy = array(logy_vals)
        self.logx = logx
        self.logy = logy
        # Fit the polynomial in log-log space.
        OneDLinearPolyfit.__init__(self, logx, logy, deg=deg)
    def get_deg(self):
        """Return degree of fitted polynomial.

        BUG FIX: was named deg(), shadowed by self.deg.
        """
        return self.deg
    def f(self, x):
        """Evaluate the fitted log-log polynomial at x (for plots)."""
        # BUG FIX: logx was previously referenced without ever being
        # defined, so this method raised NameError on every call.
        logx = log(asarray(x, dtype=float))
        logy = self.p1d(logx)
        return exp(logy)
class OneDLog10Polyfit(OneDLinearPolyfit, OneDLogRegression):
    """
    Polynomial fit in base-10 log-log space: fits log10(y) as a
    polynomial in log10(x).  You pass it your y(x) and it takes
    care of the log transform; points whose log10(y) is infinite
    (i.e. y == 0) are dropped before fitting.
    """
    def __init__(self, x, y, deg=1):
        """Fit a degree-`deg` polynomial to log10(y)(log10(x))."""
        assert shape(x) == shape(y)
        self.x = x
        self.y = y
        self.deg = deg
        # Log-transform, skipping points where log10(y) is infinite.
        # NOTE(review): x is assumed strictly positive -- log10(x) is
        # not checked here; confirm against callers.
        logx_vals = []
        logy_vals = []
        for xi, yi in zip(x, y):
            logyi = log10(yi)
            if not isinf(logyi):
                logx_vals.append(log10(xi))
                logy_vals.append(logyi)
        # BUG FIX: the old code preallocated zero arrays of full
        # length, so every skipped point left a spurious (0, 0)
        # data point in the fit.  Only kept points are used.
        logx = array(logx_vals)
        logy = array(logy_vals)
        # Fit the polynomial in log10-log10 space.
        OneDLinearPolyfit.__init__(self, logx, logy, deg=deg)
        self.logx = logx
        self.logy = logy
    def f(self, x):
        """Evaluate the fitted log-log polynomial at x (for plots)."""
        # BUG FIX: logx was previously referenced without ever being
        # defined, so this method raised NameError on every call.
        logx = log10(asarray(x, dtype=float))
        logy = self.p1d(logx)
        return pow(10, logy)
class OneDSemilogyPolyfit(OneDLinearPolyfit):
    """
    Polynomial fit of log(y) = p(x) (natural log).
    Points with y <= SomeVerySmallNumber are dropped so the
    log is always finite.
    """
    def __init__(self, x, y, deg=1, SomeVerySmallNumber=1e-10):
        """
        Transform y into logy, then perform
        poly regression for logy = p(x).

        BUG FIX: this method was misspelled ``__init`` (so it was
        never called as a constructor) and SomeVerySmallNumber was
        referenced without being defined; it is now a defaulted
        parameter, matching OneDSemilogyRegression.
        """
        assert shape(x) == shape(y)
        # Keep only points whose y is large enough to take a log of.
        mask = (y > SomeVerySmallNumber)
        loggable = len(mask[mask == True])
        # BUG FIX: narrowed the bare except to the pandas ".values"
        # probe only.
        try:
            # pandas Series: unwrap to the underlying numpy array
            ymask = y[mask].values
            xmask = x[mask].values
        except AttributeError:
            ymask = y[mask]
            xmask = x[mask]
        logy = zeros(loggable,)
        for i in range(loggable):
            logy[i] = log(ymask[i])
        self.x = xmask
        self.y = ymask
        self.logy = logy
        # Fit the polynomial log(y) = p(x).
        OneDLinearPolyfit.__init__(self, xmask, logy, deg=deg)
    def f(self, x):
        """Given some x, return the fitted y = exp(p(x))."""
        return exp(self.p1d(x))