I have created a small script that:
- Creates a lot of random points.
- Runs a small brute force search to find a rect that has a low error, that is a good fit for the data.
- Runs a linear regression on the rect generated with brute force to further reduce the error.
- Prints out information and plots the data.
Down here you can see an example graph: the random points in blue, the brute forced rect, in blue, the 'linear regressed' rect in green.
from __future__ import division
import random
import matplotlib.pyplot as p
import numpy as np
def random_point():
return (random.random(),random.random())
def list_of_random_points(n):
return [random_point() for _ in range(n)]
def error_of_point(point,function):
x,y = point[0],point[1]
return abs(y - function(x))
def error_of_list_of_points(points,function):
return sum((error_of_point(point,function) for point in points))
def get_starting_rect(points):
min_error = 10**10
for m1 in range(-10,10):
for q1 in range(-10,10):
m,q = m1/10,q1/10
function = lambda x: m*x + q
if error_of_list_of_points(points,function) < min_error:
min_error = error_of_list_of_points(points,function)
best_mq = m,q
return best_mq
def get_approximate_m(m,q,points,sensibility):
if error_of_list_of_points(points,lambda x: (m+sensibility)*x+q) < error_of_list_of_points(points,lambda x: m*x + q):
m += sensibility
elif error_of_list_of_points(points,lambda x: (m-sensibility)*x+q) < error_of_list_of_points(points,lambda x: m*x + q):
m -= sensibility
return m
def get_approximate_q(m,q,points,sensibility):
if error_of_list_of_points(points,lambda x: (m)*x+q+sensibility) < error_of_list_of_points(points,lambda x: m*x + q):
q += sensibility
elif error_of_list_of_points(points,lambda x: (m)*x+q-sensibility) < error_of_list_of_points(points,lambda x: m*x + q):
q -= sensibility
return q
def approximate_better_m_and_q(m,q,points,sensibility):
for _ in range(100):
m = get_approximate_m(m,q,points,sensibility)
q = get_approximate_q(m,q,points,sensibility)
sensibility /= 10
for _ in range(100):
m = get_approximate_m(m,q,points,sensibility)
q = get_approximate_q(m,q,points,sensibility)
return m,q
def plot_rect(m,q):
x = np.arange(0,1,0.1)
y = [i*m + q for i in x]
p.plot(x,y)
def plot_rect_and_points(rect,points):
p.scatter(*zip(*points))
m,q = rect
plot_rect(m,q)
def main():
NUMBER_OF_POINTS = 1000
points = list_of_random_points(NUMBER_OF_POINTS)
m,q = get_starting_rect(points)
plot_rect(m,q)
print("""The rect generated with brute force of equation y = {}x + {}
has an error of {}""".format(m,q,error_of_list_of_points(points,lambda x: m*x + q)))
better_rect = approximate_better_m_and_q(m,q,points,0.1)
m,q = better_rect
print("""The rect generated with linear regression of equation y = {}x + {}
starting from the rect generated with
brute force has an error of {}""".format(
m,q,error_of_list_of_points(points,lambda x: m*x + q)))
plot_rect_and_points((m,q),points)
p.show()
if __name__ == "__main__":
main()