USE scipy FOR READING TSV FILE
USE matplotlib.pyplot FOR PLOTTING CHART
USE A CUSTOM UTILITY MODULE (utils.py) BUILT ON THE os MODULE
UTILITY MODULE : utils.py
import os

# Directory that contains this module; both sub-directories live beside it.
_BASE_DIR = os.path.dirname(os.path.realpath(__file__))

# Absolute paths of the "data" and "charts" sub-directories.
DATA_DIR = os.path.join(_BASE_DIR, "data")
CHART_DIR = os.path.join(_BASE_DIR, "charts")

# Create the directories at import time if they are missing. exist_ok=True
# avoids the race between an os.path.exists() check and a later os.mkdir().
for d in [DATA_DIR, CHART_DIR]:
    os.makedirs(d, exist_ok=True)
EXAMPLE : webtraffic.py
import os

import numpy as np
import scipy as sp

from utils import CHART_DIR, DATA_DIR
# Sum-of-squares error of a model function against observed data
def error(f, x, y):
    """Return the sum of squared differences between ``f(x)`` and ``y``.

    Args:
        f: Callable model function; it is called once as ``f(x)``.
        x: Input values handed to ``f``.
        y: Observed values, subtracted element-wise from ``f(x)``.

    Returns:
        The sum over all elements of ``(f(x) - y) ** 2``.
    """
    # NumPy is called directly: the NumPy aliases in the top-level scipy
    # namespace (scipy.sum, ...) are deprecated.
    return np.sum((f(x) - y) ** 2)
# Sample data in TSV
# 1 2272
# 2 nan
# 3 1386
# 4 1365
# 5 1488
# Read the tab-separated data file into a 2-D array. NumPy is called
# directly because the NumPy aliases in the top-level scipy namespace
# (scipy.genfromtxt, scipy.isnan, scipy.sum, ...) are deprecated.
data = np.genfromtxt("web_traffic.tsv", delimiter="\t")
print(data[:2])
print(data.shape)

# Extract the first column into x and the second column into y
x = data[:, 0]
print(x)
y = data[:, 1]
print(y)

# Count the entries of the second column that are NaN and print the count
is_missing = np.isnan(y)
Nan = np.sum(is_missing)
print(Nan)

# Keep only the rows whose second column holds a number. The mask is
# computed once, before y is overwritten, so x and y stay aligned.
x = x[~is_missing]
print(x)
y = y[~is_missing]
print(y)
# Scatter plot of the cleaned data, with title and X/Y axis labels
import matplotlib.pyplot as plt

plt.scatter(x, y)
plt.title("Web Traffic over the last month")
plt.xlabel("time")
plt.ylabel("Hits/hour")

# One tick per week on the X axis: a tick every 7 * 24 x-units
# (presumably x is measured in hours -- confirm against the data file)
plt.xticks([w * 7 * 24 for w in range(10)],
           ['week %i' % w for w in range(10)])

# Scale the axes tightly to the available data, then draw the grid.
# These are two separate statements and must each sit on their own line.
plt.autoscale(tight=True)
plt.grid()

# Save the chart as a PNG picture in the charts directory
plt.savefig(os.path.join(CHART_DIR, "img.png"))
# p = polyfit(x, y, n)
# Given data x and y and the desired order n of the polynomial (a straight
# line has order 1), polyfit finds the model function that minimizes the
# squared error computed by error().
#
# polyfit() returns the parameters of the fitted model function (fp1); with
# full=True it also returns additional background information on the
# fitting process (residuals, rank, sv, rcond).
#
# NumPy is called directly because the NumPy aliases in the top-level scipy
# namespace (scipy.polyfit, scipy.poly1d, scipy.linspace) are deprecated.

# Linear fit = straight line, may deviate from the data:
# fp1, residuals, rank, sv, rcond = np.polyfit(x, y, 1, full=True)
# Order-10 fit = curved line that follows the data closely
fp1, residuals, rank, sv, rcond = np.polyfit(x, y, 10, full=True)
print("Model parameters: %s" % fp1)
print(residuals)

# Wrap the coefficients in a callable polynomial and report its error
f1 = np.poly1d(fp1)
print(error(f1, x, y))

# Generate 1000 evenly spaced X-values from 0 to the last x for plotting
fx = np.linspace(0, x[-1], 1000)

# Attach the label to the fitted line itself. Passing a bare label list to
# legend() pairs labels with artists by drawing order, which can label the
# scatter points instead of the fitted curve.
plt.plot(fx, f1(fx), 'C8', linewidth=4, label="d=%i" % f1.order)
plt.legend(loc="upper left")

# The curve and legend are drawn after "img.png" was written, so save the
# finished chart separately; "img.png" keeps its earlier content.
plt.savefig(os.path.join(CHART_DIR, "img_fit.png"))