Saturday, 23 September 2017

How to create plot charts from TSV data?


USE scipy FOR READING TSV FILE
USE matplotlib.pyplot FOR PLOTTING CHART
USE A CUSTOM UTILITY MODULE (utils.py) BUILT ON THE os MODULE


UTILITY MODULE : utils.py
import os
# Two sub-directories, "data" and "charts", located next to this file
# (not the current working directory).
_BASE_DIR = os.path.dirname(os.path.realpath(__file__))
DATA_DIR = os.path.join(_BASE_DIR, "data")
CHART_DIR = os.path.join(_BASE_DIR, "charts")

# Create the directories on import if they are missing. exist_ok=True makes
# this a no-op when a directory already exists and avoids the
# check-then-create race of os.path.exists() followed by os.mkdir().
for d in [DATA_DIR, CHART_DIR]:
    os.makedirs(d, exist_ok=True)
 


EXAMPLE : webtraffic.py
import os

import matplotlib.pyplot as plt
import numpy as np
import scipy as sp

from utils import CHART_DIR, DATA_DIR


# Error (cost) function used to judge how well a model fits the data
def error(f, x, y):
    """Return the sum of squared differences between f(x) and y.

    Args:
        f: Callable model function applied to ``x`` (for example a
            ``numpy.poly1d`` instance).
        x: Input values passed to ``f``.
        y: Observed values, element-wise comparable with ``f(x)``.

    Returns:
        The scalar ``sum((f(x) - y) ** 2)``.
    """
    residuals = np.asarray(f(x)) - np.asarray(y)
    return np.sum(residuals ** 2)


# Sample data in TSV

# 1    2272
# 2    nan
# 3    1386
# 4    1365
# 5    1488
 

# Read the tab-separated file; the path is relative to the current working
# directory. Fields that cannot be parsed as numbers (e.g. "nan") load as NaN.
data = np.genfromtxt("web_traffic.tsv", delimiter="\t")
print(data[:2])
print(data.shape)


# Extract the first column into x and the second column into y
x = data[:, 0]
print(x)

y = data[:, 1]
print(y)

# Count the NaN entries in the second column and print that count.
# The mask is computed once, before y is overwritten, so x and y are
# filtered with exactly the same rows.
missing = np.isnan(y)
Nan = np.sum(missing)
print(Nan)

# Keep only the rows whose second-column value is a real number
x = x[~missing]
print(x)
y = y[~missing]
print(y)


# Scatter-plot the cleaned data and set the title and X/Y axis labels
plt.scatter(x, y)
plt.title("Web Traffic over the last month")
plt.xlabel("time")
plt.ylabel("Hits/hour")


# Chart properties: ticks, auto scale, grid.
# Place an X tick every 7*24 units and label it "week N"
# (weekly ticks, assuming x is measured in hours -- TODO confirm).
plt.xticks([w * 7 * 24 for w in range(10)],
           ['week %i' % w for w in range(10)])
# Auto-scale the axes tightly around the available data
plt.autoscale(tight=True)
plt.grid()

# Save the chart as a PNG file in the charts directory
plt.savefig(os.path.join(CHART_DIR, "img.png"))


# p = polyfit(x, y, n)
# Given data x and y and the desired order n of the polynomial (a straight
# line has order 1), polyfit finds the model function that minimizes the
# squared error computed by error().
#
#     fp1, residuals, rank, sv, rcond = np.polyfit(x, y, 1, full=True)
#
# polyfit() returns the parameters of the fitted model function, fp1; with
# full=True it also returns diagnostic information about the fit
# (residuals, rank, singular values and rcond).


# Linear fit = straight line; may deviate noticeably from the data
# fp1, residuals, rank, sv, rcond = np.polyfit(x, y, 1, full=True)

# Curved fit = order-10 polynomial that follows the data much more closely
fp1, residuals, rank, sv, rcond = np.polyfit(x, y, 10, full=True)

print("Model parameters: %s" % fp1)
print(residuals)


# Wrap the coefficients in a callable polynomial and report its squared error
f1 = np.poly1d(fp1)
print(error(f1, x, y))

# Generate 1000 evenly spaced X values from 0 to the last data point so the
# fitted curve is drawn smoothly.
fx = np.linspace(0, x[-1], 1000)

# Label the curve explicitly: passing only a list of labels to legend()
# assigns them by artist order, which can attach the label to the scatter
# points instead of the fitted line.
plt.plot(fx, f1(fx), 'C8', linewidth=4, label="d=%i" % f1.order)

plt.legend(loc="upper left")
# NOTE(review): img.png is saved before this curve is drawn, so the fitted
# line and legend are not part of that file; call plt.savefig()/plt.show()
# again after this point if the fit should be visible.

No comments:

Post a Comment

Note: only a member of this blog may post a comment.