Given a file
salaries.csv
with this structure:
City,Job,Salary
Delhi,Doctors,500
Delhi,Lawyers,400
Delhi,Plumbers,100
London,Doctors,800
London,Lawyers,700
London,Plumbers,300
Tokyo,Doctors,900
Tokyo,Lawyers,800
Tokyo,Plumbers,400
......
print the standard deviation of salaries by profession.
Sample output
Plumbers 311
Lawyers 286
Doctors 448
Solution
from __future__ import with_statement
from math import sqrt
def get_stats(profession, salaries):
    """Print a profession's name and the standard deviation of its salaries.

    The population standard deviation is used: the squared deviations from
    the mean are divided by ``n``, not ``n - 1``.

    Args:
        profession: Label printed in front of the result.
        salaries: Non-empty sequence of numeric salaries.

    Raises:
        ZeroDivisionError: If ``salaries`` is empty.
    """
    # float() forces true division even for all-integer input on Python 2.
    n = float(len(salaries))
    mean = sum(salaries) / n
    squared_deviations = sum((value - mean) ** 2 for value in salaries)
    stdev = (squared_deviations / n) ** 0.5
    # A single pre-formatted string prints identically whether ``print`` is
    # a statement (Python 2) or a function (Python 3).
    print("%s %s" % (profession, stdev))
with open('salaries.csv') as f:
    # Skip the header row.
    f.readline()
    # Map each profession to the list of salaries seen for it.
    salaries = {}
    # Iterate the file lazily instead of loading every line with readlines().
    for line in f:
        # Tolerate blank lines, which would otherwise break the unpacking.
        if not line.strip():
            continue
        _city, profession, value = line.split(',')
        # setdefault replaces dict.has_key(), which was removed in Python 3.
        salaries.setdefault(profession.strip(), []).append(int(value.strip()))

# Report one line per profession.
for profession, values in salaries.items():
    get_stats(profession, values)
No comments:
Post a Comment