Packages and Constants
import pandas as pd
import numpy as np
import altair as alt
import os
import math
# Location of the demographics export, resolved against the current working
# directory. os.path.join picks the right separator for the host platform
# instead of relying on a hard-coded "/".
PATH = os.path.join(os.getcwd(), "data", "demographics.csv")
Generic Functions
def get_column_from_csv(file, col_name):
    """Return one column of a CSV file, or None when the file is missing.

    Args:
        file: Path (or anything ``pandas.read_csv`` accepts) of the CSV to load.
        col_name: Label of the column to pull out of the loaded table.

    Returns:
        The ``table[col_name]`` selection. When the file cannot be found a
        notice is printed and ``None`` is returned instead.
    """
    try:
        table = pd.read_csv(file)
    except FileNotFoundError:
        # A missing file is reported on stdout rather than raised.
        print("File Doesn't Exist")
        return None
    return table[col_name]
def to_precision(x, p):
    """Return a string representation of x formatted with a precision of p.

    Based on the webkit javascript implementation taken from here:
    https://code.google.com/p/webkit-mirror/source/browse/JavaScriptCore/kjs/number_object.cpp

    Args:
        x: Value to format; anything ``float()`` accepts.
        p: Number of significant digits to keep (the algorithm assumes
            p >= 1).

    Returns:
        Plain decimal notation when the decimal exponent e of the rounded
        value satisfies -2 <= e < p, otherwise scientific notation such as
        ``"1.2e+3"``. NaN and infinities are returned as ``"nan"``,
        ``"inf"`` or ``"-inf"``.
    """
    x = float(x)

    # NaN and infinities have no decimal exponent: int(math.log10(x)) below
    # would raise ValueError / OverflowError, so return their text form.
    if not math.isfinite(x):
        return str(x)

    if x == 0.:
        return "0." + "0" * (p - 1)

    out = []

    # Work on the magnitude; the sign is emitted up front.
    if x < 0:
        out.append("-")
        x = -x

    # e is the decimal exponent, n the p-digit integer mantissa (x ~= n * tens).
    e = int(math.log10(x))
    tens = math.pow(10, e - p + 1)
    n = math.floor(x / tens)

    # int() truncates toward zero, so for x < 1 the first guess of e is one
    # too high and the mantissa comes out a digit short; step e down once.
    if n < math.pow(10, p - 1):
        e = e - 1
        tens = math.pow(10, e - p + 1)
        n = math.floor(x / tens)

    # Round to nearest: move up when n + 1 is at least as close to x as n.
    if abs((n + 1.) * tens - x) <= abs(n * tens - x):
        n = n + 1

    # Rounding up may have produced p + 1 digits (e.g. 999.9 -> 1000).
    if n >= math.pow(10, p):
        n = n / 10.
        e = e + 1

    m = "%.*g" % (p, n)

    if e < -2 or e >= p:
        # Scientific notation: d.ddd e[+]exp
        out.append(m[0])
        if p > 1:
            out.append(".")
            out.extend(m[1:p])
        out.append('e')
        if e > 0:
            out.append("+")
        out.append(str(e))
    elif e == (p - 1):
        # Exactly p integer digits, no fractional part.
        out.append(m)
    elif e >= 0:
        # Decimal point falls inside the mantissa digits.
        out.append(m[:e + 1])
        if e + 1 < len(m):
            out.append(".")
            out.extend(m[e + 1:])
    else:
        # 0.0ddd form for small magnitudes (-2 <= e < 0).
        out.append("0.")
        out.extend(["0"] * -(e + 1))
        out.append(m)

    return "".join(out)
Read the file. The CSV was saved from Excel, so it has to be decoded as cp1252; its first row is a human-readable header, so the column names are taken from the second row.
# Load the export: column names are taken from the second row (header=1)
# and the bytes are decoded as cp1252.
df = pd.read_csv(
    PATH,
    encoding='cp1252',
    header=1,
)
# Preview the first rows.
df.head()
Rename the columns we need and drop the ones we don't
# Give the questionnaire export codes readable names.
df = df.rename(
    columns={
        'WC04_01': 'PID',
        'WC04_02': 'codename',
        'WC04_03': 'UID',
        'SD02_01': 'age',
        'PD04_01': 'gender',
        'SP03_01': 'vr_exp_a',
        'SP03_02': 'vr_exp_b',
        'MK02_01': 'music_years',
        'MK04_01': 'computer_music_years',
    }
)

# Columns that are not used further on.
value_list = [
    'AF01_rgs',
    'OQ01_01', 'OQ02_01', 'OQ03_01', 'OQ04_01', 'OQ05_01',
    'MK01',
    'MK01_01', 'MK01_02', 'MK01_03', 'MK01_04', 'MK01_05', 'MK01_06',
    'MK01_08', 'MK01_08a', 'MK01_09',
    'MK01_07', 'MK01_07a',
    'MK05_01', 'MK06', 'MK01_09a',
]
df = df.drop(columns=value_list)

# Gender is overwritten by hand, one entry per row: Female = 0, Male = 1
df.gender = [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
df
# Report mean and standard deviation (pandas defaults) of each demographic
# column, formatted to 3 significant digits as "M(SD)".
for label, column in (
    ('Age, Years M(SD): ', 'age'),
    ('VR Experience A, 1-7 M(SD): ', 'vr_exp_a'),
    ('VR Experience B, 1-7 M(SD): ', 'vr_exp_b'),
    ('Music Experience, Years M(SD): ', 'music_years'),
    ('Computer Music Experience, Years M(SD): ', 'computer_music_years'),
):
    print(label + to_precision(df[column].mean(), 3) + '(' + to_precision(df[column].std(), 3) + ')')

# Count genders from the coded column (Female = 0, Male = 1) instead of
# printing hard-coded totals, so the report cannot drift from the data.
print('Genders: M=' + str(int((df.gender == 1).sum())) + ', F=' + str(int((df.gender == 0).sum())))
# Take the UID..MS02_05 column range (label slice, both ends included) and
# discard the columns inside it that the MSI scoring does not need.
value_list = [
    'codename',
    'vr_exp_a',
    'vr_exp_b',
    'music_years',
    'computer_music_years',
]
msi_df = df.loc[:, 'UID':'MS02_05'].drop(columns=value_list)
# Recode musical training q's
# raw answer : 0 | 1 | 2 | 3 | 4-5 | 6-9 | 10 or more
# code       : 1 | 2 | 3 | 4 |  5  |  6  |     7
def recode_msi_instrument(years):
    """Map a number of years onto its 1-7 category code.

    Args:
        years: Raw answer; fractional values are truncated by ``int()``.

    Returns:
        The category code 1-7, or 0 when ``years`` is negative or NaN.
    """
    # NaN is the only value that differs from itself. int(NaN) raises
    # ValueError, so give it the same 0 code as other invalid input, which
    # is what recode_msi_practise / _theory / _training return for NaN.
    if years != years:
        return 0
    years = int(years)
    if years < 0:
        return 0
    if years <= 3:
        # 0..3 map one-to-one onto codes 1..4.
        return years + 1
    if years <= 5:
        return 5
    if years <= 9:
        return 6
    return 7


assert recode_msi_instrument(float('nan')) == 0
assert recode_msi_instrument(-1) == 0
assert recode_msi_instrument(0) == 1
assert recode_msi_instrument(1) == 2
assert recode_msi_instrument(2) == 3
assert recode_msi_instrument(3) == 4
assert recode_msi_instrument(5) == 5
assert recode_msi_instrument(7) == 6
assert recode_msi_instrument(11) == 7
# raw answer : 0 | 0.5 | 1 | 1.5 | 2 | 3-4 | 5 or more
# code       : 1 |  2  | 3 |  4  | 5 |  6  |     7
def recode_msi_practise(hours):
    """Map a number of hours onto its 1-7 category code.

    Values between two listed answers fall into the next category up
    (e.g. 0.75 -> 3).

    Args:
        hours: Raw numeric answer.

    Returns:
        The category code 1-7, or 0 when ``hours`` is negative or NaN.
    """
    if hours == 0:
        return 1
    if not hours > 0:
        # Negative values and NaN fail the comparison and get the 0 code.
        return 0
    # (inclusive upper bound, code), checked in ascending order.
    for upper_bound, code in ((0.5, 2), (1, 3), (1.5, 4), (2, 5), (4, 6)):
        if hours <= upper_bound:
            return code
    return 7


assert [
    recode_msi_practise(value) for value in (-1, 0, 0.5, 1, 1.5, 2, 3.5, 5.5)
] == [0, 1, 2, 3, 4, 5, 6, 7]
# raw answer : 0 | 0.5 | 1 | 2 | 3 | 4-6 | 7 or more
# code       : 1 |  2  | 3 | 4 | 5 |  6  |     7
def recode_msi_theory(years):
    """Map a number of years onto its 1-7 category code.

    Values between two listed answers fall into the next category up
    (e.g. 1.5 -> 4); anything above 6 gets code 7.

    Args:
        years: Raw numeric answer.

    Returns:
        The category code 1-7, or 0 when ``years`` is negative or NaN.
    """
    if years == 0:
        return 1
    if not years > 0:
        # Negative values and NaN fail the comparison and get the 0 code.
        return 0
    # (inclusive upper bound, code), checked in ascending order.
    for upper_bound, code in ((0.5, 2), (1, 3), (2, 4), (3, 5), (6, 6)):
        if years <= upper_bound:
            return code
    return 7


assert [
    recode_msi_theory(value) for value in (-1, 0, 0.5, 1, 1.5, 2, 3, 5.5, 7)
] == [0, 1, 2, 3, 4, 4, 5, 6, 7]
# raw answer : 0 | 0.5 | 1 | 2 | 3-5 | 6-9 | 10 or more
# code       : 1 |  2  | 3 | 4 |  5  |  6  |     7
def recode_msi_training(years):
    """Map a number of years onto its 1-7 category code.

    Values between two listed answers fall into the next category up
    (e.g. 5.5 -> 6); anything above 9 gets code 7.

    Args:
        years: Raw numeric answer.

    Returns:
        The category code 1-7, or 0 when ``years`` is negative or NaN.
    """
    if years == 0:
        return 1
    if not years > 0:
        # Negative values and NaN fail the comparison and get the 0 code.
        return 0
    # (inclusive upper bound, code), checked in ascending order.
    for upper_bound, code in ((0.5, 2), (1, 3), (2, 4), (5, 5), (9, 6)):
        if years <= upper_bound:
            return code
    return 7


assert [
    recode_msi_training(value) for value in (-1, 0, 0.5, 1, 1.5, 2, 3, 5.5, 11)
] == [0, 1, 2, 3, 4, 4, 5, 6, 7]
# raw answer : 0 | 1 | 2 | 3 | 4 | 5 | 6 or more
# code       : 1 | 2 | 3 | 4 | 5 | 6 |     7
def recode_msi_play(number):
    """Map a count onto its 1-7 category code.

    Args:
        number: Raw answer; fractional values are truncated by ``int()``.

    Returns:
        ``number + 1`` capped at 7, or 0 when ``number`` is negative or NaN.
    """
    # NaN is the only value that differs from itself. int(NaN) raises
    # ValueError, so give it the same 0 code as other invalid input, which
    # is what recode_msi_practise / _theory / _training return for NaN.
    if number != number:
        return 0
    number = int(number)
    if number < 0:
        return 0
    # 0..5 map onto 1..6; everything from 6 upwards shares code 7.
    return min(number, 6) + 1


assert recode_msi_play(float('nan')) == 0
assert recode_msi_play(-1) == 0
assert recode_msi_play(0) == 1
assert recode_msi_play(1) == 2
assert recode_msi_play(2) == 3
assert recode_msi_play(3) == 4
assert recode_msi_play(5) == 6
assert recode_msi_play(7) == 7
assert recode_msi_play(11) == 7
# Replace the raw MS02 answers with their category codes.
msi_df['MS02_01'] = msi_df['MS02_01'].apply(recode_msi_instrument)
msi_df['MS02_02'] = msi_df['MS02_02'].apply(recode_msi_practise)
msi_df['MS02_03'] = msi_df['MS02_03'].apply(recode_msi_theory)
msi_df['MS02_04'] = msi_df['MS02_04'].apply(recode_msi_training)
msi_df['MS02_05'] = msi_df['MS02_05'].apply(recode_msi_play)

# Sum items in factors and normalise to 0-1 value.
# Items written as (8 - x) are reverse-scored.

# Perceptual acuity: nine items, divided by 63.
msi_df['MsiPa'] = (
    msi_df['MS01_01']
    + msi_df['MS01_02']
    + (8 - msi_df['MS01_03'])
    + msi_df['MS01_04']
    + (8 - msi_df['MS01_05'])
    + msi_df['MS01_07']
    + msi_df['MS01_08']
    + (8 - msi_df['MS01_09'])
    + msi_df['MS01_10']
) / 63

# Musical training: seven items, divided by 49.
msi_df['MsiMt'] = (
    (8 - msi_df['MS01_06'])
    + (8 - msi_df['MS01_11'])
    + msi_df['MS02_01']
    + msi_df['MS02_02']
    + msi_df['MS02_03']
    + msi_df['MS02_04']
    + msi_df['MS02_05']
) / 49

# Keep only the identifying columns and the two factor scores.
msi_df = msi_df[['UID', 'age', 'gender', 'MsiPa', 'MsiMt']]
msi_df
# Scatter plot of the two MSI factor scores, coloured by UID.
base = alt.Chart(msi_df)
points = base.mark_point().encode(
    x=alt.X('MsiPa:Q', title="MSI Perceptual Acuity"),
    y=alt.Y('MsiMt:Q', title="MSI Musical Training"),
    color='UID:N',
)
points
# Build the per-participant "pam" score from the 18 items PA02_01..PA02_18.
# Both selections are copied so the in-place recoding below cannot write
# back into df (or trigger pandas' SettingWithCopyWarning).
pam_df = df.loc[:, 'UID':'gender'].copy()
scores = df.loc[:, 'PA02_01':'PA02_18'].copy()

# Positions (0-based) of the positively and negatively worded items.
positive_items = [0, 1, 2, 3, 5, 6, 8, 9, 10, 12, 13, 14, 15, 16, 17]
negative_items = [4, 7, 11]

# for positive items, recode strongly disagree to 0, disagree to 1, neutral to 2, agree to 3, and strongly agree to 4.
scores.iloc[:, positive_items] = scores.iloc[:, positive_items] - 1.0
# for negative items, recode strongly disagree to 4, disagree to 3, neutral to 2, agree to 1, and strongly agree to 0.
scores.iloc[:, negative_items] = scores.iloc[:, negative_items] * -1 + 5

# Sum all 18 recoded items and divide by the maximum total (18 items * 4).
# iloc slices exclude the end position, so 0:18 is needed to reach the last
# item; the previous 0:17 silently dropped it while still dividing by 72.
pam_df['pam'] = scores.iloc[:, 0:18].sum(axis=1) / 72

value_list = ['codename', 'vr_exp_a', 'vr_exp_b']
pam_df = pam_df.drop(value_list, axis=1)
# SID is assigned by hand, one entry per row: consecutive rows share an id.
pam_df['SID'] = [1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6]
pam_df
# Bar chart of the pam score per UID, coloured by SID.
base = alt.Chart(pam_df)
points = base.mark_bar().encode(
    x=alt.X('UID:N', title="User ID"),
    y=alt.Y('pam:Q', title="Personal Acquaintance"),
    color='SID:N',
)
points