Packages and Constants

import pandas as pd
import numpy as np
import altair as alt
import os
import math

PATH = os.getcwd() + "/data/demographics.csv"

Generic Functions

def get_column_from_csv(file, col_name):
    # Try to get the file and if it doesn't exist issue a warning
    try:
        df = pd.read_csv(file)
    except FileNotFoundError:
        print("File Doesn't Exist")
    else:
        return df[col_name]

def to_precision(x,p):
    """
    returns a string representation of x formatted with a precision of p

    Based on the webkit javascript implementation taken from here:
    https://code.google.com/p/webkit-mirror/source/browse/JavaScriptCore/kjs/number_object.cpp
    """

    x = float(x)

    if x == 0.:
        return "0." + "0"*(p-1)

    out = []

    if x < 0:
        out.append("-")
        x = -x

    e = int(math.log10(x))
    tens = math.pow(10, e - p + 1)
    n = math.floor(x/tens)

    if n < math.pow(10, p - 1):
        e = e -1
        tens = math.pow(10, e - p+1)
        n = math.floor(x / tens)

    if abs((n + 1.) * tens - x) <= abs(n * tens -x):
        n = n + 1

    if n >= math.pow(10,p):
        n = n / 10.
        e = e + 1

    m = "%.*g" % (p, n)

    if e < -2 or e >= p:
        out.append(m[0])
        if p > 1:
            out.append(".")
            out.extend(m[1:p])
        out.append('e')
        if e > 0:
            out.append("+")
        out.append(str(e))
    elif e == (p -1):
        out.append(m)
    elif e >= 0:
        out.append(m[:e+1])
        if e+1 < len(m):
            out.append(".")
            out.extend(m[e+1:])
    else:
        out.append("0.")
        out.extend(["0"]*-(e+1))
        out.append(m)

    return "".join(out)

Data loading and cleaning¶

Read file, as csv was saved on excel it has some issues of encoding, also actual header is for humans.

df = pd.read_csv(PATH, header=1,encoding='cp1252')
df.head()

Wrangle the useful stuff

df = df.rename(columns = {
    'WC04_01': 'PID',
    'WC04_03': 'UID',
    'WC04_02': 'codename',
    'SD02_01': 'age',
    'PD04_01': 'gender',
    'SP03_01': 'vr_exp_a',
    'SP03_02': 'vr_exp_b',
    'MK02_01': 'music_years',
    'MK04_01': 'computer_music_years'
})
value_list = ['AF01_rgs', 'OQ01_01', 'OQ02_01','OQ03_01','OQ04_01', 'OQ05_01','MK01',
              'MK01_01','MK01_02','MK01_03','MK01_04','MK01_05','MK01_06','MK01_08','MK01_08a', 'MK01_09',
              'MK01_07','MK01_07a','MK05_01','MK06','MK01_09a']
df = df.drop(value_list, axis = 1)
# Female = 0, Male = 1
df.gender = [1,0,1,1,1,1,1,1,1,1,1,1]
df

Statistics¶

Get basic demographics for paper¶

print('Age, Years M(SD): '+ to_precision(df.age.mean(),3) +'('+ to_precision(df.age.std(),3) +')')
print('VR Experience A, 1-7 M(SD): '+ to_precision(df.vr_exp_a.mean(),3) +'('+ to_precision(df.vr_exp_a.std(),3) +')')
print('VR Experience B, 1-7 M(SD): '+ to_precision(df.vr_exp_b.mean(),3) +'('+ to_precision(df.vr_exp_b.std(),3) +')')
print('Music Experience, Years M(SD): '+ to_precision(df.music_years.mean(),3) +'('+ to_precision(df.music_years.std(),3) +')')
print('Computer Music Experience, Years M(SD): '+ to_precision(df.computer_music_years.mean(),3) +'('+ to_precision(df.computer_music_years.std(),3) +')')
print('Genders: M=11, F=1' )

Age, Years M(SD): 32.7(6.06)
VR Experience A, 1-7 M(SD): 4.67(1.30)
VR Experience B, 1-7 M(SD): 4.50(1.31)
Music Experience, Years M(SD): 17.3(7.20)
Computer Music Experience, Years M(SD): 11.1(7.86)
Genders: M=11, F=1

Compute MSI¶

msi_df = df.loc[:, 'UID':'MS02_05' ]
value_list = ['codename', 'vr_exp_a', 'vr_exp_b','music_years','computer_music_years']
msi_df = msi_df.drop(value_list, axis = 1)

# Recode musical training q's
# 0; 1; 2; 3; 4-5; 6-9; 10 or more
# 1  2  3  4    5    6  7
def recode_msi_instrument(years):
    years = int(years)
    if years == 0:
        return 1
    elif years == 1:
        return 2
    elif years == 2:
        return 3
    elif years == 3:
        return  4
    elif 4 <= years <= 5:
        return 5
    elif 6 <= years <= 9:
        return 6
    elif years > 9:
        return 7
    else:
        return 0
assert recode_msi_instrument(-1) == 0
assert recode_msi_instrument(0) == 1
assert recode_msi_instrument(1) == 2
assert recode_msi_instrument(2) == 3
assert recode_msi_instrument(3) == 4
assert recode_msi_instrument(5) == 5
assert recode_msi_instrument(7) == 6
assert recode_msi_instrument(11) == 7

# 0 / 0.5 / 1 / 1.5 / 2 / 3-4 / 5 or more
# 1   2     3    4    5    6    7
def recode_msi_practise(hours):
    if hours == 0:
        return 1
    elif 0 < hours <= 0.5:
        return 2
    elif 0.5 < hours <= 1:
        return 3
    elif 1 < hours <= 1.5:
        return 4
    elif 1.5 < hours <= 2:
        return 5
    elif 2 < hours <= 4:
        return 6
    elif hours > 4:
        return 7
    else:
        return 0

assert recode_msi_practise(-1) == 0
assert recode_msi_practise(0) == 1
assert recode_msi_practise(0.5) == 2
assert recode_msi_practise(1) == 3
assert recode_msi_practise(1.5) == 4
assert recode_msi_practise(2) == 5
assert recode_msi_practise(3.5) == 6
assert recode_msi_practise(5.5) == 7

# 0 / 0.5 / 1 / 2 / 3 / 4-6 / 7 or more
# 1   2     3   4   5   6    7
def recode_msi_theory(years):
    if years == 0:
        return 1
    elif 0 < years <= 0.5:
        return 2
    elif 0.5 < years <= 1:
        return 3
    elif 1 < years <= 2:
        return 4
    elif 2 < years <= 3:
        return 5
    elif 3 < years <= 6:
        return 6
    elif years > 6:
        return 7
    else:
        return 0

assert recode_msi_theory(-1) == 0
assert recode_msi_theory(0) == 1
assert recode_msi_theory(0.5) == 2
assert recode_msi_theory(1) == 3
assert recode_msi_theory(1.5) == 4
assert recode_msi_theory(2) == 4
assert recode_msi_theory(3) == 5
assert recode_msi_theory(5.5) == 6
assert recode_msi_theory(7) == 7

# 0 / 0.5 / 1 / 2 / 3-5 / 6-9 / 10 or more
# 1   2     3   4   5     6     7
def recode_msi_training(years):
    if years == 0:
        return 1
    elif 0 < years <= 0.5:
        return 2
    elif 0.5 < years <= 1:
        return 3
    elif 1 < years <= 2:
        return 4
    elif 2 < years <= 5:
        return 5
    elif 5 < years <= 9:
        return 6
    elif years > 9:
        return 7
    else:
        return 0

assert recode_msi_training(-1) == 0
assert recode_msi_training(0) == 1
assert recode_msi_training(0.5) == 2
assert recode_msi_training(1) == 3
assert recode_msi_training(1.5) == 4
assert recode_msi_training(2) == 4
assert recode_msi_training(3) == 5
assert recode_msi_training(5.5) == 6
assert recode_msi_training(11) == 7

# 0 / 1 / 2 / 3 / 4 / 5 / 6 or more
# 1   2   3   4   5   6   7
def recode_msi_play(number):
    number = int(number)
    if number >= 6:
        return 7
    elif number <= -1:
        return 0
    else:
        return number + 1
assert recode_msi_play(-1) == 0
assert recode_msi_play(0) == 1
assert recode_msi_play(1) == 2
assert recode_msi_play(2) == 3
assert recode_msi_play(3) == 4
assert recode_msi_play(5) == 6
assert recode_msi_play(7) == 7
assert recode_msi_play(11) == 7

msi_df.MS02_01 = msi_df.MS02_01.apply(recode_msi_instrument)
msi_df.MS02_02 = msi_df.MS02_02.apply(recode_msi_practise)
msi_df.MS02_03 = msi_df.MS02_03.apply(recode_msi_theory)
msi_df.MS02_04 = msi_df.MS02_04.apply(recode_msi_training)
msi_df.MS02_05 = msi_df.MS02_05.apply(recode_msi_play)

# Sum items in factors and normalise to 0-1 value
# Perceptual acuity
msi_df['MsiPa'] = (msi_df.MS01_01 + msi_df.MS01_02 + ((msi_df.MS01_03*-1)+8) + msi_df.MS01_04 + ((msi_df.MS01_05*-1)+8)  + msi_df.MS01_07 + msi_df.MS01_08 + ((msi_df.MS01_09*-1)+8) + msi_df.MS01_10 )/63

#  Musical training
msi_df['MsiMt'] = ( ( (msi_df.MS01_06*-1)+8) + ((msi_df.MS01_11*-1)+8) + msi_df.MS02_01 + msi_df.MS02_02 + msi_df.MS02_03 + msi_df.MS02_04 + msi_df.MS02_05)/49

msi_df = msi_df.loc[:, ['UID','age','gender','MsiPa','MsiMt']]
msi_df

base = alt.Chart(msi_df)

points = base.mark_point().encode(
    alt.X('MsiPa:Q', title="MSI Perceptual Acuity"),
    alt.Y('MsiMt:Q', title="MSI Musical Training"),
    color='UID:N',
)

points

pam_df = df.loc[:, 'UID':'gender' ]
scores = df.loc[:, 'PA02_01':'PA02_18']
# for positive items, recode strongly disagree to 0, disagree to 1, neutral to 2, agree to 3, and strongly agree to 4.
scores.iloc[:,[0,1,2,3,5,6,8,9,10,12,13,14,15,16,17]] = scores.iloc[:,[0,1,2,3,5,6,8,9,10,12,13,14,15,16,17]] - 1.0
# for negative items, recode strongly disagree to 4, disagree to 3, neutral to 2, agree to 1, and strongly agree to 0.
scores.iloc[:,[4,7,11]] = scores.iloc[:,[4,7,11]] * -1 + 5
pam_df['pam'] = scores.iloc[:,0:17].sum(axis=1) / 72

value_list = ['codename', 'vr_exp_a', 'vr_exp_b']
pam_df = pam_df.drop(value_list, axis = 1)
pam_df['SID'] = [1,1,2,2,3,3,4,4,5,5,6,6]
pam_df

base = alt.Chart(pam_df)

points = base.mark_bar().encode(
    alt.X('UID:N', title="User ID"),
    alt.Y('pam:Q', title="Personal Acquaintance"),
    color='SID:N'
)

points

	WC04_01	WC04_03	WC04_02	AF01_rgs	SP03_01	SP03_02	SD02_01	PD04_01	MK01	MK01_01	...	PA02_09	PA02_10	PA02_11	PA02_12	PA02_13	PA02_14	PA02_15	PA02_16	PA02_17	PA02_18
0	1	1	allik	1	6	5	46	male	5	2	...	4	4	1	3	1	3	4	1	4	4
1	2	2	lison	1	5	5	26	Female	4	2	...	4	3	1	2	2	2	3	1	4	4
2	1	3	dafat	1	5	5	31	Male	4	1	...	4	2	1	1	1	2	2	1	1	1
3	2	4	brnor	1	4	3	30	MALE	8	2	...	4	4	1	3	1	2	3	1	2	3
4	1	5	rakim	1	5	5	36	male	1	2	...	3	3	1	3	1	2	1	1	1	2

	PID	UID	codename	vr_exp_a	vr_exp_b	age	gender	music_years	computer_music_years	MS01_01	...	PA02_09	PA02_10	PA02_11	PA02_12	PA02_13	PA02_14	PA02_15	PA02_16	PA02_17	PA02_18
0	1	1	allik	6	5	46	1	30	20	6	...	4	4	1	3	1	3	4	1	4	4
1	2	2	lison	5	5	26	0	10	3	6	...	4	3	1	2	2	2	3	1	4	4
2	1	3	dafat	5	5	31	1	16	12	7	...	4	2	1	1	1	2	2	1	1	1
3	2	4	brnor	4	3	30	1	14	7	6	...	4	4	1	3	1	2	3	1	2	3
4	1	5	rakim	5	5	36	1	20	1	5	...	3	3	1	3	1	2	1	1	1	2
5	2	6	gipri	2	2	31	1	20	10	5	...	1	1	1	1	1	1	1	1	1	1
6	2	8	toion	6	6	31	1	20	5	6	...	1	2	1	2	1	2	2	1	1	1
7	1	7	roack	4	4	31	1	17	15	7	...	4	4	3	3	3	3	3	3	3	3
8	2	10	aneme	6	6	23	1	0	0	5	...	1	1	1	1	1	1	1	1	1	1
9	1	9	kaofi	6	6	40	1	20	20	6	...	1	1	1	1	1	1	2	1	1	1
10	1	11	kyzie	4	4	35	1	20	20	7	...	5	5	1	2	5	5	5	1	5	3
11	2	12	keoch	3	3	33	1	20	20	7	...	5	5	1	2	5	5	5	1	5	5

	UID	age	gender	MsiPa	MsiMt
0	1	46	1	0.857143	0.897959
1	2	26	0	0.793651	0.857143
2	3	31	1	0.841270	0.836735
3	4	30	1	0.873016	0.918367
4	5	36	1	0.650794	0.673469
5	6	31	1	0.825397	0.877551
6	8	31	1	0.809524	0.897959
7	7	31	1	1.000000	0.795918
8	10	23	1	0.666667	0.183673
9	9	40	1	0.841270	0.693878
10	11	35	1	1.000000	0.836735
11	12	33	1	0.920635	0.775510

	UID	age	gender	pam	SID
0	1	46	1	0.402778	1
1	2	26	0	0.430556	1
2	3	31	1	0.291667	2
3	4	30	1	0.291667	2
4	5	36	1	0.263889	3
5	6	31	1	0.166667	3
6	8	31	1	0.263889	4
7	7	31	1	0.500000	4
8	10	23	1	0.166667	5
9	9	40	1	0.222222	5
10	11	35	1	0.736111	6
11	12	33	1	0.763889	6