Teleportation and Location Data Analysis Notebook

Wrangling

Convert JSON data into a dataframe per session and compute IPTD (inter-personal teleport distance) in the process

In [222]:
from datetime import datetime
import json
from pandas.io.json import json_normalize
import pandas as pd
import numpy as np
from beakerx import *
import time

def fix_vector(x):
    """Convert a raw position value from the JSON log into a float ndarray.

    Parameters
    ----------
    x : dict | list | other
        Position as parsed from JSON: either a mapping of component name to
        value, or a plain list of components.

    Returns
    -------
    np.ndarray or None
        Float array of the components, or None for any other input type
        (e.g. NaN placeholders), so downstream code can detect missing data.
    """
    if type(x) == dict:
        # Dict insertion order is preserved (Python 3.7+), so component
        # order matches the JSON.
        return np.array(list(x.values()), dtype=float)
    elif type(x) == list:
        # Bug fix: lists have no .values(); convert the list directly.
        return np.array(x, dtype=float)
    # Any other type (e.g. float NaN) falls through and returns None.

def parse_teleport_df(user_id, verbose=False):
    """Load the paired JSON logs for one session and build one dataframe.

    Reads 'U/<user_id>.json' and 'U/<user_id+1>.json', keeps PlayerConnect
    and Teleport events, back-fills start positions, then merges both users'
    events into a single time-sorted dataframe with:
      - iptd: inter-personal teleport distance after each event
      - dt:   seconds elapsed since the session's first event

    Parameters
    ----------
    user_id : int
        Id of the first user of the pair; the partner is user_id + 1.
    verbose : bool, optional
        If True, print which users/files are being processed.

    Returns
    -------
    pd.DataFrame
    """
    p1 = str(user_id)
    p2 = str(user_id+1)
    if verbose:
        print("Getting data for " + p1 + " and " + p2)
    tf = ['U/X.json','U/X.json']
    tf[0] = tf[0].replace("X",p1)
    tf[1] = tf[1].replace("X",p2)
    if verbose:
        print("From " + tf[0] + " and " + tf[1])
    frames = []  # per-user frames; concatenated once at the end
    for file in tf:
        with open(file, 'r') as f:
             data = json.load(f)

        # pd.json_normalize replaces the removed pandas.io.json.json_normalize.
        df = pd.json_normalize(data, record_path=['Events'])

        # Fix position data, convert from dict to numpy array
        df['endPosition'] = df['endPosition'].fillna(value=np.nan)
        df['startPosition'] = df['startPosition'].apply(fix_vector)
        df['endPosition'] = df['endPosition'].apply(fix_vector)

        # 2022: json_normalize flattening leaves both 'startPosition' and
        # 'startPosition.value' columns — drop the mangled ones and rename
        # the '.value' columns back to the plain names.
        df = df.drop(columns=['startPosition', 'endPosition'])
        df = df.rename(columns={"startPosition.value": "startPosition",
                                "endPosition.value": "endPosition"})

        # Fix time: the logged string uses 'T', ':', '.', '-' separators;
        # replace each with a space so it matches '%Y %m %d %H %M %S %f'.
        for sep in ("T", ":", ".", "-"):
            df['dateTime'] = df['dateTime'].apply(lambda x: x.replace(sep, " "))
        # Remove the last millisecond unit (one digit beyond microseconds).
        df['dateTime'] = df['dateTime'].apply(lambda x: x[0:-1])
        df['dateTime'] = df['dateTime'].apply(
            lambda x: np.datetime64(datetime.strptime(x, '%Y %m %d %H %M %S %f')))

        # Remove redundant cols
        df = df.drop(columns=['avgVelocity', 'duration', 'finalValue', 'hand',
                              'id', 'initialValue', 'owner'])

        # Keep only connect and teleport events.
        df = df[df.logType.isin(['PlayerConnect', 'Teleport'])]
        # NOTE(review): the JSON apparently carries its own 'index' field, so
        # reset_index emits the positional index as 'level_0' — verify against
        # the source logs if this ever raises a KeyError.
        df = df.reset_index()
        df = df.drop(columns='level_0')

        # Derive uid from the filename ('U/7.json' -> '7', 'U/11.json' -> '11').
        if int(float(file[2:4])) < 10:
            df['uid'] = file[2]
        else:
            df['uid'] = file[2:4]

        # Back-fill startPosition: row 1 starts at the origin, every later
        # row starts where the previous teleport ended.
        df.at[1,'startPosition'] = np.array([0.0,0.0,0.0],dtype=float)
        for i in range( 2,len(df.index) ):
            df.at[i,'startPosition'] = df.endPosition.iloc[i-1]

        frames.append(df)

    # pd.concat replaces the removed DataFrame.append.
    sdf = pd.concat(frames, ignore_index=True)

    sdf = sdf.sort_values('dateTime')
    sdf = sdf.reset_index()
    sdf = sdf.drop(columns='level_0')
    # 'index' is the per-user event counter from the JSON logs.
    sdf = sdf.rename(columns={"index": "actionId"})

    # inter-personal teleport distance
    sdf['iptd'] = 0.0

    # Last known position of each player, keyed by uid string.
    lup = {}
    lup[p1] = np.array([0.0,0.0,0.0],dtype=float)
    lup[p2] = np.array([0.0,0.0,0.0],dtype=float)

    for i in range( 2, len(sdf.actionId) ):
        # Update last position of the player who teleported.
        uid = sdf.at[i,'uid']
        # 2022, numpy array conversion required.
        lup[uid] = np.array(sdf.at[i,'endPosition'],dtype=float)
        # Distance between the two players' latest known positions.
        sdf.at[i,'iptd'] = np.linalg.norm(lup[p1]-lup[p2])

    # dt: whole seconds elapsed since the first event of the session
    # (column initialised as int, so fractional seconds are truncated).
    sdf['dt'] = 0
    for i in range(1,len(sdf.actionId)):
        sdf.at[i,'dt'] = sdf.at[i,'dateTime'].timestamp() - sdf.at[0,'dateTime'].timestamp()

    return sdf

# Build one combined dataframe per dyad session (user pairs 1&2, 3&4, ..., 11&12).
sdfs = [parse_teleport_df(first_uid) for first_uid in range(1, 12, 2)]

Split position vectors into columns in each dataframe

In [223]:
def split_vector(vec):
    """Wrap a position vector in a one-chunk list via np.array_split.

    A float input (a NaN placeholder) is substituted with the origin
    vector [0, 0, 0] before splitting.
    """
    target = np.array([0, 0, 0], dtype=float) if isinstance(vec, float) else vec
    return np.array_split(target, 1)

def apply_vector(vec):
    """Coerce a position value to a float ndarray.

    Floats (NaN placeholders for missing positions) become the origin
    vector [0, 0, 0]; anything else is converted element-wise to float.
    """
    if not isinstance(vec, float):
        return np.array(vec, dtype=float)
    return np.array([0, 0, 0], dtype=float)

def split_position_vectors(i):
    """Expand the start/end position vectors of sdfs[i] into scalar columns.

    Adds sp_x/sp_y/sp_z and ep_x/ep_y/ep_z columns (in that order) and
    stores the widened frame back into sdfs[i].
    Column-splitting approach from:
    https://datascienceparichay.com/article/split-pandas-column-of-lists-into-multiple-columns/
    """
    frame = sdfs[i]
    frame['startPosition'] = frame['startPosition'].apply(apply_vector)
    frame['endPosition'] = frame['endPosition'].apply(apply_vector)
    # For each vector column, build a small frame of its components and
    # concatenate it onto the right-hand side.
    for source_col, component_cols in (('startPosition', ['sp_x', 'sp_y', 'sp_z']),
                                       ('endPosition', ['ep_x', 'ep_y', 'ep_z'])):
        components = pd.DataFrame(frame[source_col].tolist(), columns=component_cols)
        frame = pd.concat([frame, components], axis=1)
    sdfs[i] = frame

def add_sid(i):
    """Tag every row of sdfs[i] with its 1-based session id."""
    session_number = i + 1
    sdfs[i] = sdfs[i].assign(sid=session_number)

# Apply the column-splitting and session-id tagging to all six sessions.
for session_idx in range(6):
    split_position_vectors(session_idx)
    add_sid(session_idx)

# Preview the first session's dataframe.
sdfs[0].head()
Out[223]:
actionId dateTime logType length startPosition endPosition uid iptd dt sp_x sp_y sp_z ep_x ep_y ep_z sid
0 0 2019-08-21 14:09:56.963186 PlayerConnect NaN [0.0, 0.0, 0.0] [0.0, 0.0, 0.0] 1 0.000000 0 0.000000 0.0 0.000000 0.000000 0.0 0.000000 1
1 0 2019-08-21 14:10:31.829816 PlayerConnect NaN [0.0, 0.0, 0.0] [0.0, 0.0, 0.0] 2 0.000000 34 0.000000 0.0 0.000000 0.000000 0.0 0.000000 1
2 2 2019-08-21 14:11:23.222446 Teleport 11.409659 [0.0, 0.0, 0.0] [7.393284, 0.0, 8.896915] 2 11.567876 86 0.000000 0.0 0.000000 7.393284 0.0 8.896915 1
3 4 2019-08-21 14:11:48.175197 Teleport 7.346649 [7.393284, 0.0, 8.896915] [2.16486454, 0.0, 3.64042282] 2 4.235483 111 7.393284 0.0 8.896915 2.164865 0.0 3.640423 1
4 6 2019-08-21 14:12:34.663604 Teleport 15.574819 [2.16486454, 0.0, 3.64042282] [1.39462256, 0.0, -12.0114765] 2 12.092169 157 2.164865 0.0 3.640423 1.394623 0.0 -12.011477 1

Wrangle dataframe array into single dataframe

In [224]:
# Concatenate all six per-session dataframes into one long dataframe.
# pd.concat replaces the removed DataFrame.append; each frame keeps its own
# index, which becomes the within-session row number below.
df = pd.concat(sdfs, ignore_index=False)

df = df.reset_index()
df = df.rename(columns={"index": "session_index"})
# Make sure these cols are numbers not strings.
df['uid'] = df['uid'].astype(int)
df['sid'] = df['sid'].astype(int)
df['actionId'] = df['actionId'].astype(int)
df
Out[224]:
session_index actionId dateTime logType length startPosition endPosition uid iptd dt sp_x sp_y sp_z ep_x ep_y ep_z sid
0 0 0 2019-08-21 14:09:56.963186 PlayerConnect NaN [0.0, 0.0, 0.0] [0.0, 0.0, 0.0] 1 0.000000 0 0.000000 0.0 0.000000 0.000000 0.0 0.000000 1
1 1 0 2019-08-21 14:10:31.829816 PlayerConnect NaN [0.0, 0.0, 0.0] [0.0, 0.0, 0.0] 2 0.000000 34 0.000000 0.0 0.000000 0.000000 0.0 0.000000 1
2 2 2 2019-08-21 14:11:23.222446 Teleport 11.409659 [0.0, 0.0, 0.0] [7.393284, 0.0, 8.896915] 2 11.567876 86 0.000000 0.0 0.000000 7.393284 0.0 8.896915 1
3 3 4 2019-08-21 14:11:48.175197 Teleport 7.346649 [7.393284, 0.0, 8.896915] [2.16486454, 0.0, 3.64042282] 2 4.235483 111 7.393284 0.0 8.896915 2.164865 0.0 3.640423 1
4 4 6 2019-08-21 14:12:34.663604 Teleport 15.574819 [2.16486454, 0.0, 3.64042282] [1.39462256, 0.0, -12.0114765] 2 12.092169 157 2.164865 0.0 3.640423 1.394623 0.0 -12.011477 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
550 31 108 2019-09-05 13:07:02.038217 Teleport 8.610597 [3.716706, 0.0, -6.205044] [2.85037, 0.0, 2.65972185] 12 3.739220 790 3.716706 0.0 -6.205044 2.850370 0.0 2.659722 6
551 32 144 2019-09-05 13:07:52.584193 Teleport 6.083108 [2.52882743, 0.0, 6.3850913] [0.3820052, 0.0, 0.2878809] 11 3.423223 841 2.528827 0.0 6.385091 0.382005 0.0 0.287881 6
552 33 121 2019-09-05 13:08:36.169676 Teleport 2.650542 [2.85037, 0.0, 2.65972185] [0.3415835, 0.0, 1.35904276] 12 1.071924 884 2.850370 0.0 2.659722 0.341583 0.0 1.359043 6
553 34 167 2019-09-05 13:10:43.515260 Teleport 22.687818 [0.3820052, 0.0, 0.2878809] [16.158741, 0.0, 16.5745926] 11 21.947561 1012 0.382005 0.0 0.287881 16.158741 0.0 16.574593 6
554 35 175 2019-09-05 13:11:17.537750 Teleport 20.394730 [16.158741, 0.0, 16.5745926] [1.67523384, 0.0, 2.03869152] 11 1.496845 1046 16.158741 0.0 16.574593 1.675234 0.0 2.038692 6

555 rows × 17 columns

In [225]:
import altair as alt

# Restrict the plotting source to teleport events (drop PlayerConnect rows).
source = df.loc[df['logType'].eq('Teleport')]

Distribution of locations

In [226]:
base = alt.Chart(source)

xscale = alt.Scale(domain=(-25.0, 25.0))
yscale = alt.Scale(domain=(-25.0, 25.0))

bar_args = {'opacity': .3, 'binSpacing': 0}

# Scatter of teleport end positions in the horizontal (ep_x / ep_z) plane,
# coloured by user and shaped by session.
points = base.mark_point().encode(
    alt.X('ep_x:Q', scale=xscale),
    alt.Y('ep_z:Q', scale=yscale),
    color='uid:N',
    shape='sid:N'
)

# Marginal histogram of ep_x along the top edge.
top_hist = base.mark_bar(**bar_args).encode(
    alt.X('ep_x:Q',
          # when using bins, the axis scale is set through
          # the bin extent, so we do not specify the scale here
          # (which would be ignored anyway)
          bin=alt.Bin(maxbins=20, extent=xscale.domain),
          stack=None,
          title=''
         ),
    alt.Y('count()', stack=None, title=''),
).properties(height=60)

# Marginal histogram of ep_z along the right edge.
# Fix: this previously binned ep_x, which did not match the scatter's y-axis.
right_hist = base.mark_bar(**bar_args).encode(
    alt.Y('ep_z:Q',
          bin=alt.Bin(maxbins=20, extent=yscale.domain),
          stack=None,
          title='',
         ),
    alt.X('count()', stack=None, title=''),
).properties(width=60)

top_hist & (points | right_hist)
Out[226]:

Session-user Teleport Location Plots

In [227]:
# Inter-personal teleport distance over session time (step plot),
# one panel per session, coloured by user.
alt.Chart(source).mark_line(interpolate='step-after').encode(
    x=alt.X('dt:Q'),
    y=alt.Y('iptd:Q'),
    color=alt.Color('uid:N')
).properties(
    width=180,
    height=180
).facet(column='sid:N')
Out[227]:

Teleport-time overlaps

The plot shows whether any temporal patterns occur across groups; however, individual users could create hotspots just by teleporting many times in the same area.

In [228]:
# 2-D histogram (heatmap) of IPTD versus session time across all sessions.
alt.Chart(source).mark_rect().encode(
    x=alt.X('dt:Q', bin=alt.Bin(maxbins=40)),
    y=alt.Y('iptd:Q', bin=alt.Bin(maxbins=40)),
    color=alt.Color('count():Q', scale=alt.Scale(scheme='greenblue'))
)
Out[228]:

Session teleport locations in sequence.

Positions over time, but too much data in each graph.

In [229]:
# Teleport end positions connected in time order, one panel per session.
alt.Chart(source).mark_line(point=True).encode(
    x=alt.X('ep_x', scale=alt.Scale(zero=True)),
    y=alt.Y('ep_z', scale=alt.Scale(zero=True)),
    order=alt.Order('dt'),
    color=alt.Color('uid:N')
).properties(
    width=180,
    height=180
).facet(column='sid:N')
Out[229]:

Session 3 Teleport Data

All teleport positions for session 3

In [230]:
# All teleport end positions for session 3, in time order, one panel per user.
alt.Chart(source[source.sid == 3]).mark_line(point=True).encode(
    x=alt.X('ep_x', scale=alt.Scale(zero=True)),
    y=alt.Y('ep_z', scale=alt.Scale(zero=True)),
    order=alt.Order('dt')
).properties(
    width=180,
    height=180
).facet(column='uid:N')
Out[230]:

Positions of teleports for S3 assistive action example

In [231]:
# Zoom into the session-3 assistive-action window (1100 s < dt < 1300 s),
# with tooltips for inspecting individual teleports.
alt.Chart(source[(source.sid == 3) & (source.dt > 1100) & (source.dt < 1300)]).mark_line(point=True).encode(
    x=alt.X('ep_x', scale=alt.Scale(zero=True, domain=(-10, 10))),
    y=alt.Y('ep_z', scale=alt.Scale(zero=True, domain=(-10, 10))),
    order=alt.Order('dt'),
    tooltip=['hoursminutes(dateTime):T', 'actionId', 'dt', 'ep_x', 'ep_z'],
    text=alt.Text('dt', format='.1f')
).properties(
    width=180,
    height=180
).facet(column='uid:N')
Out[231]:

Teleport Frequency Info

In [232]:
# Count teleports per user and convert to teleports-per-minute (tfm).
# .copy() avoids the SettingWithCopyWarning raised when mutating a
# filtered view of df (seen in the original output).
grp = df[df['logType'] == 'Teleport'].copy()
grp['uid'] = grp['uid'].astype(int)
grp = grp.rename(columns={
    'logType': 'Teleport'
})

grp = grp.groupby(['uid']).agg({'Teleport': 'count'})
grp = grp.reset_index()
# Vectorised per-user rate: uids 1-10 are divided by 30, uids 11-12 by 18
# — presumably the session durations in minutes; TODO confirm.
mask = (grp['uid'] < 11)
grp['tfm'] = np.where(mask, grp.Teleport / 30, grp.Teleport / 18)
grp
/Users/thomasdeacon/opt/anaconda3/envs/invoke_xdf/lib/python3.7/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
Out[232]:
uid Teleport tfm
0 1 7 0.233333
1 2 44 1.466667
2 3 21 0.700000
3 4 35 1.166667
4 5 25 0.833333
5 6 29 0.966667
6 7 41 1.366667
7 8 38 1.266667
8 9 185 6.166667
9 10 84 2.800000
10 11 11 0.611111
11 12 23 1.277778
In [233]:
# Bar chart of teleports-per-minute for each user.
alt.Chart(grp).mark_bar().encode(
    alt.X('uid:O', axis=alt.Axis(title='User ID', labelAngle=0)),
    alt.Y('tfm:Q', axis=alt.Axis(title='Teleports per minute'))
)
Out[233]: