from datetime import datetime
import json
from pandas.io.json import json_normalize
import pandas as pd
import numpy as np
from beakerx import *
import time
def fix_vector(x):
    """Convert a raw position entry to a float numpy array.

    JSON positions arrive either as a dict of coordinates or as a list.
    Any other input (e.g. NaN floats from fillna) falls through to None,
    matching the original fall-through behavior.
    """
    if isinstance(x, dict):
        # Keep the dict's value order (insertion order of the JSON object).
        return np.array(list(x.values()), dtype=float)
    if isinstance(x, list):
        # BUG FIX: lists have no .values(); convert the list directly.
        return np.array(x, dtype=float)
    return None
def parse_teleport_df(user_id,verbose = False):
    """Load, clean and merge the teleport logs for one pair of players.

    Reads U/<user_id>.json and U/<user_id + 1>.json, keeps only
    PlayerConnect/Teleport events and returns one DataFrame sorted by time,
    with numpy start/end position vectors, 'iptd' (inter-personal teleport
    distance between the two players) and 'dt' (seconds since first event).
    """
    p1 = str(user_id)
    p2 = str(user_id+1)
    if verbose:
        print("Getting data for " + p1 + " and " + p2)
    # Build the two file paths by substituting each player id for 'X'.
    tf = ['U/X.json','U/X.json']
    tf[0] = tf[0].replace("X",p1)
    tf[1] = tf[1].replace("X",p2)
    sdf = pd.DataFrame() #creates a new dataframe that's empty
    if verbose:
        print("From " + tf[0] + " and " + tf[1])
    for file in tf:
        with open(file, 'r') as f:
            data = json.load(f)
        # NOTE(review): pandas.io.json.json_normalize is deprecated;
        # pandas.json_normalize is the modern spelling.
        df = json_normalize(data,record_path=['Events'])
        # Fix position data, convert from dict to numpy array.
        df.endPosition.fillna(value=np.nan, inplace=True)
        df.startPosition = df.startPosition.apply(lambda x: fix_vector(x))
        df.endPosition = df.endPosition.apply(lambda x: fix_vector(x))
        # 2022: remove mangled cols; likely from a json_normalize behavior
        # change after a python/package update.
        valuelist = ['startPosition','endPosition']
        df = df.drop(columns=valuelist)
        # Rename the '<name>.value' columns back to plain position names.
        df = df.rename(columns={"startPosition.value": "startPosition", "endPosition.value": "endPosition"})
        # Fix Time: normalize 'T', ':', '.', '-' separators to spaces so the
        # string matches the strptime format below.
        df.dateTime = df.dateTime.apply(lambda x: x.replace("T","-"))
        df.dateTime = df.dateTime.apply(lambda x: x.replace(":"," "))
        df.dateTime = df.dateTime.apply(lambda x: x.replace("."," "))
        df.dateTime = df.dateTime.apply(lambda x: x.replace("-"," "))
        # Remove the last millisecond unit (trailing char of the timestamp).
        df.dateTime = df.dateTime.apply(lambda x: x[0:-1])
        df.dateTime = df.dateTime.apply(lambda x: np.datetime64(datetime.strptime(x, '%Y %m %d %H %M %S %f')))
        # Remove redundant cols.
        valuelist = ['avgVelocity','duration','finalValue','hand','id','initialValue','owner']
        df = df.drop(columns=valuelist)
        # Keep only connect/teleport events.
        valuelist = ['PlayerConnect','Teleport']
        df = df[df.logType.isin(valuelist)]
        df = df.reset_index()
        # NOTE(review): dropping 'level_0' implies the Events records already
        # carry an 'index' column, so reset_index named the new one 'level_0'.
        # Verify against the JSON schema on new data.
        df = df.drop(columns='level_0')
        # Derive uid from the filename 'U/<id>.json' (chars 2..3); ids < 10
        # are a single character followed by '.'.
        if int(float(file[2:4])) < 10:
            df['uid'] = file[2]
        else:
            df['uid'] = file[2:4]
        # Chain positions: each teleport starts where the previous one ended.
        # Row 0 is presumably the PlayerConnect event — TODO confirm.
        df.at[1,'startPosition'] = np.array([0.0,0.0,0.0],dtype=float)
        for i in range( 2,len(df.index) ):
            df.at[i,'startPosition'] = df.endPosition.iloc[i-1]
        # NOTE(review): DataFrame.append was removed in pandas 2.0.
        sdf = sdf.append(df, ignore_index = True) # ignoring index is optional
    # Interleave the two players' events chronologically.
    sdf = sdf.sort_values('dateTime')
    sdf = sdf.reset_index()
    sdf = sdf.drop(columns='level_0')
    sdf = sdf.rename(columns={"index": "actionId"})
    # inter-personal teleport distance
    sdf['iptd'] = 0.0
    # lookup: last known end position per player id
    lup = {}
    lup[p1] = np.array([0.0,0.0,0.0],dtype=float)
    lup[p2] = np.array([0.0,0.0,0.0],dtype=float)
    for i in range( 2, len(sdf.actionId) ):
        # Update last position of player who TELEPORTED.
        uid = sdf.at[i,'uid']
        # 2022: numpy array conversion required.
        lup[uid] = np.array(sdf.at[i,'endPosition'],dtype=float)
        # Distance between the two players' last known positions.
        sdf.at[i,'iptd'] = np.linalg.norm(lup[p1]-lup[p2])
    # Seconds elapsed since the first event of the merged session.
    sdf['dt'] = 0
    for i in range(1,len(sdf.actionId)):
        sdf.at[i,'dt'] = sdf.at[i,'dateTime'].timestamp() - sdf.at[0,'dateTime'].timestamp()
    return sdf
# One merged session DataFrame per pair of users (1&2, 3&4, ..., 11&12).
sdfs = [parse_teleport_df(uid) for uid in range(1, 12, 2)]
def split_vector(vec):
    """Return *vec* split into a single chunk (a one-element list of arrays).

    Float inputs (NaN placeholders) are replaced by the origin vector first.
    """
    fallback = np.array([0, 0, 0], dtype=float)
    source = fallback if isinstance(vec, float) else vec
    return np.array_split(source, 1)
def apply_vector(vec):
    """Coerce a position entry to a float numpy array.

    Float inputs (NaN placeholders) map to the origin vector.
    """
    if not isinstance(vec, float):
        return np.array(vec, dtype=float)
    return np.array([0, 0, 0], dtype=float)
def split_position_vectors(i):
    """Expand sdfs[i]'s 3-vector position columns into scalar columns.

    Adds sp_x/sp_y/sp_z (from startPosition) and ep_x/ep_y/ep_z (from
    endPosition) and stores the widened frame back into sdfs[i].
    """
    frame = sdfs[i]
    # Normalize both vector columns to float arrays first.
    frame['startPosition'] = frame['startPosition'].apply(apply_vector)
    frame['endPosition'] = frame['endPosition'].apply(apply_vector)
    # Technique: build a new frame from the column of lists, then concat.
    # From: https://datascienceparichay.com/article/split-pandas-column-of-lists-into-multiple-columns/
    start_cols = pd.DataFrame(frame['startPosition'].tolist(),
                              columns=['sp_x', 'sp_y', 'sp_z'])
    frame = pd.concat([frame, start_cols], axis=1)
    end_cols = pd.DataFrame(frame['endPosition'].tolist(),
                            columns=['ep_x', 'ep_y', 'ep_z'])
    sdfs[i] = pd.concat([frame, end_cols], axis=1)
def add_sid(i):
    """Attach a 1-based session-id column ('sid') to sdfs[i]."""
    session_number = i + 1
    sdfs[i] = sdfs[i].assign(sid=session_number)
# Post-process each of the six sessions: widen position vectors into
# scalar columns, then tag each frame with its session id.
for session_idx in range(6):
    split_position_vectors(session_idx)
    add_sid(session_idx)
# Peek at the first session's frame.
sdfs[0].head()
# Stack all six per-session frames into one long DataFrame, keeping each
# frame's row index so it can be preserved as 'session_index' below.
# NOTE: DataFrame.append was removed in pandas 2.0; pd.concat with
# ignore_index=False is the exact equivalent of the old append chain.
df = pd.concat([sdfs[i] for i in range(6)], ignore_index=False)
df = df.reset_index()
df = df.rename(columns={"index": "session_index"})
# Make sure these cols are numbers not strings.
df['uid'] = df['uid'].astype(int)
df['sid'] = df['sid'].astype(int)
df['actionId'] = df['actionId'].astype(int)
df
Note: the Altair chart renderer may need some configuration tweaks:
import altair as alt
# Only use teleport data for graphs.
source = df[df['logType']=='Teleport']
base = alt.Chart(source)
# Shared axis ranges so the scatter and its marginal histograms line up.
xscale = alt.Scale(domain=(-25.0, 25.0))
yscale = alt.Scale(domain=(-25.0, 25.0))
bar_args = {'opacity': .3, 'binSpacing': 0}
# Top-down scatter of teleport end positions (world x vs z).
points = base.mark_point().encode(
    alt.X('ep_x:Q', scale=xscale),
    alt.Y('ep_z:Q', scale=yscale),
    color='uid:N',
    shape='sid:N'
)
# Marginal histogram for the scatter's x axis.
top_hist = base.mark_bar(**bar_args).encode(
    alt.X('ep_x:Q',
          # when using bins, the axis scale is set through the bin extent,
          # so we do not specify the scale here (it would be ignored anyway)
          bin=alt.Bin(maxbins=20, extent=xscale.domain),
          stack=None,
          title=''
          ),
    alt.Y('count()', stack=None, title=''),
).properties(height=60)
# Marginal histogram for the scatter's y axis.
# BUG FIX: this previously binned 'ep_x'; the right-hand marginal must show
# the y variable ('ep_z') of the scatter it sits beside.
right_hist = base.mark_bar(**bar_args).encode(
    alt.Y('ep_z:Q',
          bin=alt.Bin(maxbins=20, extent=yscale.domain),
          stack=None,
          title='',
          ),
    alt.X('count()', stack=None, title=''),
).properties(width=60)
top_hist & (points | right_hist)
# Inter-personal teleport distance over session time, one facet per session.
iptd_lines = alt.Chart(source).mark_line(interpolate='step-after').encode(
    x='dt:Q',
    y='iptd:Q',
    color='uid:N',
)
iptd_lines.properties(width=180, height=180).facet(column='sid:N')
This plot shows whether any temporal patterns occur across groups; however, individual users could create hotspots simply by teleporting many times in the same area.
# 2-D histogram (heatmap) of inter-personal distance vs. elapsed time.
heatmap = alt.Chart(source).mark_rect()
heatmap.encode(
    alt.X('dt:Q', bin=alt.Bin(maxbins=40)),
    alt.Y('iptd:Q', bin=alt.Bin(maxbins=40)),
    alt.Color('count():Q', scale=alt.Scale(scheme='greenblue')),
)
Positions over time, but there is too much data in each graph.
# Teleport end positions connected in time order, faceted by session.
paths = alt.Chart(source).mark_line(point=True).encode(
    alt.X('ep_x', scale=alt.Scale(zero=True)),
    alt.Y('ep_z', scale=alt.Scale(zero=True)),
    order='dt',
    color='uid:N',
)
paths.properties(width=180, height=180).facet(column='sid:N')
All teleport positions for session 3
# Teleport paths for session 3 only, one facet per user.
session3 = source[source.sid == 3]
alt.Chart(session3).mark_line(point=True).encode(
    alt.X('ep_x', scale=alt.Scale(zero=True)),
    alt.Y('ep_z', scale=alt.Scale(zero=True)),
    order='dt',
).properties(
    width=180,
    height=180,
).facet(column='uid:N')
# Zoom into session 3 between dt 1100 and 1300, with tooltips for inspection.
zoom_window = source[(source.sid == 3) & (source.dt > 1100) & (source.dt < 1300)]
alt.Chart(zoom_window).mark_line(point=True).encode(
    alt.X('ep_x', scale=alt.Scale(zero=True, domain=(-10, 10))),
    alt.Y('ep_z', scale=alt.Scale(zero=True, domain=(-10, 10))),
    order='dt',
    tooltip=['hoursminutes(dateTime):T', 'actionId', 'dt', 'ep_x', 'ep_z'],
    text=alt.Text('dt', format='.1f'),
).properties(
    width=180,
    height=180,
).facet(column='uid:N')
# Teleport counts per user, normalized to teleports per minute.
# FIX: .copy() so the slice is an independent frame — mutating a view of df
# triggers SettingWithCopyWarning and may silently fail under copy-on-write.
grp = df[df['logType']=='Teleport'].copy()
grp['uid'] = grp['uid'].astype(int)
# Reuse the logType column as the per-user teleport count after grouping.
grp = grp.rename(columns = {
    'logType':'Teleport'
})
grp = grp.groupby(['uid']).agg({'Teleport': 'count'})
grp = grp.reset_index()
grp['tfm'] = 0.0
# Vectorised normalization: users 1-10 played 30-minute sessions, the rest 18.
# TODO(review): confirm the 30/18-minute durations against the study protocol.
mask = (grp['uid'] < 11)
grp['tfm'] = np.where(mask, grp.Teleport/30, grp.Teleport/18)
grp
# Bar chart: teleports per minute for each user.
tfm_bars = alt.Chart(grp).mark_bar()
tfm_bars.encode(
    x=alt.X('uid:O', axis=alt.Axis(title='User ID', labelAngle=0)),
    y=alt.Y('tfm:Q', axis=alt.Axis(title='Teleports per minute')),
)