San Diego Traffic Data

Obtain traffic data from the Smart Street Light monitor at the corner of Cherokee and Adams in San Diego, California and render visualizations. The notebook can be downloaded on GitHub.

Import libraries

In [1]:
import base64
import json
import os

import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
from pandas.io.json import json_normalize
# You may need to run "pip3 install PyQt5" for %matplotlib qt to run properly
%matplotlib qt
sns.set(style="whitegrid", palette="tab10")

Define functions:

  • Auth Token (used for all future access)
  • Assets (meta data for the device)
  • Event Data (temporal monitoring data)
In [2]:
def get_client_token(client, secret, timeout=30):
    """Request an OAuth2 access token using the client-credentials grant.

    Args:
        client: Client ID sent as the user part of the HTTP Basic credentials.
        secret: Client secret sent as the password part of the credentials.
        timeout: Seconds to wait for the auth server before giving up.

    Returns:
        The ``access_token`` value from the auth server's JSON response.

    Raises:
        requests.HTTPError: If the auth server replies with a 4xx/5xx status.
        KeyError: If the JSON response carries no ``access_token`` field.
    """
    uri = 'https://auth.aa.cityiq.io/oauth/token'
    # HTTP Basic auth is defined over the standard Base64 alphabet ('+', '/');
    # the URL-safe variant ('-', '_') yields a different, invalid header for
    # any credentials whose encoding happens to contain those characters.
    raw_credentials = (client + ':' + secret).encode('UTF-8')
    credentials = base64.b64encode(raw_credentials).decode('ascii')
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cache-Control': 'no-cache',
        'Authorization': 'Basic ' + credentials
    }
    params = {
        'grant_type': 'client_credentials'
    }

    response = requests.post(uri, headers=headers, params=params, timeout=timeout)
    # Fail here with a clear HTTP error instead of a confusing KeyError below
    response.raise_for_status()
    return response.json()['access_token']
In [3]:
def get_assets(token, bbox='32.775904:-117.109044,32.755429:-117.130248', timeout=30):
    """Search the metadata API for assets that report traffic events.

    Args:
        token: Bearer token, as returned by ``get_client_token``.
        bbox: Bounding box in the API's ``lat:lon,lat:lon`` form. The default
            is the box this notebook uses for monitors in Normal Heights.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.HTTPError: If the API replies with a 4xx/5xx status.
    """
    uri = 'https://sandiego.cityiq.io/api/v2/metadata/assets/search'
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Predix-Zone-Id': 'SD-IE-TRAFFIC',
        'Authorization': 'bearer ' + token
    }

    params = {
        'bbox': bbox,
        'page': 0,
        'size': 200,
        'startts': '1578523871727',
        'endts': '1578610271727',
        # Only assets that emit traffic-flow events
        'q': 'eventTypes:TFEVT'
    }

    response = requests.get(uri, headers=headers, params=params, timeout=timeout)
    # Surface auth/quota/server failures instead of returning an error payload
    response.raise_for_status()
    return response.json()
In [4]:
def get_event_data(token, asset='3ea8cf80-d265-4415-8496-d51afa72517d',
                   start_time='1575792000000', end_time='1576396800000',
                   page_size='10000', timeout=30):
    """Fetch traffic (TFEVT) events recorded by a single monitor.

    Args:
        token: Bearer token, as returned by ``get_client_token``.
        asset: Asset ID of the monitor; it becomes part of the request URI.
        start_time: Start of the window, epoch milliseconds
            (default: Dec 8 2019 00:00:00).
        end_time: End of the window, epoch milliseconds
            (default: Dec 15 2019 00:00:00, seven days after the start).
        page_size: Maximum number of events requested in one page.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.HTTPError: If the API replies with a 4xx/5xx status.
    """
    # The URI contains the asset ID for a specific monitor
    uri = 'https://sandiego.cityiq.io/api/v2/event/assets/' + asset + '/events'

    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Predix-Zone-Id': 'SD-IE-TRAFFIC',
        'Authorization': 'bearer ' + token
    }

    # Limit to traffic events in a specific time frame
    params = {
        'eventType': 'TFEVT',
        'startTime': start_time,
        'endTime': end_time,
        'pageSize': page_size
    }

    response = requests.get(uri, headers=headers, params=params, timeout=timeout)
    # Surface auth/quota/server failures instead of returning an error payload
    response.raise_for_status()
    return response.json()

Get the token for auth

In [5]:
# Credentials may be supplied through the environment so that private keys
# never have to be written into the notebook; the fallbacks are the values
# that were originally embedded in this cell.
CITYIQ_CLIENT_ID = os.environ.get('CITYIQ_CLIENT_ID', 'PublicAccess')
CITYIQ_CLIENT_SECRET = os.environ.get('CITYIQ_CLIENT_SECRET', 'qPKIadEsoHjyh226Snz7')
token = get_client_token(CITYIQ_CLIENT_ID, CITYIQ_CLIENT_SECRET)

Get metadata about the monitor(s)

In [6]:
# data = get_assets(token)
# json_normalize(data['content'])

Get data from the monitor(s) and put into dataframe

In [7]:
# Pull the week of traffic events for the monitored asset
data = get_event_data(token)

# Layout of the response (a single entry of data['content'] is shown):
# {'content': [{'locationUid': '3bf5ef59',
#    'assetUid': '3ea8cf80-d265-4415-8496-d51afa72517d',
#    'eventType': 'TFEVT',
#    'timestamp': 1576360801785,
#    'properties': {'speedUnit': 'METERS_PER_SEC',
#     'eventUid': 'MTU3NjM2MDgwMTc4NQ==',
#     'directionUnit': 'DEGREE',
#     'counter_direction_vehicleType': 'small_vehicle',
#     'vehicleType': 'small_vehicle'},
#    'measures': {'counter_direction_speed': 9.228733757230481,
#     'vehicleCount': 4.0,
#     'counter_direction_vehicleCount': 6.0,
#     'counter_direction': 268.0,   <- heading West
#     'speed': 10.045135174314495,
#     'direction': 88.0}},          <- heading East

# Columns kept after the nested JSON is flattened into dotted names
event_columns = [
    'measures.counter_direction',
    'measures.counter_direction_speed',
    'measures.counter_direction_vehicleCount',
    'measures.direction',
    'measures.speed',
    'measures.vehicleCount',
    'properties.counter_direction_vehicleType',
    'properties.vehicleType',
    'timestamp',
]
df = json_normalize(data['content'])[event_columns]

Separate East and West bound traffic, excluding records with a zero vehicle count, and stack the two sets with renamed columns

In [8]:
# Each event row carries two readings: the "counter direction" (West, per the
# sample payload) and the primary direction (East). Map both onto one schema
# so they can be stacked into a single long table.
west_columns = {
    'measures.counter_direction': 'direction',
    'measures.counter_direction_speed': 'speed',
    'measures.counter_direction_vehicleCount': 'vehicleCount',
    'properties.counter_direction_vehicleType': 'vehicleType',
}
east_columns = {
    'measures.direction': 'direction',
    'measures.speed': 'speed',
    'measures.vehicleCount': 'vehicleCount',
    'properties.vehicleType': 'vehicleType',
}

# Keep only rows where that direction actually counted vehicles
west_seen = df['measures.counter_direction_vehicleCount'] != 0
east_seen = df['measures.vehicleCount'] != 0

df_west = df.loc[west_seen, list(west_columns) + ['timestamp']].rename(columns=west_columns)
df_east = df.loc[east_seen, list(east_columns) + ['timestamp']].rename(columns=east_columns)

# DataFrame.append was removed in pandas 2.0; concat stacks both in one step
df_serial = pd.concat([df_west, df_east], ignore_index=True)

Convert meters per second to MPH and add readable local date/time

In [9]:
# Conversion factors: metres/second -> miles/hour, milliseconds -> nanoseconds
MPS_TO_MPH = 2.23694
NS_PER_MS = 1000000

# Speed rounded to whole miles per hour
df_serial['mph'] = df_serial['speed'].apply(lambda mps: round(mps * MPS_TO_MPH))

# Epoch milliseconds are scaled to nanoseconds for to_datetime, tagged as UTC,
# shifted to US/Pacific, then truncated to whole seconds
utc_time = pd.to_datetime(df_serial['timestamp'] * NS_PER_MS).dt.tz_localize('utc')
df_serial['datetime'] = utc_time.dt.tz_convert('US/Pacific').dt.floor('s')
df_serial['hour'] = df_serial['datetime'].dt.hour

Group data by hour and remove extreme outliers

In [10]:
# Removing outliers as possible data collection errors
# Readings of 60 mph or more are treated as possible collection errors and dropped
below_cutoff = df_serial[df_serial['mph'] < 60]

# Total vehicles for every (hour, vehicle type, speed) combination
df_group = (
    below_cutoff
    .groupby(['hour', 'vehicleType', 'mph'], as_index=False)['vehicleCount']
    .sum()
)

Rendering a relplot where:

  • dot size = average daily number of vehicles (7-day total ÷ 7) observed at that hour and speed
  • dot color = type of vehicle
  • X/Y = speed and time of day
In [11]:
sns.set_style("whitegrid")

# Dot size: the grouped 7-day vehicle count divided by 7
g = sns.relplot(
    data=df_group,
    x="hour",
    y="mph",
    hue="vehicleType",
    size=df_group["vehicleCount"] / 7,
    sizes=(40, 400),
    alpha=.5,
    palette="muted",
    height=6,
)

# One tick every two hours across the day
plt.xticks(list(range(0, 25, 2)))
plt.xlabel('Time of Day', fontsize=12)
g.fig.set_size_inches(15, 10)

# Reference line and label at 25 mph
g.axes[0][0].axhline(y=25, color='red', linewidth=2, alpha=.7)
plt.text(2, 25, 'Speed Limit', fontsize=14, bbox=dict(facecolor='orange', alpha=1))
Out[11]:
Text(2, 25, 'Speed Limit')

Render a swarm plot. It does not include the third dimension (quantity of vehicles); each point is an observation (across 7 days), which is reasonable since a platoon of vehicles moving through the intersection will all have similar speeds.

In [12]:
# Figure size for this and later seaborn axes-level plots
sns.set(rc={'figure.figsize': (11.7, 8.27)})
sns.set_style("whitegrid")

ax = sns.swarmplot(data=df_group, x="hour", y="mph", hue="vehicleType", size=8)

# Reference line and label at 25 mph
ax.axhline(y=25, color='red', linewidth=2, alpha=.7, ls='-')
ax.text(4, 25, 'Speed Limit', fontsize=14, bbox=dict(facecolor='orange', alpha=1))
ax
Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x11877fb70>

Create speed bins and render histogram for small vehicles only

In [13]:
# 5-mph-wide bins covering 0 through 60
bin_groups = list(range(0, 65, 5))

small_vehicle_rows = df_group[df_group['vehicleType'] == 'small_vehicle']
hist = small_vehicle_rows.hist(column='mph', bins=bin_groups, figsize=(12, 8))

# Dashed marker and label at 25 mph
plt.axvline(25, color='r', linestyle='dashed', linewidth=4)
plt.text(21, 10, 'Speed Limit', fontsize=14, bbox=dict(facecolor='orange', alpha=1))
plt.xlabel("Small Vehicle Speed Distribution")
plt.ylabel("Number of observations over 7 days")
Out[13]:
Text(0, 0.5, 'Number of observations over 7 days')

Box plot the overall data

In [14]:
# Speed distribution per vehicle type
ax = sns.boxplot(data=df_group, x="vehicleType", y="mph")

# Dashed reference line and label at 25 mph
ax.axhline(y=25, color='red', linewidth=2, alpha=.7, ls='--')
ax.text(0.4, 25, 'Speed Limit', fontsize=14, bbox=dict(facecolor='orange', alpha=1))
ax
Out[14]:
<matplotlib.axes._subplots.AxesSubplot at 0x11892d780>

Table of vehicle counts by size and over/under speed limit.

In [15]:
# True where the grouped speed is above 25 mph; the Series name becomes the
# column name once the group keys are turned back into columns
over_limit = (df_group['mph'] > 25).rename('speedLimit')

# Vehicle totals per (vehicle type, over/under) pair
df_summary = (
    df_group
    .groupby(['vehicleType', over_limit])['vehicleCount']
    .sum()
    .reset_index()
)
df_summary['speedLimit'] = df_summary['speedLimit'].map({False: 'under', True: 'over'})
df_summary
Out[15]:
vehicleType speedLimit vehicleCount
0 large_vehicle under 338.0
1 large_vehicle over 113.0
2 small_vehicle under 20706.0
3 small_vehicle over 4123.0

Table of the fraction of vehicles over the speed limit, by vehicle type.

In [16]:
# One row per vehicle type, one column per speedLimit label. reindex + fillna
# guarantee both columns exist, so a type with no vehicles on one side of the
# limit yields 0 or 1 instead of raising an error.
limit_counts = (
    df_summary
    .pivot(index='vehicleType', columns='speedLimit', values='vehicleCount')
    .reindex(columns=['under', 'over'])
    .fillna(0)
)

# Share of each type's vehicles that were over the limit, to two decimals
df_pcnt = (
    (limit_counts['over'] / limit_counts.sum(axis=1))
    .round(2)
    .rename('overSpeedLimit')
    .reset_index()
)

df_pcnt
Out[16]:
vehicleType overSpeedLimit
0 large_vehicle 0.25
1 small_vehicle 0.17
In [17]:
# Hourly speed distribution, split by vehicle type
plt.figure(figsize=(15, 6))
ax = sns.boxplot(data=df_group, x="hour", y="mph", hue="vehicleType")

# Dashed reference line at 25 mph; the label sits left of the first hour
ax.axhline(y=25, color='red', linewidth=2, alpha=.7, ls='--')
ax.text(-3, 25, 'Speed Limit', fontsize=14, bbox=dict(facecolor='orange', alpha=.5))
ax
Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x11892db00>