Checking how things work for this website

6 minute read

To access script used for this post, check out here

import os
from six.moves import urllib
import pandas as pd
import numpy as np

DOWNLOAD_ROOT = "http://s3.amazonaws.com/gamma-datasets/"
TRAFFIC_PATH = "datasets"
TRAFFIC_URL = DOWNLOAD_ROOT + "Speed_Camera_Violations.csv"


## Use following codes for fetching data

def fetch_traffic_data(traffic_url=TRAFFIC_URL, traffic_path=TRAFFIC_PATH):
    if not os.path.isdir(traffic_path):
        os.makedirs(traffic_path)
    csv_path = os.path.join(traffic_path, "Speed_Camera_Violations.csv")
    urllib.request.urlretrieve(traffic_url, csv_path)

    return pd.read_csv(csv_path, parse_dates=[2], index_col=[2])
traffic = fetch_traffic_data()
traffic.sort_index(inplace=True)
traffic.groupby(level=0).mean()
VIOLATIONS X COORDINATE Y COORDINATE LATITUDE LONGITUDE
VIOLATION DATE
2014-07-01 52.168421 1.157331e+06 1.895851e+06 41.869966 -87.697890
2014-07-02 43.228261 1.157172e+06 1.896513e+06 41.871785 -87.698460
2014-07-03 51.617021 1.157533e+06 1.896956e+06 41.872993 -87.697119
2014-07-04 59.596774 1.159785e+06 1.900856e+06 41.883649 -87.688748
2014-07-05 55.380952 1.160100e+06 1.900902e+06 41.883770 -87.687592
2014-07-06 59.983333 1.159726e+06 1.901113e+06 41.884357 -87.688963
2014-07-07 49.237113 1.157365e+06 1.895910e+06 41.870129 -87.697765
2014-07-08 48.316327 1.157691e+06 1.895995e+06 41.870354 -87.696563
2014-07-09 49.824742 1.157944e+06 1.895733e+06 41.869632 -87.695640
2014-07-10 47.309278 1.157290e+06 1.897096e+06 41.873383 -87.698004
2014-07-11 51.686747 1.158252e+06 1.894460e+06 41.866131 -87.694548
2014-07-12 42.203390 1.160162e+06 1.899432e+06 41.879735 -87.687409
2014-07-13 57.645161 1.160090e+06 1.900417e+06 41.882440 -87.687643
2014-07-14 46.145833 1.157698e+06 1.894575e+06 41.866455 -87.696585
2014-07-15 46.857143 1.158040e+06 1.895050e+06 41.867752 -87.695312
2014-07-16 48.646465 1.158067e+06 1.895417e+06 41.868761 -87.695204
2014-07-17 48.103093 1.158137e+06 1.895128e+06 41.867964 -87.694953
2014-07-18 52.376471 1.158751e+06 1.894366e+06 41.865862 -87.692724
2014-07-19 58.274194 1.160721e+06 1.899518e+06 41.879958 -87.685355
2014-07-20 52.904762 1.160850e+06 1.900023e+06 41.881340 -87.684866
2014-07-21 49.322222 1.159274e+06 1.895277e+06 41.868352 -87.690772
2014-07-22 50.811111 1.159185e+06 1.895274e+06 41.868345 -87.691100
2014-07-23 51.318681 1.159495e+06 1.895639e+06 41.869340 -87.689954
2014-07-24 52.321839 1.159703e+06 1.896104e+06 41.870612 -87.689177
2014-07-25 48.236842 1.159034e+06 1.896310e+06 41.871189 -87.691635
2014-07-26 56.777778 1.161149e+06 1.899982e+06 41.881223 -87.683770
2014-07-27 57.903226 1.160853e+06 1.899920e+06 41.881059 -87.684861
2014-07-28 46.666667 1.159703e+06 1.896104e+06 41.870612 -87.689177
2014-07-29 44.534247 1.160348e+06 1.898468e+06 41.877085 -87.686749
2014-07-30 47.000000 1.160617e+06 1.898516e+06 41.877210 -87.685761
... ... ... ... ... ...
2018-11-24 35.171053 1.159490e+06 1.894617e+06 41.866529 -87.690028
2018-11-25 22.191781 1.158107e+06 1.898191e+06 41.876368 -87.694995
2018-11-26 8.547009 1.159749e+06 1.893199e+06 41.862637 -87.689100
2018-11-27 14.200000 1.159642e+06 1.891924e+06 41.859141 -87.689530
2018-11-28 15.267606 1.159012e+06 1.891869e+06 41.859001 -87.691843
2018-11-29 18.236111 1.158425e+06 1.893154e+06 41.862540 -87.693959
2018-11-30 20.423841 1.158600e+06 1.892246e+06 41.860047 -87.693342
2018-12-01 20.613333 1.159016e+06 1.895342e+06 41.868530 -87.691742
2018-12-02 25.848101 1.159554e+06 1.895372e+06 41.868602 -87.689768
2018-12-03 18.751724 1.158749e+06 1.892040e+06 41.859477 -87.692801
2018-12-04 18.297872 1.159058e+06 1.892202e+06 41.859915 -87.691666
2018-12-05 17.839161 1.158522e+06 1.893111e+06 41.862422 -87.693607
2018-12-06 18.197183 1.158819e+06 1.892125e+06 41.859710 -87.692544
2018-12-07 19.897436 1.159356e+06 1.893968e+06 41.864754 -87.690526
2018-12-08 34.746835 1.159553e+06 1.895374e+06 41.868605 -87.689775
2018-12-09 32.975000 1.159798e+06 1.895477e+06 41.868884 -87.688871
2018-12-10 18.577586 1.159191e+06 1.893608e+06 41.863769 -87.691146
2018-12-11 17.264957 1.159350e+06 1.894265e+06 41.865570 -87.690540
2018-12-12 17.478632 1.158634e+06 1.894483e+06 41.866183 -87.693162
2018-12-13 16.660714 1.158779e+06 1.895139e+06 41.867979 -87.692615
2018-12-14 17.464912 1.158872e+06 1.895000e+06 41.867596 -87.692276
2018-12-15 32.986842 1.159668e+06 1.894936e+06 41.867400 -87.689364
2018-12-16 35.184211 1.159870e+06 1.894878e+06 41.867237 -87.688627
2018-12-17 19.377193 1.158933e+06 1.892744e+06 41.861405 -87.692111
2018-12-18 18.218487 1.159433e+06 1.895158e+06 41.868017 -87.690214
2018-12-19 19.854545 1.159134e+06 1.893288e+06 41.862893 -87.691366
2018-12-20 16.330275 1.157975e+06 1.894762e+06 41.866962 -87.695577
2018-12-21 18.811321 1.157903e+06 1.896157e+06 41.870791 -87.695799
2018-12-22 34.670886 1.159684e+06 1.895026e+06 41.867647 -87.689303
2018-12-23 26.040541 1.158972e+06 1.896172e+06 41.870809 -87.691882

1637 rows × 5 columns

traffic.set_index("CAMERA ID", inplace=True)
traffic.sort_index(inplace=True)
traffic.groupby(level=0).mean()
VIOLATIONS X COORDINATE Y COORDINATE LATITUDE LONGITUDE
CAMERA ID
CHI003 116.335859 1.147853e+06 1.934275e+06 41.975605 -87.731670
CHI004 34.781192 1.148759e+06 1.933818e+06 41.974333 -87.728347
CHI005 12.046569 1.163048e+06 1.878843e+06 41.823189 -87.677349
CHI007 59.697837 1.161038e+06 1.878964e+06 41.823564 -87.684721
CHI008 19.142241 1.151781e+06 1.898395e+06 41.877071 -87.718168
CHI009 46.374004 1.151845e+06 1.899805e+06 41.880938 -87.717898
CHI010 55.978475 NaN NaN NaN NaN
CHI011 15.503542 1.155438e+06 1.906531e+06 41.899325 -87.704522
CHI013 43.584568 1.182453e+06 1.869712e+06 41.797704 -87.606445
CHI014 58.330670 1.156276e+06 1.858583e+06 41.767732 -87.702738
CHI015 13.998763 1.154378e+06 1.857352e+06 41.764391 -87.709728
CHI018 41.265596 1.156183e+06 1.859145e+06 41.769276 -87.703063
CHI019 40.185117 1.156992e+06 1.894531e+06 41.866364 -87.699143
CHI020 24.795818 1.157040e+06 1.894612e+06 41.866585 -87.698962
CHI021 116.066912 1.157136e+06 1.892362e+06 41.860408 -87.698672
CHI022 9.982166 1.167029e+06 1.830594e+06 41.690702 -87.664122
CHI023 20.836609 1.166994e+06 1.830711e+06 41.691025 -87.664248
CHI024 43.347826 1.154183e+06 1.939669e+06 41.990282 -87.708245
CHI025 17.594213 1.155158e+06 1.934429e+06 41.975884 -87.704801
CHI026 29.622984 1.159788e+06 1.923418e+06 41.945574 -87.688078
CHI027 42.721631 1.159704e+06 1.923559e+06 41.945963 -87.688384
CHI028 67.848101 1.158541e+06 1.923785e+06 41.946608 -87.692652
CHI029 77.253676 1.180985e+06 1.868165e+06 41.793493 -87.611876
CHI030 24.646341 1.150574e+06 1.871490e+06 41.803265 -87.723304
CHI031 58.457074 1.150513e+06 1.870811e+06 41.801402 -87.723545
CHI032 21.004773 1.150881e+06 1.871374e+06 41.802939 -87.722182
CHI033 20.791209 1.176498e+06 1.897411e+06 41.873850 -87.627448
CHI034 15.169788 1.176418e+06 1.897401e+06 41.873824 -87.627741
CHI035 5.924515 1.178587e+06 1.841967e+06 41.721657 -87.621463
CHI036 11.588529 1.178616e+06 1.842048e+06 41.721878 -87.621354
... ... ... ... ... ...
CHI154 4.730534 1.161996e+06 1.836307e+06 41.706487 -87.682391
CHI156 16.366848 1.132581e+06 1.938004e+06 41.986119 -87.787741
CHI157 8.686924 1.149395e+06 1.911608e+06 41.913375 -87.726589
CHI158 29.301351 1.131748e+06 1.936860e+06 41.982995 -87.790832
CHI159 12.723206 1.148867e+06 1.910282e+06 41.909746 -87.728563
CHI160 30.117093 1.149437e+06 1.894438e+06 41.866257 -87.726879
CHI161 12.437132 1.149006e+06 1.910365e+06 41.909972 -87.728050
CHI162 18.089552 1.149923e+06 1.894858e+06 41.867402 -87.725084
CHI163 20.076613 1.149841e+06 1.894931e+06 41.867603 -87.725383
CHI164 6.827181 1.155954e+06 1.867614e+06 41.792520 -87.703676
CHI165 2.479060 1.161996e+06 1.836227e+06 41.706268 -87.682391
CHI166 1.718563 1.155768e+06 1.867930e+06 41.793392 -87.704348
CHI167 2.097500 1.155816e+06 1.868012e+06 41.793615 -87.704170
CHI168 26.908591 1.167170e+06 1.849194e+06 41.741742 -87.663073
CHI169 32.552524 1.167083e+06 1.849460e+06 41.742474 -87.663385
CHI170 6.366040 1.167695e+06 1.849654e+06 41.742993 -87.661138
CHI171 4.818966 1.165145e+06 1.920791e+06 41.938253 -87.668463
CHI172 14.719225 1.165058e+06 1.920991e+06 41.938805 -87.668779
CHI173 40.184932 1.161334e+06 1.871365e+06 41.802706 -87.683845
CHI174 114.339130 1.121895e+06 1.922706e+06 41.944319 -87.827378
CHI175 8.948276 1.121638e+06 1.922907e+06 41.944874 -87.828317
CHI176 11.000000 1.122009e+06 1.923002e+06 41.945129 -87.826952
CHI177 2.961538 1.149783e+06 1.918290e+06 41.931705 -87.724990
CHI178 14.756098 1.148258e+06 1.873075e+06 41.807658 -87.731755
CHI179 5.363636 1.148661e+06 1.873165e+06 41.807896 -87.730276
CHI180 29.145833 1.183268e+06 1.857719e+06 41.764775 -87.603828
CHI181 27.836735 1.182997e+06 1.858058e+06 41.765711 -87.604810
CHI182 58.250000 1.166307e+06 1.908050e+06 41.903268 -87.664557
CHI183 24.083333 1.166381e+06 1.908132e+06 41.903490 -87.664285
CHI184 74.446809 1.182951e+06 1.857908e+06 41.765301 -87.604985

162 rows × 5 columns

traffic2 = fetch_traffic_data()
import datetime as dt
traffic2.sort_index(inplace=True)
# for future reference: below gives month and year
# traffic.index.to_series().apply(lambda x: dt.datetime.strftime(x, '%b %Y'))
# A: weekday, B: month, d: date, Y: year, m: month

traffic2["weekday"] = traffic2.index.to_series().apply(lambda x: dt.datetime.strftime(x, '%A'))
traffic2.groupby(['weekday']).sum()
VIOLATIONS X COORDINATE Y COORDINATE LATITUDE LONGITUDE
weekday
Friday 753468 2.969780e+10 4.849572e+10 1.072352e+06 -2.246282e+06
Monday 686724 2.976620e+10 4.860353e+10 1.074738e+06 -2.251272e+06
Saturday 676715 1.893861e+10 3.090150e+10 6.827700e+05 -1.429940e+06
Sunday 663801 1.892335e+10 3.087836e+10 6.822298e+05 -1.428801e+06
Thursday 730603 3.063207e+10 5.002385e+10 1.106136e+06 -2.317053e+06
Tuesday 708276 3.147777e+10 5.140274e+10 1.136724e+06 -2.381161e+06
Wednesday 705136 3.070936e+10 5.014615e+10 1.108929e+06 -2.322930e+06
# Did the number of unique camera increase? it seems yes by a marginal number
traffic2["Year"] = traffic2.index.to_series().apply(lambda x: dt.datetime.strftime(x, '%Y'))
traffic2.groupby(['Year']).agg('nunique')["CAMERA ID"].sort_index #['min', 'max', 'count', 'nunique'] possible
<bound method Series.sort_index of Year
2014    143
2015    150
2016    150
2017    150
2018    162
Name: CAMERA ID, dtype: int64>
# Any interesting pattern? Let's find out
traffic2.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 170521 entries, 2014-07-01 to 2018-12-23
Data columns (total 10 columns):
ADDRESS         170521 non-null object
CAMERA ID       170521 non-null object
VIOLATIONS      170521 non-null int64
X COORDINATE    163959 non-null float64
Y COORDINATE    163959 non-null float64
LATITUDE        163959 non-null float64
LONGITUDE       163959 non-null float64
LOCATION        163959 non-null object
weekday         170521 non-null object
Year            170521 non-null object
dtypes: float64(4), int64(1), object(5)
memory usage: 19.3+ MB
traffic2.describe()
VIOLATIONS X COORDINATE Y COORDINATE LATITUDE LONGITUDE
count 170521.000000 1.639590e+05 1.639590e+05 163959.000000 163959.000000
mean 28.880449 1.159712e+06 1.893473e+06 41.863389 -87.689229
std 36.636151 1.570560e+04 3.212393e+04 0.088338 0.057135
min 1.000000 1.121638e+06 1.820629e+06 41.663174 -87.828317
25% 7.000000 1.149783e+06 1.868078e+06 41.793493 -87.725084
50% 16.000000 1.159169e+06 1.898488e+06 41.877243 -87.689803
75% 37.000000 1.167170e+06 1.920991e+06 41.939040 -87.662810
max 479.000000 1.203645e+06 1.943342e+06 42.000260 -87.529848
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
location_plot = traffic2['VIOLATIONS'].hist(bins=20)
location_plot.set_title("Frequency and Number of Violations")
location_plot.set_xlabel("Number of Violations per Case")
location_plot.set_ylabel("Frequency of Cases")
Text(0,0.5,'Frequency of Cases')
traffic2["Year"] = traffic2.index.to_series().apply(lambda x: dt.datetime.strftime(x, '%Y'))
traffic2.groupby(['Year']).sum()
VIOLATIONS X COORDINATE Y COORDINATE LATITUDE LONGITUDE
Year
2014 659424 1.892601e+10 3.093007e+10 6.832156e+05 -1.430905e+06
2015 1201698 4.221439e+10 6.889669e+10 1.523463e+06 -3.191149e+06
2016 1116805 4.352645e+10 7.105770e+10 1.571194e+06 -3.291152e+06
2017 1015490 4.276128e+10 6.981971e+10 1.543780e+06 -3.233736e+06
2018 931306 4.271703e+10 6.974767e+10 1.542226e+06 -3.230496e+06
pd.to_datetime(traffic2.index.values.astype(float))
traffic2.plot(kind="scatter", x="LONGITUDE", y="LATITUDE", alpha=0.4, label="Traffic Violations per Case",
              figsize=(10,7),s = traffic2["VIOLATIONS"], c=traffic2.index, cmap=plt.get_cmap('Reds'),
              colorbar=True, sharex=False)
<matplotlib.axes._subplots.AxesSubplot at 0x117716828>
Darker color represents newer cases, larger radius represents more violation per case.

H1 Heading

H2 Heading

H3 Heading

basic text.

here’s italics

here’s bold

to get link: link

Bulleted lists:

  • First item
  • Second item
  • Third item

Here’s a numbered list:

  1. first
  2. second
  3. third

simple python block:

    import numpy as np

    def sumup(x,y):
      return np.sum(x,y)

R code block:

    library(tidyverse)
    df <- read_csv("some_file.csv")
    head(df)

inline code ‘x+y’. math: alt

good to go.

Updated: