Checking how things work for this website
To access script used for this post, check out here
import os
from six.moves import urllib
import pandas as pd
import numpy as np
DOWNLOAD_ROOT = "http://s3.amazonaws.com/gamma-datasets/"
TRAFFIC_PATH = "datasets"
TRAFFIC_URL = DOWNLOAD_ROOT + "Speed_Camera_Violations.csv"
## Use following codes for fetching data
def fetch_traffic_data(traffic_url=TRAFFIC_URL, traffic_path=TRAFFIC_PATH):
    if not os.path.isdir(traffic_path):
        os.makedirs(traffic_path)
    csv_path = os.path.join(traffic_path, "Speed_Camera_Violations.csv")
    urllib.request.urlretrieve(traffic_url, csv_path)
    return pd.read_csv(csv_path, parse_dates=[2], index_col=[2])
traffic = fetch_traffic_data()
traffic.sort_index(inplace=True)
traffic.groupby(level=0).mean()
| VIOLATIONS | X COORDINATE | Y COORDINATE | LATITUDE | LONGITUDE | |
|---|---|---|---|---|---|
| VIOLATION DATE | |||||
| 2014-07-01 | 52.168421 | 1.157331e+06 | 1.895851e+06 | 41.869966 | -87.697890 | 
| 2014-07-02 | 43.228261 | 1.157172e+06 | 1.896513e+06 | 41.871785 | -87.698460 | 
| 2014-07-03 | 51.617021 | 1.157533e+06 | 1.896956e+06 | 41.872993 | -87.697119 | 
| 2014-07-04 | 59.596774 | 1.159785e+06 | 1.900856e+06 | 41.883649 | -87.688748 | 
| 2014-07-05 | 55.380952 | 1.160100e+06 | 1.900902e+06 | 41.883770 | -87.687592 | 
| 2014-07-06 | 59.983333 | 1.159726e+06 | 1.901113e+06 | 41.884357 | -87.688963 | 
| 2014-07-07 | 49.237113 | 1.157365e+06 | 1.895910e+06 | 41.870129 | -87.697765 | 
| 2014-07-08 | 48.316327 | 1.157691e+06 | 1.895995e+06 | 41.870354 | -87.696563 | 
| 2014-07-09 | 49.824742 | 1.157944e+06 | 1.895733e+06 | 41.869632 | -87.695640 | 
| 2014-07-10 | 47.309278 | 1.157290e+06 | 1.897096e+06 | 41.873383 | -87.698004 | 
| 2014-07-11 | 51.686747 | 1.158252e+06 | 1.894460e+06 | 41.866131 | -87.694548 | 
| 2014-07-12 | 42.203390 | 1.160162e+06 | 1.899432e+06 | 41.879735 | -87.687409 | 
| 2014-07-13 | 57.645161 | 1.160090e+06 | 1.900417e+06 | 41.882440 | -87.687643 | 
| 2014-07-14 | 46.145833 | 1.157698e+06 | 1.894575e+06 | 41.866455 | -87.696585 | 
| 2014-07-15 | 46.857143 | 1.158040e+06 | 1.895050e+06 | 41.867752 | -87.695312 | 
| 2014-07-16 | 48.646465 | 1.158067e+06 | 1.895417e+06 | 41.868761 | -87.695204 | 
| 2014-07-17 | 48.103093 | 1.158137e+06 | 1.895128e+06 | 41.867964 | -87.694953 | 
| 2014-07-18 | 52.376471 | 1.158751e+06 | 1.894366e+06 | 41.865862 | -87.692724 | 
| 2014-07-19 | 58.274194 | 1.160721e+06 | 1.899518e+06 | 41.879958 | -87.685355 | 
| 2014-07-20 | 52.904762 | 1.160850e+06 | 1.900023e+06 | 41.881340 | -87.684866 | 
| 2014-07-21 | 49.322222 | 1.159274e+06 | 1.895277e+06 | 41.868352 | -87.690772 | 
| 2014-07-22 | 50.811111 | 1.159185e+06 | 1.895274e+06 | 41.868345 | -87.691100 | 
| 2014-07-23 | 51.318681 | 1.159495e+06 | 1.895639e+06 | 41.869340 | -87.689954 | 
| 2014-07-24 | 52.321839 | 1.159703e+06 | 1.896104e+06 | 41.870612 | -87.689177 | 
| 2014-07-25 | 48.236842 | 1.159034e+06 | 1.896310e+06 | 41.871189 | -87.691635 | 
| 2014-07-26 | 56.777778 | 1.161149e+06 | 1.899982e+06 | 41.881223 | -87.683770 | 
| 2014-07-27 | 57.903226 | 1.160853e+06 | 1.899920e+06 | 41.881059 | -87.684861 | 
| 2014-07-28 | 46.666667 | 1.159703e+06 | 1.896104e+06 | 41.870612 | -87.689177 | 
| 2014-07-29 | 44.534247 | 1.160348e+06 | 1.898468e+06 | 41.877085 | -87.686749 | 
| 2014-07-30 | 47.000000 | 1.160617e+06 | 1.898516e+06 | 41.877210 | -87.685761 | 
| ... | ... | ... | ... | ... | ... | 
| 2018-11-24 | 35.171053 | 1.159490e+06 | 1.894617e+06 | 41.866529 | -87.690028 | 
| 2018-11-25 | 22.191781 | 1.158107e+06 | 1.898191e+06 | 41.876368 | -87.694995 | 
| 2018-11-26 | 8.547009 | 1.159749e+06 | 1.893199e+06 | 41.862637 | -87.689100 | 
| 2018-11-27 | 14.200000 | 1.159642e+06 | 1.891924e+06 | 41.859141 | -87.689530 | 
| 2018-11-28 | 15.267606 | 1.159012e+06 | 1.891869e+06 | 41.859001 | -87.691843 | 
| 2018-11-29 | 18.236111 | 1.158425e+06 | 1.893154e+06 | 41.862540 | -87.693959 | 
| 2018-11-30 | 20.423841 | 1.158600e+06 | 1.892246e+06 | 41.860047 | -87.693342 | 
| 2018-12-01 | 20.613333 | 1.159016e+06 | 1.895342e+06 | 41.868530 | -87.691742 | 
| 2018-12-02 | 25.848101 | 1.159554e+06 | 1.895372e+06 | 41.868602 | -87.689768 | 
| 2018-12-03 | 18.751724 | 1.158749e+06 | 1.892040e+06 | 41.859477 | -87.692801 | 
| 2018-12-04 | 18.297872 | 1.159058e+06 | 1.892202e+06 | 41.859915 | -87.691666 | 
| 2018-12-05 | 17.839161 | 1.158522e+06 | 1.893111e+06 | 41.862422 | -87.693607 | 
| 2018-12-06 | 18.197183 | 1.158819e+06 | 1.892125e+06 | 41.859710 | -87.692544 | 
| 2018-12-07 | 19.897436 | 1.159356e+06 | 1.893968e+06 | 41.864754 | -87.690526 | 
| 2018-12-08 | 34.746835 | 1.159553e+06 | 1.895374e+06 | 41.868605 | -87.689775 | 
| 2018-12-09 | 32.975000 | 1.159798e+06 | 1.895477e+06 | 41.868884 | -87.688871 | 
| 2018-12-10 | 18.577586 | 1.159191e+06 | 1.893608e+06 | 41.863769 | -87.691146 | 
| 2018-12-11 | 17.264957 | 1.159350e+06 | 1.894265e+06 | 41.865570 | -87.690540 | 
| 2018-12-12 | 17.478632 | 1.158634e+06 | 1.894483e+06 | 41.866183 | -87.693162 | 
| 2018-12-13 | 16.660714 | 1.158779e+06 | 1.895139e+06 | 41.867979 | -87.692615 | 
| 2018-12-14 | 17.464912 | 1.158872e+06 | 1.895000e+06 | 41.867596 | -87.692276 | 
| 2018-12-15 | 32.986842 | 1.159668e+06 | 1.894936e+06 | 41.867400 | -87.689364 | 
| 2018-12-16 | 35.184211 | 1.159870e+06 | 1.894878e+06 | 41.867237 | -87.688627 | 
| 2018-12-17 | 19.377193 | 1.158933e+06 | 1.892744e+06 | 41.861405 | -87.692111 | 
| 2018-12-18 | 18.218487 | 1.159433e+06 | 1.895158e+06 | 41.868017 | -87.690214 | 
| 2018-12-19 | 19.854545 | 1.159134e+06 | 1.893288e+06 | 41.862893 | -87.691366 | 
| 2018-12-20 | 16.330275 | 1.157975e+06 | 1.894762e+06 | 41.866962 | -87.695577 | 
| 2018-12-21 | 18.811321 | 1.157903e+06 | 1.896157e+06 | 41.870791 | -87.695799 | 
| 2018-12-22 | 34.670886 | 1.159684e+06 | 1.895026e+06 | 41.867647 | -87.689303 | 
| 2018-12-23 | 26.040541 | 1.158972e+06 | 1.896172e+06 | 41.870809 | -87.691882 | 
1637 rows × 5 columns
traffic.set_index("CAMERA ID", inplace=True)
traffic.sort_index(inplace=True)
traffic.groupby(level=0).mean()
| VIOLATIONS | X COORDINATE | Y COORDINATE | LATITUDE | LONGITUDE | |
|---|---|---|---|---|---|
| CAMERA ID | |||||
| CHI003 | 116.335859 | 1.147853e+06 | 1.934275e+06 | 41.975605 | -87.731670 | 
| CHI004 | 34.781192 | 1.148759e+06 | 1.933818e+06 | 41.974333 | -87.728347 | 
| CHI005 | 12.046569 | 1.163048e+06 | 1.878843e+06 | 41.823189 | -87.677349 | 
| CHI007 | 59.697837 | 1.161038e+06 | 1.878964e+06 | 41.823564 | -87.684721 | 
| CHI008 | 19.142241 | 1.151781e+06 | 1.898395e+06 | 41.877071 | -87.718168 | 
| CHI009 | 46.374004 | 1.151845e+06 | 1.899805e+06 | 41.880938 | -87.717898 | 
| CHI010 | 55.978475 | NaN | NaN | NaN | NaN | 
| CHI011 | 15.503542 | 1.155438e+06 | 1.906531e+06 | 41.899325 | -87.704522 | 
| CHI013 | 43.584568 | 1.182453e+06 | 1.869712e+06 | 41.797704 | -87.606445 | 
| CHI014 | 58.330670 | 1.156276e+06 | 1.858583e+06 | 41.767732 | -87.702738 | 
| CHI015 | 13.998763 | 1.154378e+06 | 1.857352e+06 | 41.764391 | -87.709728 | 
| CHI018 | 41.265596 | 1.156183e+06 | 1.859145e+06 | 41.769276 | -87.703063 | 
| CHI019 | 40.185117 | 1.156992e+06 | 1.894531e+06 | 41.866364 | -87.699143 | 
| CHI020 | 24.795818 | 1.157040e+06 | 1.894612e+06 | 41.866585 | -87.698962 | 
| CHI021 | 116.066912 | 1.157136e+06 | 1.892362e+06 | 41.860408 | -87.698672 | 
| CHI022 | 9.982166 | 1.167029e+06 | 1.830594e+06 | 41.690702 | -87.664122 | 
| CHI023 | 20.836609 | 1.166994e+06 | 1.830711e+06 | 41.691025 | -87.664248 | 
| CHI024 | 43.347826 | 1.154183e+06 | 1.939669e+06 | 41.990282 | -87.708245 | 
| CHI025 | 17.594213 | 1.155158e+06 | 1.934429e+06 | 41.975884 | -87.704801 | 
| CHI026 | 29.622984 | 1.159788e+06 | 1.923418e+06 | 41.945574 | -87.688078 | 
| CHI027 | 42.721631 | 1.159704e+06 | 1.923559e+06 | 41.945963 | -87.688384 | 
| CHI028 | 67.848101 | 1.158541e+06 | 1.923785e+06 | 41.946608 | -87.692652 | 
| CHI029 | 77.253676 | 1.180985e+06 | 1.868165e+06 | 41.793493 | -87.611876 | 
| CHI030 | 24.646341 | 1.150574e+06 | 1.871490e+06 | 41.803265 | -87.723304 | 
| CHI031 | 58.457074 | 1.150513e+06 | 1.870811e+06 | 41.801402 | -87.723545 | 
| CHI032 | 21.004773 | 1.150881e+06 | 1.871374e+06 | 41.802939 | -87.722182 | 
| CHI033 | 20.791209 | 1.176498e+06 | 1.897411e+06 | 41.873850 | -87.627448 | 
| CHI034 | 15.169788 | 1.176418e+06 | 1.897401e+06 | 41.873824 | -87.627741 | 
| CHI035 | 5.924515 | 1.178587e+06 | 1.841967e+06 | 41.721657 | -87.621463 | 
| CHI036 | 11.588529 | 1.178616e+06 | 1.842048e+06 | 41.721878 | -87.621354 | 
| ... | ... | ... | ... | ... | ... | 
| CHI154 | 4.730534 | 1.161996e+06 | 1.836307e+06 | 41.706487 | -87.682391 | 
| CHI156 | 16.366848 | 1.132581e+06 | 1.938004e+06 | 41.986119 | -87.787741 | 
| CHI157 | 8.686924 | 1.149395e+06 | 1.911608e+06 | 41.913375 | -87.726589 | 
| CHI158 | 29.301351 | 1.131748e+06 | 1.936860e+06 | 41.982995 | -87.790832 | 
| CHI159 | 12.723206 | 1.148867e+06 | 1.910282e+06 | 41.909746 | -87.728563 | 
| CHI160 | 30.117093 | 1.149437e+06 | 1.894438e+06 | 41.866257 | -87.726879 | 
| CHI161 | 12.437132 | 1.149006e+06 | 1.910365e+06 | 41.909972 | -87.728050 | 
| CHI162 | 18.089552 | 1.149923e+06 | 1.894858e+06 | 41.867402 | -87.725084 | 
| CHI163 | 20.076613 | 1.149841e+06 | 1.894931e+06 | 41.867603 | -87.725383 | 
| CHI164 | 6.827181 | 1.155954e+06 | 1.867614e+06 | 41.792520 | -87.703676 | 
| CHI165 | 2.479060 | 1.161996e+06 | 1.836227e+06 | 41.706268 | -87.682391 | 
| CHI166 | 1.718563 | 1.155768e+06 | 1.867930e+06 | 41.793392 | -87.704348 | 
| CHI167 | 2.097500 | 1.155816e+06 | 1.868012e+06 | 41.793615 | -87.704170 | 
| CHI168 | 26.908591 | 1.167170e+06 | 1.849194e+06 | 41.741742 | -87.663073 | 
| CHI169 | 32.552524 | 1.167083e+06 | 1.849460e+06 | 41.742474 | -87.663385 | 
| CHI170 | 6.366040 | 1.167695e+06 | 1.849654e+06 | 41.742993 | -87.661138 | 
| CHI171 | 4.818966 | 1.165145e+06 | 1.920791e+06 | 41.938253 | -87.668463 | 
| CHI172 | 14.719225 | 1.165058e+06 | 1.920991e+06 | 41.938805 | -87.668779 | 
| CHI173 | 40.184932 | 1.161334e+06 | 1.871365e+06 | 41.802706 | -87.683845 | 
| CHI174 | 114.339130 | 1.121895e+06 | 1.922706e+06 | 41.944319 | -87.827378 | 
| CHI175 | 8.948276 | 1.121638e+06 | 1.922907e+06 | 41.944874 | -87.828317 | 
| CHI176 | 11.000000 | 1.122009e+06 | 1.923002e+06 | 41.945129 | -87.826952 | 
| CHI177 | 2.961538 | 1.149783e+06 | 1.918290e+06 | 41.931705 | -87.724990 | 
| CHI178 | 14.756098 | 1.148258e+06 | 1.873075e+06 | 41.807658 | -87.731755 | 
| CHI179 | 5.363636 | 1.148661e+06 | 1.873165e+06 | 41.807896 | -87.730276 | 
| CHI180 | 29.145833 | 1.183268e+06 | 1.857719e+06 | 41.764775 | -87.603828 | 
| CHI181 | 27.836735 | 1.182997e+06 | 1.858058e+06 | 41.765711 | -87.604810 | 
| CHI182 | 58.250000 | 1.166307e+06 | 1.908050e+06 | 41.903268 | -87.664557 | 
| CHI183 | 24.083333 | 1.166381e+06 | 1.908132e+06 | 41.903490 | -87.664285 | 
| CHI184 | 74.446809 | 1.182951e+06 | 1.857908e+06 | 41.765301 | -87.604985 | 
162 rows × 5 columns
traffic2 = fetch_traffic_data()
import datetime as dt
traffic2.sort_index(inplace=True)
# for future reference: below gives month and year
# traffic.index.to_series().apply(lambda x: dt.datetime.strftime(x, '%b %Y'))
# A: weekday, B: month, d: date, Y: year, m: month
traffic2["weekday"] = traffic2.index.to_series().apply(lambda x: dt.datetime.strftime(x, '%A'))
traffic2.groupby(['weekday']).sum()
| VIOLATIONS | X COORDINATE | Y COORDINATE | LATITUDE | LONGITUDE | |
|---|---|---|---|---|---|
| weekday | |||||
| Friday | 753468 | 2.969780e+10 | 4.849572e+10 | 1.072352e+06 | -2.246282e+06 | 
| Monday | 686724 | 2.976620e+10 | 4.860353e+10 | 1.074738e+06 | -2.251272e+06 | 
| Saturday | 676715 | 1.893861e+10 | 3.090150e+10 | 6.827700e+05 | -1.429940e+06 | 
| Sunday | 663801 | 1.892335e+10 | 3.087836e+10 | 6.822298e+05 | -1.428801e+06 | 
| Thursday | 730603 | 3.063207e+10 | 5.002385e+10 | 1.106136e+06 | -2.317053e+06 | 
| Tuesday | 708276 | 3.147777e+10 | 5.140274e+10 | 1.136724e+06 | -2.381161e+06 | 
| Wednesday | 705136 | 3.070936e+10 | 5.014615e+10 | 1.108929e+06 | -2.322930e+06 | 
# Did the number of unique camera increase? it seems yes by a marginal number
traffic2["Year"] = traffic2.index.to_series().apply(lambda x: dt.datetime.strftime(x, '%Y'))
traffic2.groupby(['Year']).agg('nunique')["CAMERA ID"].sort_index #['min', 'max', 'count', 'nunique'] possible
<bound method Series.sort_index of Year
2014    143
2015    150
2016    150
2017    150
2018    162
Name: CAMERA ID, dtype: int64>
# Any interesting pattern? Let's find out
traffic2.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 170521 entries, 2014-07-01 to 2018-12-23
Data columns (total 10 columns):
ADDRESS         170521 non-null object
CAMERA ID       170521 non-null object
VIOLATIONS      170521 non-null int64
X COORDINATE    163959 non-null float64
Y COORDINATE    163959 non-null float64
LATITUDE        163959 non-null float64
LONGITUDE       163959 non-null float64
LOCATION        163959 non-null object
weekday         170521 non-null object
Year            170521 non-null object
dtypes: float64(4), int64(1), object(5)
memory usage: 19.3+ MB
traffic2.describe()
| VIOLATIONS | X COORDINATE | Y COORDINATE | LATITUDE | LONGITUDE | |
|---|---|---|---|---|---|
| count | 170521.000000 | 1.639590e+05 | 1.639590e+05 | 163959.000000 | 163959.000000 | 
| mean | 28.880449 | 1.159712e+06 | 1.893473e+06 | 41.863389 | -87.689229 | 
| std | 36.636151 | 1.570560e+04 | 3.212393e+04 | 0.088338 | 0.057135 | 
| min | 1.000000 | 1.121638e+06 | 1.820629e+06 | 41.663174 | -87.828317 | 
| 25% | 7.000000 | 1.149783e+06 | 1.868078e+06 | 41.793493 | -87.725084 | 
| 50% | 16.000000 | 1.159169e+06 | 1.898488e+06 | 41.877243 | -87.689803 | 
| 75% | 37.000000 | 1.167170e+06 | 1.920991e+06 | 41.939040 | -87.662810 | 
| max | 479.000000 | 1.203645e+06 | 1.943342e+06 | 42.000260 | -87.529848 | 
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
location_plot = traffic2['VIOLATIONS'].hist(bins=20)
location_plot.set_title("Frequency and Number of Violations")
location_plot.set_xlabel("Number of Violations per Case")
location_plot.set_ylabel("Frequency of Cases")
Text(0,0.5,'Frequency of Cases')
 
traffic2["Year"] = traffic2.index.to_series().apply(lambda x: dt.datetime.strftime(x, '%Y'))
traffic2.groupby(['Year']).sum()
| VIOLATIONS | X COORDINATE | Y COORDINATE | LATITUDE | LONGITUDE | |
|---|---|---|---|---|---|
| Year | |||||
| 2014 | 659424 | 1.892601e+10 | 3.093007e+10 | 6.832156e+05 | -1.430905e+06 | 
| 2015 | 1201698 | 4.221439e+10 | 6.889669e+10 | 1.523463e+06 | -3.191149e+06 | 
| 2016 | 1116805 | 4.352645e+10 | 7.105770e+10 | 1.571194e+06 | -3.291152e+06 | 
| 2017 | 1015490 | 4.276128e+10 | 6.981971e+10 | 1.543780e+06 | -3.233736e+06 | 
| 2018 | 931306 | 4.271703e+10 | 6.974767e+10 | 1.542226e+06 | -3.230496e+06 | 
pd.to_datetime(traffic2.index.values.astype(float))
traffic2.plot(kind="scatter", x="LONGITUDE", y="LATITUDE", alpha=0.4, label="Traffic Violations per Case",
              figsize=(10,7),s = traffic2["VIOLATIONS"], c=traffic2.index, cmap=plt.get_cmap('Reds'),
              colorbar=True, sharex=False)
<matplotlib.axes._subplots.AxesSubplot at 0x117716828>
 
H1 Heading
H2 Heading
H3 Heading
basic text.
here’s italics
here’s bold
to get link: link
Bulleted lists:
- First item
- Second item
- Third item
Here’s a numbered list:
- first
- second
- third
simple python block:
    import numpy as np
    def sumup(x,y):
      return np.sum(x,y)
R code block:
    library(tidyverse)
    df <- read_csv("some_file.csv")
    head(df)
inline code ‘x+y’.
math: 

good to go.