Checking how things work for this website
To access script used for this post, check out here
import os
from six.moves import urllib
import pandas as pd
import numpy as np
DOWNLOAD_ROOT = "http://s3.amazonaws.com/gamma-datasets/"
TRAFFIC_PATH = "datasets"
TRAFFIC_URL = DOWNLOAD_ROOT + "Speed_Camera_Violations.csv"
## Use following codes for fetching data
def fetch_traffic_data(traffic_url=TRAFFIC_URL, traffic_path=TRAFFIC_PATH):
if not os.path.isdir(traffic_path):
os.makedirs(traffic_path)
csv_path = os.path.join(traffic_path, "Speed_Camera_Violations.csv")
urllib.request.urlretrieve(traffic_url, csv_path)
return pd.read_csv(csv_path, parse_dates=[2], index_col=[2])
traffic = fetch_traffic_data()
traffic.sort_index(inplace=True)
traffic.groupby(level=0).mean()
VIOLATIONS | X COORDINATE | Y COORDINATE | LATITUDE | LONGITUDE | |
---|---|---|---|---|---|
VIOLATION DATE | |||||
2014-07-01 | 52.168421 | 1.157331e+06 | 1.895851e+06 | 41.869966 | -87.697890 |
2014-07-02 | 43.228261 | 1.157172e+06 | 1.896513e+06 | 41.871785 | -87.698460 |
2014-07-03 | 51.617021 | 1.157533e+06 | 1.896956e+06 | 41.872993 | -87.697119 |
2014-07-04 | 59.596774 | 1.159785e+06 | 1.900856e+06 | 41.883649 | -87.688748 |
2014-07-05 | 55.380952 | 1.160100e+06 | 1.900902e+06 | 41.883770 | -87.687592 |
2014-07-06 | 59.983333 | 1.159726e+06 | 1.901113e+06 | 41.884357 | -87.688963 |
2014-07-07 | 49.237113 | 1.157365e+06 | 1.895910e+06 | 41.870129 | -87.697765 |
2014-07-08 | 48.316327 | 1.157691e+06 | 1.895995e+06 | 41.870354 | -87.696563 |
2014-07-09 | 49.824742 | 1.157944e+06 | 1.895733e+06 | 41.869632 | -87.695640 |
2014-07-10 | 47.309278 | 1.157290e+06 | 1.897096e+06 | 41.873383 | -87.698004 |
2014-07-11 | 51.686747 | 1.158252e+06 | 1.894460e+06 | 41.866131 | -87.694548 |
2014-07-12 | 42.203390 | 1.160162e+06 | 1.899432e+06 | 41.879735 | -87.687409 |
2014-07-13 | 57.645161 | 1.160090e+06 | 1.900417e+06 | 41.882440 | -87.687643 |
2014-07-14 | 46.145833 | 1.157698e+06 | 1.894575e+06 | 41.866455 | -87.696585 |
2014-07-15 | 46.857143 | 1.158040e+06 | 1.895050e+06 | 41.867752 | -87.695312 |
2014-07-16 | 48.646465 | 1.158067e+06 | 1.895417e+06 | 41.868761 | -87.695204 |
2014-07-17 | 48.103093 | 1.158137e+06 | 1.895128e+06 | 41.867964 | -87.694953 |
2014-07-18 | 52.376471 | 1.158751e+06 | 1.894366e+06 | 41.865862 | -87.692724 |
2014-07-19 | 58.274194 | 1.160721e+06 | 1.899518e+06 | 41.879958 | -87.685355 |
2014-07-20 | 52.904762 | 1.160850e+06 | 1.900023e+06 | 41.881340 | -87.684866 |
2014-07-21 | 49.322222 | 1.159274e+06 | 1.895277e+06 | 41.868352 | -87.690772 |
2014-07-22 | 50.811111 | 1.159185e+06 | 1.895274e+06 | 41.868345 | -87.691100 |
2014-07-23 | 51.318681 | 1.159495e+06 | 1.895639e+06 | 41.869340 | -87.689954 |
2014-07-24 | 52.321839 | 1.159703e+06 | 1.896104e+06 | 41.870612 | -87.689177 |
2014-07-25 | 48.236842 | 1.159034e+06 | 1.896310e+06 | 41.871189 | -87.691635 |
2014-07-26 | 56.777778 | 1.161149e+06 | 1.899982e+06 | 41.881223 | -87.683770 |
2014-07-27 | 57.903226 | 1.160853e+06 | 1.899920e+06 | 41.881059 | -87.684861 |
2014-07-28 | 46.666667 | 1.159703e+06 | 1.896104e+06 | 41.870612 | -87.689177 |
2014-07-29 | 44.534247 | 1.160348e+06 | 1.898468e+06 | 41.877085 | -87.686749 |
2014-07-30 | 47.000000 | 1.160617e+06 | 1.898516e+06 | 41.877210 | -87.685761 |
... | ... | ... | ... | ... | ... |
2018-11-24 | 35.171053 | 1.159490e+06 | 1.894617e+06 | 41.866529 | -87.690028 |
2018-11-25 | 22.191781 | 1.158107e+06 | 1.898191e+06 | 41.876368 | -87.694995 |
2018-11-26 | 8.547009 | 1.159749e+06 | 1.893199e+06 | 41.862637 | -87.689100 |
2018-11-27 | 14.200000 | 1.159642e+06 | 1.891924e+06 | 41.859141 | -87.689530 |
2018-11-28 | 15.267606 | 1.159012e+06 | 1.891869e+06 | 41.859001 | -87.691843 |
2018-11-29 | 18.236111 | 1.158425e+06 | 1.893154e+06 | 41.862540 | -87.693959 |
2018-11-30 | 20.423841 | 1.158600e+06 | 1.892246e+06 | 41.860047 | -87.693342 |
2018-12-01 | 20.613333 | 1.159016e+06 | 1.895342e+06 | 41.868530 | -87.691742 |
2018-12-02 | 25.848101 | 1.159554e+06 | 1.895372e+06 | 41.868602 | -87.689768 |
2018-12-03 | 18.751724 | 1.158749e+06 | 1.892040e+06 | 41.859477 | -87.692801 |
2018-12-04 | 18.297872 | 1.159058e+06 | 1.892202e+06 | 41.859915 | -87.691666 |
2018-12-05 | 17.839161 | 1.158522e+06 | 1.893111e+06 | 41.862422 | -87.693607 |
2018-12-06 | 18.197183 | 1.158819e+06 | 1.892125e+06 | 41.859710 | -87.692544 |
2018-12-07 | 19.897436 | 1.159356e+06 | 1.893968e+06 | 41.864754 | -87.690526 |
2018-12-08 | 34.746835 | 1.159553e+06 | 1.895374e+06 | 41.868605 | -87.689775 |
2018-12-09 | 32.975000 | 1.159798e+06 | 1.895477e+06 | 41.868884 | -87.688871 |
2018-12-10 | 18.577586 | 1.159191e+06 | 1.893608e+06 | 41.863769 | -87.691146 |
2018-12-11 | 17.264957 | 1.159350e+06 | 1.894265e+06 | 41.865570 | -87.690540 |
2018-12-12 | 17.478632 | 1.158634e+06 | 1.894483e+06 | 41.866183 | -87.693162 |
2018-12-13 | 16.660714 | 1.158779e+06 | 1.895139e+06 | 41.867979 | -87.692615 |
2018-12-14 | 17.464912 | 1.158872e+06 | 1.895000e+06 | 41.867596 | -87.692276 |
2018-12-15 | 32.986842 | 1.159668e+06 | 1.894936e+06 | 41.867400 | -87.689364 |
2018-12-16 | 35.184211 | 1.159870e+06 | 1.894878e+06 | 41.867237 | -87.688627 |
2018-12-17 | 19.377193 | 1.158933e+06 | 1.892744e+06 | 41.861405 | -87.692111 |
2018-12-18 | 18.218487 | 1.159433e+06 | 1.895158e+06 | 41.868017 | -87.690214 |
2018-12-19 | 19.854545 | 1.159134e+06 | 1.893288e+06 | 41.862893 | -87.691366 |
2018-12-20 | 16.330275 | 1.157975e+06 | 1.894762e+06 | 41.866962 | -87.695577 |
2018-12-21 | 18.811321 | 1.157903e+06 | 1.896157e+06 | 41.870791 | -87.695799 |
2018-12-22 | 34.670886 | 1.159684e+06 | 1.895026e+06 | 41.867647 | -87.689303 |
2018-12-23 | 26.040541 | 1.158972e+06 | 1.896172e+06 | 41.870809 | -87.691882 |
1637 rows × 5 columns
traffic.set_index("CAMERA ID", inplace=True)
traffic.sort_index(inplace=True)
traffic.groupby(level=0).mean()
VIOLATIONS | X COORDINATE | Y COORDINATE | LATITUDE | LONGITUDE | |
---|---|---|---|---|---|
CAMERA ID | |||||
CHI003 | 116.335859 | 1.147853e+06 | 1.934275e+06 | 41.975605 | -87.731670 |
CHI004 | 34.781192 | 1.148759e+06 | 1.933818e+06 | 41.974333 | -87.728347 |
CHI005 | 12.046569 | 1.163048e+06 | 1.878843e+06 | 41.823189 | -87.677349 |
CHI007 | 59.697837 | 1.161038e+06 | 1.878964e+06 | 41.823564 | -87.684721 |
CHI008 | 19.142241 | 1.151781e+06 | 1.898395e+06 | 41.877071 | -87.718168 |
CHI009 | 46.374004 | 1.151845e+06 | 1.899805e+06 | 41.880938 | -87.717898 |
CHI010 | 55.978475 | NaN | NaN | NaN | NaN |
CHI011 | 15.503542 | 1.155438e+06 | 1.906531e+06 | 41.899325 | -87.704522 |
CHI013 | 43.584568 | 1.182453e+06 | 1.869712e+06 | 41.797704 | -87.606445 |
CHI014 | 58.330670 | 1.156276e+06 | 1.858583e+06 | 41.767732 | -87.702738 |
CHI015 | 13.998763 | 1.154378e+06 | 1.857352e+06 | 41.764391 | -87.709728 |
CHI018 | 41.265596 | 1.156183e+06 | 1.859145e+06 | 41.769276 | -87.703063 |
CHI019 | 40.185117 | 1.156992e+06 | 1.894531e+06 | 41.866364 | -87.699143 |
CHI020 | 24.795818 | 1.157040e+06 | 1.894612e+06 | 41.866585 | -87.698962 |
CHI021 | 116.066912 | 1.157136e+06 | 1.892362e+06 | 41.860408 | -87.698672 |
CHI022 | 9.982166 | 1.167029e+06 | 1.830594e+06 | 41.690702 | -87.664122 |
CHI023 | 20.836609 | 1.166994e+06 | 1.830711e+06 | 41.691025 | -87.664248 |
CHI024 | 43.347826 | 1.154183e+06 | 1.939669e+06 | 41.990282 | -87.708245 |
CHI025 | 17.594213 | 1.155158e+06 | 1.934429e+06 | 41.975884 | -87.704801 |
CHI026 | 29.622984 | 1.159788e+06 | 1.923418e+06 | 41.945574 | -87.688078 |
CHI027 | 42.721631 | 1.159704e+06 | 1.923559e+06 | 41.945963 | -87.688384 |
CHI028 | 67.848101 | 1.158541e+06 | 1.923785e+06 | 41.946608 | -87.692652 |
CHI029 | 77.253676 | 1.180985e+06 | 1.868165e+06 | 41.793493 | -87.611876 |
CHI030 | 24.646341 | 1.150574e+06 | 1.871490e+06 | 41.803265 | -87.723304 |
CHI031 | 58.457074 | 1.150513e+06 | 1.870811e+06 | 41.801402 | -87.723545 |
CHI032 | 21.004773 | 1.150881e+06 | 1.871374e+06 | 41.802939 | -87.722182 |
CHI033 | 20.791209 | 1.176498e+06 | 1.897411e+06 | 41.873850 | -87.627448 |
CHI034 | 15.169788 | 1.176418e+06 | 1.897401e+06 | 41.873824 | -87.627741 |
CHI035 | 5.924515 | 1.178587e+06 | 1.841967e+06 | 41.721657 | -87.621463 |
CHI036 | 11.588529 | 1.178616e+06 | 1.842048e+06 | 41.721878 | -87.621354 |
... | ... | ... | ... | ... | ... |
CHI154 | 4.730534 | 1.161996e+06 | 1.836307e+06 | 41.706487 | -87.682391 |
CHI156 | 16.366848 | 1.132581e+06 | 1.938004e+06 | 41.986119 | -87.787741 |
CHI157 | 8.686924 | 1.149395e+06 | 1.911608e+06 | 41.913375 | -87.726589 |
CHI158 | 29.301351 | 1.131748e+06 | 1.936860e+06 | 41.982995 | -87.790832 |
CHI159 | 12.723206 | 1.148867e+06 | 1.910282e+06 | 41.909746 | -87.728563 |
CHI160 | 30.117093 | 1.149437e+06 | 1.894438e+06 | 41.866257 | -87.726879 |
CHI161 | 12.437132 | 1.149006e+06 | 1.910365e+06 | 41.909972 | -87.728050 |
CHI162 | 18.089552 | 1.149923e+06 | 1.894858e+06 | 41.867402 | -87.725084 |
CHI163 | 20.076613 | 1.149841e+06 | 1.894931e+06 | 41.867603 | -87.725383 |
CHI164 | 6.827181 | 1.155954e+06 | 1.867614e+06 | 41.792520 | -87.703676 |
CHI165 | 2.479060 | 1.161996e+06 | 1.836227e+06 | 41.706268 | -87.682391 |
CHI166 | 1.718563 | 1.155768e+06 | 1.867930e+06 | 41.793392 | -87.704348 |
CHI167 | 2.097500 | 1.155816e+06 | 1.868012e+06 | 41.793615 | -87.704170 |
CHI168 | 26.908591 | 1.167170e+06 | 1.849194e+06 | 41.741742 | -87.663073 |
CHI169 | 32.552524 | 1.167083e+06 | 1.849460e+06 | 41.742474 | -87.663385 |
CHI170 | 6.366040 | 1.167695e+06 | 1.849654e+06 | 41.742993 | -87.661138 |
CHI171 | 4.818966 | 1.165145e+06 | 1.920791e+06 | 41.938253 | -87.668463 |
CHI172 | 14.719225 | 1.165058e+06 | 1.920991e+06 | 41.938805 | -87.668779 |
CHI173 | 40.184932 | 1.161334e+06 | 1.871365e+06 | 41.802706 | -87.683845 |
CHI174 | 114.339130 | 1.121895e+06 | 1.922706e+06 | 41.944319 | -87.827378 |
CHI175 | 8.948276 | 1.121638e+06 | 1.922907e+06 | 41.944874 | -87.828317 |
CHI176 | 11.000000 | 1.122009e+06 | 1.923002e+06 | 41.945129 | -87.826952 |
CHI177 | 2.961538 | 1.149783e+06 | 1.918290e+06 | 41.931705 | -87.724990 |
CHI178 | 14.756098 | 1.148258e+06 | 1.873075e+06 | 41.807658 | -87.731755 |
CHI179 | 5.363636 | 1.148661e+06 | 1.873165e+06 | 41.807896 | -87.730276 |
CHI180 | 29.145833 | 1.183268e+06 | 1.857719e+06 | 41.764775 | -87.603828 |
CHI181 | 27.836735 | 1.182997e+06 | 1.858058e+06 | 41.765711 | -87.604810 |
CHI182 | 58.250000 | 1.166307e+06 | 1.908050e+06 | 41.903268 | -87.664557 |
CHI183 | 24.083333 | 1.166381e+06 | 1.908132e+06 | 41.903490 | -87.664285 |
CHI184 | 74.446809 | 1.182951e+06 | 1.857908e+06 | 41.765301 | -87.604985 |
162 rows × 5 columns
traffic2 = fetch_traffic_data()
import datetime as dt
traffic2.sort_index(inplace=True)
# for future reference: below gives month and year
# traffic.index.to_series().apply(lambda x: dt.datetime.strftime(x, '%b %Y'))
# A: weekday, B: month, d: date, Y: year, m: month
traffic2["weekday"] = traffic2.index.to_series().apply(lambda x: dt.datetime.strftime(x, '%A'))
traffic2.groupby(['weekday']).sum()
VIOLATIONS | X COORDINATE | Y COORDINATE | LATITUDE | LONGITUDE | |
---|---|---|---|---|---|
weekday | |||||
Friday | 753468 | 2.969780e+10 | 4.849572e+10 | 1.072352e+06 | -2.246282e+06 |
Monday | 686724 | 2.976620e+10 | 4.860353e+10 | 1.074738e+06 | -2.251272e+06 |
Saturday | 676715 | 1.893861e+10 | 3.090150e+10 | 6.827700e+05 | -1.429940e+06 |
Sunday | 663801 | 1.892335e+10 | 3.087836e+10 | 6.822298e+05 | -1.428801e+06 |
Thursday | 730603 | 3.063207e+10 | 5.002385e+10 | 1.106136e+06 | -2.317053e+06 |
Tuesday | 708276 | 3.147777e+10 | 5.140274e+10 | 1.136724e+06 | -2.381161e+06 |
Wednesday | 705136 | 3.070936e+10 | 5.014615e+10 | 1.108929e+06 | -2.322930e+06 |
# Did the number of unique camera increase? it seems yes by a marginal number
traffic2["Year"] = traffic2.index.to_series().apply(lambda x: dt.datetime.strftime(x, '%Y'))
traffic2.groupby(['Year']).agg('nunique')["CAMERA ID"].sort_index #['min', 'max', 'count', 'nunique'] possible
<bound method Series.sort_index of Year
2014 143
2015 150
2016 150
2017 150
2018 162
Name: CAMERA ID, dtype: int64>
# Any interesting pattern? Let's find out
traffic2.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 170521 entries, 2014-07-01 to 2018-12-23
Data columns (total 10 columns):
ADDRESS 170521 non-null object
CAMERA ID 170521 non-null object
VIOLATIONS 170521 non-null int64
X COORDINATE 163959 non-null float64
Y COORDINATE 163959 non-null float64
LATITUDE 163959 non-null float64
LONGITUDE 163959 non-null float64
LOCATION 163959 non-null object
weekday 170521 non-null object
Year 170521 non-null object
dtypes: float64(4), int64(1), object(5)
memory usage: 19.3+ MB
traffic2.describe()
VIOLATIONS | X COORDINATE | Y COORDINATE | LATITUDE | LONGITUDE | |
---|---|---|---|---|---|
count | 170521.000000 | 1.639590e+05 | 1.639590e+05 | 163959.000000 | 163959.000000 |
mean | 28.880449 | 1.159712e+06 | 1.893473e+06 | 41.863389 | -87.689229 |
std | 36.636151 | 1.570560e+04 | 3.212393e+04 | 0.088338 | 0.057135 |
min | 1.000000 | 1.121638e+06 | 1.820629e+06 | 41.663174 | -87.828317 |
25% | 7.000000 | 1.149783e+06 | 1.868078e+06 | 41.793493 | -87.725084 |
50% | 16.000000 | 1.159169e+06 | 1.898488e+06 | 41.877243 | -87.689803 |
75% | 37.000000 | 1.167170e+06 | 1.920991e+06 | 41.939040 | -87.662810 |
max | 479.000000 | 1.203645e+06 | 1.943342e+06 | 42.000260 | -87.529848 |
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
location_plot = traffic2['VIOLATIONS'].hist(bins=20)
location_plot.set_title("Frequency and Number of Violations")
location_plot.set_xlabel("Number of Violations per Case")
location_plot.set_ylabel("Frequency of Cases")
Text(0,0.5,'Frequency of Cases')
traffic2["Year"] = traffic2.index.to_series().apply(lambda x: dt.datetime.strftime(x, '%Y'))
traffic2.groupby(['Year']).sum()
VIOLATIONS | X COORDINATE | Y COORDINATE | LATITUDE | LONGITUDE | |
---|---|---|---|---|---|
Year | |||||
2014 | 659424 | 1.892601e+10 | 3.093007e+10 | 6.832156e+05 | -1.430905e+06 |
2015 | 1201698 | 4.221439e+10 | 6.889669e+10 | 1.523463e+06 | -3.191149e+06 |
2016 | 1116805 | 4.352645e+10 | 7.105770e+10 | 1.571194e+06 | -3.291152e+06 |
2017 | 1015490 | 4.276128e+10 | 6.981971e+10 | 1.543780e+06 | -3.233736e+06 |
2018 | 931306 | 4.271703e+10 | 6.974767e+10 | 1.542226e+06 | -3.230496e+06 |
pd.to_datetime(traffic2.index.values.astype(float))
traffic2.plot(kind="scatter", x="LONGITUDE", y="LATITUDE", alpha=0.4, label="Traffic Violations per Case",
figsize=(10,7),s = traffic2["VIOLATIONS"], c=traffic2.index, cmap=plt.get_cmap('Reds'),
colorbar=True, sharex=False)
<matplotlib.axes._subplots.AxesSubplot at 0x117716828>
H1 Heading
H2 Heading
H3 Heading
basic text.
here’s italics
here’s bold
to get link: link
Bulleted lists:
- First item
- Second item
- Third item
Here’s a numbered list:
- first
- second
- third
simple python block:
import numpy as np
def sumup(x,y):
return np.sum(x,y)
R code block:
library(tidyverse)
df <- read_csv("some_file.csv")
head(df)
inline code ‘x+y’. math:
good to go.