Checking how things work
import os
from six.moves import urllib
import pandas as pd
import numpy as np
DOWNLOAD_ROOT = "http://s3.amazonaws.com/gamma-datasets/"
TRAFFIC_PATH = "datasets"
TRAFFIC_URL = DOWNLOAD_ROOT + "Speed_Camera_Violations.csv"
## Use following codes for fetching data
def fetch_traffic_data(traffic_url=TRAFFIC_URL, traffic_path=TRAFFIC_PATH):
if not os.path.isdir(traffic_path):
os.makedirs(traffic_path)
csv_path = os.path.join(traffic_path, "Speed_Camera_Violations.csv")
urllib.request.urlretrieve(traffic_url, csv_path)
return pd.read_csv(csv_path, parse_dates=[2], index_col=[2])
traffic = fetch_traffic_data()
traffic.sort_index(inplace=True)
traffic.groupby(level=0).mean()
VIOLATIONS | X COORDINATE | Y COORDINATE | LATITUDE | LONGITUDE | |
---|---|---|---|---|---|
VIOLATION DATE | |||||
2014-07-01 | 52.168421 | 1.157331e+06 | 1.895851e+06 | 41.869966 | -87.697890 |
2014-07-02 | 43.228261 | 1.157172e+06 | 1.896513e+06 | 41.871785 | -87.698460 |
2014-07-03 | 51.617021 | 1.157533e+06 | 1.896956e+06 | 41.872993 | -87.697119 |
2014-07-04 | 59.596774 | 1.159785e+06 | 1.900856e+06 | 41.883649 | -87.688748 |
2014-07-05 | 55.380952 | 1.160100e+06 | 1.900902e+06 | 41.883770 | -87.687592 |
2014-07-06 | 59.983333 | 1.159726e+06 | 1.901113e+06 | 41.884357 | -87.688963 |
2014-07-07 | 49.237113 | 1.157365e+06 | 1.895910e+06 | 41.870129 | -87.697765 |
2014-07-08 | 48.316327 | 1.157691e+06 | 1.895995e+06 | 41.870354 | -87.696563 |
2014-07-09 | 49.824742 | 1.157944e+06 | 1.895733e+06 | 41.869632 | -87.695640 |
2014-07-10 | 47.309278 | 1.157290e+06 | 1.897096e+06 | 41.873383 | -87.698004 |
2014-07-11 | 51.686747 | 1.158252e+06 | 1.894460e+06 | 41.866131 | -87.694548 |
2014-07-12 | 42.203390 | 1.160162e+06 | 1.899432e+06 | 41.879735 | -87.687409 |
2014-07-13 | 57.645161 | 1.160090e+06 | 1.900417e+06 | 41.882440 | -87.687643 |
2014-07-14 | 46.145833 | 1.157698e+06 | 1.894575e+06 | 41.866455 | -87.696585 |
2014-07-15 | 46.857143 | 1.158040e+06 | 1.895050e+06 | 41.867752 | -87.695312 |
2014-07-16 | 48.646465 | 1.158067e+06 | 1.895417e+06 | 41.868761 | -87.695204 |
2014-07-17 | 48.103093 | 1.158137e+06 | 1.895128e+06 | 41.867964 | -87.694953 |
2014-07-18 | 52.376471 | 1.158751e+06 | 1.894366e+06 | 41.865862 | -87.692724 |
2014-07-19 | 58.274194 | 1.160721e+06 | 1.899518e+06 | 41.879958 | -87.685355 |
2014-07-20 | 52.904762 | 1.160850e+06 | 1.900023e+06 | 41.881340 | -87.684866 |
2014-07-21 | 49.322222 | 1.159274e+06 | 1.895277e+06 | 41.868352 | -87.690772 |
2014-07-22 | 50.811111 | 1.159185e+06 | 1.895274e+06 | 41.868345 | -87.691100 |
2014-07-23 | 51.318681 | 1.159495e+06 | 1.895639e+06 | 41.869340 | -87.689954 |
2014-07-24 | 52.321839 | 1.159703e+06 | 1.896104e+06 | 41.870612 | -87.689177 |
2014-07-25 | 48.236842 | 1.159034e+06 | 1.896310e+06 | 41.871189 | -87.691635 |
2014-07-26 | 56.777778 | 1.161149e+06 | 1.899982e+06 | 41.881223 | -87.683770 |
2014-07-27 | 57.903226 | 1.160853e+06 | 1.899920e+06 | 41.881059 | -87.684861 |
2014-07-28 | 46.666667 | 1.159703e+06 | 1.896104e+06 | 41.870612 | -87.689177 |
2014-07-29 | 44.534247 | 1.160348e+06 | 1.898468e+06 | 41.877085 | -87.686749 |
2014-07-30 | 47.000000 | 1.160617e+06 | 1.898516e+06 | 41.877210 | -87.685761 |
... | ... | ... | ... | ... | ... |
2018-11-24 | 35.171053 | 1.159490e+06 | 1.894617e+06 | 41.866529 | -87.690028 |
2018-11-25 | 22.191781 | 1.158107e+06 | 1.898191e+06 | 41.876368 | -87.694995 |
2018-11-26 | 8.547009 | 1.159749e+06 | 1.893199e+06 | 41.862637 | -87.689100 |
2018-11-27 | 14.200000 | 1.159642e+06 | 1.891924e+06 | 41.859141 | -87.689530 |
2018-11-28 | 15.267606 | 1.159012e+06 | 1.891869e+06 | 41.859001 | -87.691843 |
2018-11-29 | 18.236111 | 1.158425e+06 | 1.893154e+06 | 41.862540 | -87.693959 |
2018-11-30 | 20.423841 | 1.158600e+06 | 1.892246e+06 | 41.860047 | -87.693342 |
2018-12-01 | 20.613333 | 1.159016e+06 | 1.895342e+06 | 41.868530 | -87.691742 |
2018-12-02 | 25.848101 | 1.159554e+06 | 1.895372e+06 | 41.868602 | -87.689768 |
2018-12-03 | 18.751724 | 1.158749e+06 | 1.892040e+06 | 41.859477 | -87.692801 |
2018-12-04 | 18.297872 | 1.159058e+06 | 1.892202e+06 | 41.859915 | -87.691666 |
2018-12-05 | 17.839161 | 1.158522e+06 | 1.893111e+06 | 41.862422 | -87.693607 |
2018-12-06 | 18.197183 | 1.158819e+06 | 1.892125e+06 | 41.859710 | -87.692544 |
2018-12-07 | 19.897436 | 1.159356e+06 | 1.893968e+06 | 41.864754 | -87.690526 |
2018-12-08 | 34.746835 | 1.159553e+06 | 1.895374e+06 | 41.868605 | -87.689775 |
2018-12-09 | 32.975000 | 1.159798e+06 | 1.895477e+06 | 41.868884 | -87.688871 |
2018-12-10 | 18.577586 | 1.159191e+06 | 1.893608e+06 | 41.863769 | -87.691146 |
2018-12-11 | 17.264957 | 1.159350e+06 | 1.894265e+06 | 41.865570 | -87.690540 |
2018-12-12 | 17.478632 | 1.158634e+06 | 1.894483e+06 | 41.866183 | -87.693162 |
2018-12-13 | 16.660714 | 1.158779e+06 | 1.895139e+06 | 41.867979 | -87.692615 |
2018-12-14 | 17.464912 | 1.158872e+06 | 1.895000e+06 | 41.867596 | -87.692276 |
2018-12-15 | 32.986842 | 1.159668e+06 | 1.894936e+06 | 41.867400 | -87.689364 |
2018-12-16 | 35.184211 | 1.159870e+06 | 1.894878e+06 | 41.867237 | -87.688627 |
2018-12-17 | 19.377193 | 1.158933e+06 | 1.892744e+06 | 41.861405 | -87.692111 |
2018-12-18 | 18.218487 | 1.159433e+06 | 1.895158e+06 | 41.868017 | -87.690214 |
2018-12-19 | 19.854545 | 1.159134e+06 | 1.893288e+06 | 41.862893 | -87.691366 |
2018-12-20 | 16.330275 | 1.157975e+06 | 1.894762e+06 | 41.866962 | -87.695577 |
2018-12-21 | 18.811321 | 1.157903e+06 | 1.896157e+06 | 41.870791 | -87.695799 |
2018-12-22 | 34.670886 | 1.159684e+06 | 1.895026e+06 | 41.867647 | -87.689303 |
2018-12-23 | 26.040541 | 1.158972e+06 | 1.896172e+06 | 41.870809 | -87.691882 |
1637 rows × 5 columns
traffic.set_index("CAMERA ID", inplace=True)
traffic.sort_index(inplace=True)
traffic.groupby(level=0).mean()
VIOLATIONS | X COORDINATE | Y COORDINATE | LATITUDE | LONGITUDE | |
---|---|---|---|---|---|
CAMERA ID | |||||
CHI003 | 116.335859 | 1.147853e+06 | 1.934275e+06 | 41.975605 | -87.731670 |
CHI004 | 34.781192 | 1.148759e+06 | 1.933818e+06 | 41.974333 | -87.728347 |
CHI005 | 12.046569 | 1.163048e+06 | 1.878843e+06 | 41.823189 | -87.677349 |
CHI007 | 59.697837 | 1.161038e+06 | 1.878964e+06 | 41.823564 | -87.684721 |
CHI008 | 19.142241 | 1.151781e+06 | 1.898395e+06 | 41.877071 | -87.718168 |
CHI009 | 46.374004 | 1.151845e+06 | 1.899805e+06 | 41.880938 | -87.717898 |
CHI010 | 55.978475 | NaN | NaN | NaN | NaN |
CHI011 | 15.503542 | 1.155438e+06 | 1.906531e+06 | 41.899325 | -87.704522 |
CHI013 | 43.584568 | 1.182453e+06 | 1.869712e+06 | 41.797704 | -87.606445 |
CHI014 | 58.330670 | 1.156276e+06 | 1.858583e+06 | 41.767732 | -87.702738 |
CHI015 | 13.998763 | 1.154378e+06 | 1.857352e+06 | 41.764391 | -87.709728 |
CHI018 | 41.265596 | 1.156183e+06 | 1.859145e+06 | 41.769276 | -87.703063 |
CHI019 | 40.185117 | 1.156992e+06 | 1.894531e+06 | 41.866364 | -87.699143 |
CHI020 | 24.795818 | 1.157040e+06 | 1.894612e+06 | 41.866585 | -87.698962 |
CHI021 | 116.066912 | 1.157136e+06 | 1.892362e+06 | 41.860408 | -87.698672 |
CHI022 | 9.982166 | 1.167029e+06 | 1.830594e+06 | 41.690702 | -87.664122 |
CHI023 | 20.836609 | 1.166994e+06 | 1.830711e+06 | 41.691025 | -87.664248 |
CHI024 | 43.347826 | 1.154183e+06 | 1.939669e+06 | 41.990282 | -87.708245 |
CHI025 | 17.594213 | 1.155158e+06 | 1.934429e+06 | 41.975884 | -87.704801 |
CHI026 | 29.622984 | 1.159788e+06 | 1.923418e+06 | 41.945574 | -87.688078 |
CHI027 | 42.721631 | 1.159704e+06 | 1.923559e+06 | 41.945963 | -87.688384 |
CHI028 | 67.848101 | 1.158541e+06 | 1.923785e+06 | 41.946608 | -87.692652 |
CHI029 | 77.253676 | 1.180985e+06 | 1.868165e+06 | 41.793493 | -87.611876 |
CHI030 | 24.646341 | 1.150574e+06 | 1.871490e+06 | 41.803265 | -87.723304 |
CHI031 | 58.457074 | 1.150513e+06 | 1.870811e+06 | 41.801402 | -87.723545 |
CHI032 | 21.004773 | 1.150881e+06 | 1.871374e+06 | 41.802939 | -87.722182 |
CHI033 | 20.791209 | 1.176498e+06 | 1.897411e+06 | 41.873850 | -87.627448 |
CHI034 | 15.169788 | 1.176418e+06 | 1.897401e+06 | 41.873824 | -87.627741 |
CHI035 | 5.924515 | 1.178587e+06 | 1.841967e+06 | 41.721657 | -87.621463 |
CHI036 | 11.588529 | 1.178616e+06 | 1.842048e+06 | 41.721878 | -87.621354 |
... | ... | ... | ... | ... | ... |
CHI154 | 4.730534 | 1.161996e+06 | 1.836307e+06 | 41.706487 | -87.682391 |
CHI156 | 16.366848 | 1.132581e+06 | 1.938004e+06 | 41.986119 | -87.787741 |
CHI157 | 8.686924 | 1.149395e+06 | 1.911608e+06 | 41.913375 | -87.726589 |
CHI158 | 29.301351 | 1.131748e+06 | 1.936860e+06 | 41.982995 | -87.790832 |
CHI159 | 12.723206 | 1.148867e+06 | 1.910282e+06 | 41.909746 | -87.728563 |
CHI160 | 30.117093 | 1.149437e+06 | 1.894438e+06 | 41.866257 | -87.726879 |
CHI161 | 12.437132 | 1.149006e+06 | 1.910365e+06 | 41.909972 | -87.728050 |
CHI162 | 18.089552 | 1.149923e+06 | 1.894858e+06 | 41.867402 | -87.725084 |
CHI163 | 20.076613 | 1.149841e+06 | 1.894931e+06 | 41.867603 | -87.725383 |
CHI164 | 6.827181 | 1.155954e+06 | 1.867614e+06 | 41.792520 | -87.703676 |
CHI165 | 2.479060 | 1.161996e+06 | 1.836227e+06 | 41.706268 | -87.682391 |
CHI166 | 1.718563 | 1.155768e+06 | 1.867930e+06 | 41.793392 | -87.704348 |
CHI167 | 2.097500 | 1.155816e+06 | 1.868012e+06 | 41.793615 | -87.704170 |
CHI168 | 26.908591 | 1.167170e+06 | 1.849194e+06 | 41.741742 | -87.663073 |
CHI169 | 32.552524 | 1.167083e+06 | 1.849460e+06 | 41.742474 | -87.663385 |
CHI170 | 6.366040 | 1.167695e+06 | 1.849654e+06 | 41.742993 | -87.661138 |
CHI171 | 4.818966 | 1.165145e+06 | 1.920791e+06 | 41.938253 | -87.668463 |
CHI172 | 14.719225 | 1.165058e+06 | 1.920991e+06 | 41.938805 | -87.668779 |
CHI173 | 40.184932 | 1.161334e+06 | 1.871365e+06 | 41.802706 | -87.683845 |
CHI174 | 114.339130 | 1.121895e+06 | 1.922706e+06 | 41.944319 | -87.827378 |
CHI175 | 8.948276 | 1.121638e+06 | 1.922907e+06 | 41.944874 | -87.828317 |
CHI176 | 11.000000 | 1.122009e+06 | 1.923002e+06 | 41.945129 | -87.826952 |
CHI177 | 2.961538 | 1.149783e+06 | 1.918290e+06 | 41.931705 | -87.724990 |
CHI178 | 14.756098 | 1.148258e+06 | 1.873075e+06 | 41.807658 | -87.731755 |
CHI179 | 5.363636 | 1.148661e+06 | 1.873165e+06 | 41.807896 | -87.730276 |
CHI180 | 29.145833 | 1.183268e+06 | 1.857719e+06 | 41.764775 | -87.603828 |
CHI181 | 27.836735 | 1.182997e+06 | 1.858058e+06 | 41.765711 | -87.604810 |
CHI182 | 58.250000 | 1.166307e+06 | 1.908050e+06 | 41.903268 | -87.664557 |
CHI183 | 24.083333 | 1.166381e+06 | 1.908132e+06 | 41.903490 | -87.664285 |
CHI184 | 74.446809 | 1.182951e+06 | 1.857908e+06 | 41.765301 | -87.604985 |
162 rows × 5 columns
traffic2 = fetch_traffic_data()
import datetime as dt
traffic2.sort_index(inplace=True)
# for future reference: below gives month and year
# traffic.index.to_series().apply(lambda x: dt.datetime.strftime(x, '%b %Y'))
# A: weekday, B: month, d: date, Y: year, m: month
traffic2["weekday"] = traffic2.index.to_series().apply(lambda x: dt.datetime.strftime(x, '%A'))
traffic2.groupby(['weekday']).sum()
VIOLATIONS | X COORDINATE | Y COORDINATE | LATITUDE | LONGITUDE | |
---|---|---|---|---|---|
weekday | |||||
Friday | 753468 | 2.969780e+10 | 4.849572e+10 | 1.072352e+06 | -2.246282e+06 |
Monday | 686724 | 2.976620e+10 | 4.860353e+10 | 1.074738e+06 | -2.251272e+06 |
Saturday | 676715 | 1.893861e+10 | 3.090150e+10 | 6.827700e+05 | -1.429940e+06 |
Sunday | 663801 | 1.892335e+10 | 3.087836e+10 | 6.822298e+05 | -1.428801e+06 |
Thursday | 730603 | 3.063207e+10 | 5.002385e+10 | 1.106136e+06 | -2.317053e+06 |
Tuesday | 708276 | 3.147777e+10 | 5.140274e+10 | 1.136724e+06 | -2.381161e+06 |
Wednesday | 705136 | 3.070936e+10 | 5.014615e+10 | 1.108929e+06 | -2.322930e+06 |
# Did the number of unique camera increase? it seems yes by a marginal number
traffic2["Year"] = traffic2.index.to_series().apply(lambda x: dt.datetime.strftime(x, '%Y'))
traffic2.groupby(['Year']).agg('nunique')["CAMERA ID"].sort_index #['min', 'max', 'count', 'nunique'] possible
<bound method Series.sort_index of Year
2014 143
2015 150
2016 150
2017 150
2018 162
Name: CAMERA ID, dtype: int64>
# Any interesting pattern? Let's find out
traffic2.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 170521 entries, 2014-07-01 to 2018-12-23
Data columns (total 10 columns):
ADDRESS 170521 non-null object
CAMERA ID 170521 non-null object
VIOLATIONS 170521 non-null int64
X COORDINATE 163959 non-null float64
Y COORDINATE 163959 non-null float64
LATITUDE 163959 non-null float64
LONGITUDE 163959 non-null float64
LOCATION 163959 non-null object
weekday 170521 non-null object
Year 170521 non-null object
dtypes: float64(4), int64(1), object(5)
memory usage: 19.3+ MB
traffic2.describe()
VIOLATIONS | X COORDINATE | Y COORDINATE | LATITUDE | LONGITUDE | |
---|---|---|---|---|---|
count | 170521.000000 | 1.639590e+05 | 1.639590e+05 | 163959.000000 | 163959.000000 |
mean | 28.880449 | 1.159712e+06 | 1.893473e+06 | 41.863389 | -87.689229 |
std | 36.636151 | 1.570560e+04 | 3.212393e+04 | 0.088338 | 0.057135 |
min | 1.000000 | 1.121638e+06 | 1.820629e+06 | 41.663174 | -87.828317 |
25% | 7.000000 | 1.149783e+06 | 1.868078e+06 | 41.793493 | -87.725084 |
50% | 16.000000 | 1.159169e+06 | 1.898488e+06 | 41.877243 | -87.689803 |
75% | 37.000000 | 1.167170e+06 | 1.920991e+06 | 41.939040 | -87.662810 |
max | 479.000000 | 1.203645e+06 | 1.943342e+06 | 42.000260 | -87.529848 |
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
location_plot = traffic2['VIOLATIONS'].hist(bins=20)
location_plot.set_title("Frequency and Number of Violations")
location_plot.set_xlabel("Number of Violations per Case")
location_plot.set_ylabel("Frequency of Cases")
Text(0,0.5,'Frequency of Cases')
traffic2["Year"] = traffic2.index.to_series().apply(lambda x: dt.datetime.strftime(x, '%Y'))
traffic2.groupby(['Year']).sum()
VIOLATIONS | X COORDINATE | Y COORDINATE | LATITUDE | LONGITUDE | |
---|---|---|---|---|---|
Year | |||||
2014 | 659424 | 1.892601e+10 | 3.093007e+10 | 6.832156e+05 | -1.430905e+06 |
2015 | 1201698 | 4.221439e+10 | 6.889669e+10 | 1.523463e+06 | -3.191149e+06 |
2016 | 1116805 | 4.352645e+10 | 7.105770e+10 | 1.571194e+06 | -3.291152e+06 |
2017 | 1015490 | 4.276128e+10 | 6.981971e+10 | 1.543780e+06 | -3.233736e+06 |
2018 | 931306 | 4.271703e+10 | 6.974767e+10 | 1.542226e+06 | -3.230496e+06 |
pd.to_datetime(traffic2.index.values.astype(float))
traffic2.plot(kind="scatter", x="LONGITUDE", y="LATITUDE", alpha=0.4, label="Traffic Violations per Case",
figsize=(10,7),s = traffic2["VIOLATIONS"], c=traffic2.index, cmap=plt.get_cmap('Reds'),
colorbar=True, sharex=False)
<matplotlib.axes._subplots.AxesSubplot at 0x117716828>
H1 Heading
H2 Heading
H3 Heading
basic text.
here’s italics
here’s bold
to get link: link
Bulleted lists:
- First item
- Second item
- Third item
Here’s a numbered list:
- first
- second
- third
simple python block:
import numpy as np
def sumup(x,y):
return np.sum(x,y)
R code block:
library(tidyverse)
df <- read_csv("some_file.csv")
head(df)
inline code ‘x+y’. math:
good to go.