More shit

This commit is contained in:
Alexander Munch-Hansen 2019-04-06 13:38:39 +02:00
parent 0a1708d7aa
commit 140a9eb574
2 changed files with 76 additions and 3 deletions

View file

@ -1,6 +1,71 @@
import requests
from bs4 import BeautifulSoup
import pandas as pd
import urllib.request
from datetime import datetime, timedelta
import json
import os
def determine_month(ds: "pd.DataFrame | None" = None, cur_year: int = 2019) -> int:
    """Estimate the current calendar month from monthly traffic totals.

    The traffic summed over the rows counted for ``cur_year`` is compared
    with the traffic summed over the twelve rows that follow them.  The
    ratio is read as the share of a year that has already elapsed, and the
    month following the elapsed ones is returned.

    Args:
        ds: Table with an ``'År'`` (year) column and a ``'Total'`` column.
            When omitted, the spreadsheet is downloaded from sundogbaelt.dk.
        cur_year: Year whose rows count as "current".

    Returns:
        A month number in the range 1..12.

    Raises:
        ValueError: If the reference year's total is not positive, so no
            ratio can be formed.
    """
    if ds is None:
        from io import BytesIO

        url = 'https://sundogbaelt.dk/wp-content/uploads/2019/04/trafiktal-maaned.xls'
        # Read the whole payload and close the connection before parsing;
        # the buffer is seekable, which spreadsheet readers may require.
        with urllib.request.urlopen(url) as response:
            ds = pd.read_excel(BytesIO(response.read()))

    amount_of_cur_year = int((ds['År'] == cur_year).sum())
    totals = ds['Total']
    # NOTE(review): both slices start one row after the counted rows would
    # suggest (row 0 is skipped) -- presumably row 0 is not a month row;
    # confirm against the spreadsheet layout.
    cur_year_total = float(totals.iloc[1:amount_of_cur_year + 1].sum())
    last_year_total = float(
        totals.iloc[amount_of_cur_year + 1:amount_of_cur_year + 13].sum()
    )
    if last_year_total <= 0:
        raise ValueError('reference year has no traffic; cannot estimate the month')

    # Round the elapsed share of the year to whole months.  Flooring the
    # inverse ratio instead can yield fractional or >12 "months".
    elapsed_months = round(12 * cur_year_total / last_year_total)
    return min(max(elapsed_months + 1, 1), 12)
def _level_crossing_time(start, end, level):
    """Return when the water reaches ``level`` between two tide-table rows.

    ``start`` and ``end`` are ``(datetime, level)`` pairs for consecutive
    rows.  The level is assumed to change linearly between them.  Returns
    ``None`` when the interval is flat or ``level`` lies outside it, rather
    than stepping towards a value that is never reached.
    """
    start_time, start_level = start
    end_time, end_level = end
    level_span = end_level - start_level
    lowest, highest = sorted((start_level, end_level))
    if level_span == 0 or not lowest <= level <= highest:
        return None
    duration = (end_time - start_time).total_seconds()
    fraction = (level - start_level) / level_span
    return start_time + timedelta(seconds=duration * fraction)


def is_tide():
    """Return the share of tide-table crossings of the current level at night.

    The DMI tide table for Aarhus is filtered to the month reported by
    ``determine_month()``.  For every pair of consecutive rows the moment at
    which the water would pass the most recently observed level is
    interpolated.  The result is the fraction of those moments whose hour is
    <= 6 or >= 22.

    Returns:
        A float in 0..1, or ``0.0`` when the observed level is not crossed
        by any interval of the table.

    NOTE(review): assumes the observed value is numeric and uses the same
    unit as the table levels -- confirm against the DMI payload.
    """
    month = determine_month()

    tide_data = requests.get(
        'https://www.dmi.dk/fileadmin/user_upload/Bruger_upload/Tidevand/2019/Aarhus.t.txt',
        timeout=30,
    )
    # The first 570 characters are skipped before parsing (presumably the
    # file header -- confirm); the piece after the final newline is dropped.
    lines = tide_data.text[570:].split('\n')[:-1]
    matches = []
    for line in lines:
        fields = line.split('\t')
        timestamp = datetime.strptime(fields[0], '%Y%m%d%H%M')
        if timestamp.month == month:
            matches.append((timestamp, int(fields[1])))

    all_the_data = requests.get(
        'https://www.dmi.dk/NinJo2DmiDk/ninjo2dmidk?cmd=odj&stations=22331&datatype=obs',
        timeout=30,
    )
    current_water_level = json.loads(all_the_data.content)[0]['values'][-1]['value']

    # One candidate moment per pair of consecutive table rows.
    moments = []
    for start, end in zip(matches, matches[1:]):
        moment = _level_crossing_time(start, end, current_water_level)
        if moment is not None:
            moments.append(moment)

    if not moments:
        return 0.0
    night = sum(1 for moment in moments if moment.hour <= 6 or moment.hour >= 22)
    return night / len(moments)
def tmp():
    """Download an opendata.dk datastore resource and save it as JSON.

    The decoded response body is written to ``traffic_data_13_23.json`` in
    the current working directory, replacing any existing file.
    """
    source_url = 'https://portal.opendata.dk/api/3/action/datastore_search?resource_id=b3eeb0ff-c8a8-4824-99d6-e0a3747c8b0d'
    response = requests.get(source_url)
    with open('traffic_data_13_23.json', 'w') as out_file:
        # Decode inside the ``with`` so the file is opened before parsing.
        json.dump(response.json(), out_file)
def scrape_traffic():
@ -23,10 +88,17 @@ def scrape_traffic():
elif curr_avg <= night_avr:
return 1.0
res = 1 - curr_avg / diff
assert(res < 1 and res > 0)
return res
scrape_traffic()
def scrape_weather():
    """Print the first temperature cell of weather.com's hour-by-hour page.

    Fetches the page, parses it and prints the first ``<td>`` element whose
    class is ``temp``.

    Raises:
        IndexError: If the page contains no such element.
    """
    r = requests.get('https://weather.com/weather/hourbyhour/l/99546:4:US', timeout=30)
    # Name the parser explicitly: without it Beautiful Soup picks whichever
    # parser is installed, so results can differ between machines.
    soup = BeautifulSoup(r.content, 'html.parser')
    first_temp_cell = soup.find('td', {'class': 'temp'})
    if first_temp_cell is None:
        raise IndexError('no <td class="temp"> element found on the weather page')
    print(first_temp_cell)

File diff suppressed because one or more lines are too long