384 lines
18 KiB
Python
384 lines
18 KiB
Python
from db import get_bill_data, get_bill_types, get_bill_freqs, set_bill_type_growth, new_bill
|
|
from defines import END_YEAR
|
|
import datetime
|
|
from datetime import date, timedelta
|
|
|
|
|
|
################################################################################
|
|
|
|
def quarter_bounds(d):
    """Return the first and last day of the calendar quarter containing ``d``.

    ``d`` must expose ``year`` and ``month`` (callers in this file pass
    ``datetime.date`` objects). The result is a ``(start, end)`` tuple of
    ``date`` objects; both ends are inclusive.
    """
    quarter_idx = (d.month - 1) // 3      # 0-based: 0 is Jan-Mar ... 3 is Oct-Dec
    first_month = 3 * quarter_idx + 1
    start = date(d.year, first_month, 1)

    # The quarter ends the day before the next one begins; the last quarter
    # rolls over into January of the following year.
    if quarter_idx == 3:
        following_start = date(d.year + 1, 1, 1)
    else:
        following_start = date(d.year, first_month + 3, 1)

    return start, following_start - timedelta(days=1)
|
|
|
|
def allocate_by_quarter( bill_info, bill_type, yr, prev_bill_date, curr_bill_date, amount, include_start=False, include_end=True):
    """Spread a per-day amount across the calendar quarters a billing period touches.

    The period runs from ``prev_bill_date`` to ``curr_bill_date``; by default
    the start day is excluded and the end day included (``include_start`` /
    ``include_end`` flip that). For every quarter overlapping the period,
    ``days_in_overlap * amount`` is added to
    ``bill_info[bill_type]['qtr'][year][quarter]`` (quarter is 1-4), creating
    the ``'qtr'`` dict and zeroed per-year entries on first use.

    ``amount`` is treated as a cost per day (the caller in this file passes a
    bill amount divided by the days it covers). ``yr`` is accepted but not used.

    Returns ``{}`` when the period is empty, otherwise ``None``; the totals are
    delivered through the mutation of ``bill_info``.
    """
    one_day = timedelta(days=1)
    start = prev_bill_date if include_start else prev_bill_date + one_day
    end = curr_bill_date if include_end else curr_bill_date - one_day
    if end < start:
        return {}

    qtr_totals = bill_info[bill_type].setdefault('qtr', {})

    # Begin at the first day of the quarter holding `start`, then hop from
    # quarter to quarter until we have moved past `end`.
    cursor, _ = quarter_bounds(start)
    while cursor <= end:
        q_first, q_last = quarter_bounds(cursor)
        first_day = max(start, q_first)
        last_day = min(end, q_last)

        if first_day <= last_day:
            # a year seen for the first time starts with all four quarters at 0
            year_totals = qtr_totals.setdefault(q_first.year, {1: 0, 2: 0, 3: 0, 4: 0})
            quarter_no = (q_first.month - 1) // 3 + 1
            overlap_days = (last_day - first_day).days + 1
            year_totals[quarter_no] += overlap_days * amount

        cursor = q_last + one_day
    return
|
|
|
|
################################################################################
|
|
|
|
def qtr(d):
    """Return the quarter (1-4) for a bill date given as a ``YYYY-MM-DD`` string."""
    month = int(d[5:7])
    # (month + 2) // 3 maps 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4
    return (month + 2) // 3
|
|
|
|
def find_next_bill( bill_type, bill_info, bill_date ):
    """Return the first known bill of ``bill_type`` that falls after ``bill_date``.

    ``bill_date`` is a ``YYYY-MM-DD`` string. Comparison is by month only: in
    the wanted year a bill qualifies when its month is strictly later than the
    wanted month; in any later year the earliest bill of that year qualifies.

    Returns the bill dict, or ``None`` when the wanted year is past
    ``last_bill_year`` or no later bill exists.
    """
    target_year = int(bill_date[:4])
    target_month = int(bill_date[5:7])
    entry = bill_info[bill_type]

    # nothing can follow a date beyond the last year we hold bills for
    if target_year > int(entry['last_bill_year']):
        return None

    for yr in range(target_year, entry['last_bill_year'] + 1):
        if yr not in entry['year']:
            continue
        # per the comments in this file the per-year lists are newest-first,
        # so walk them backwards to meet the earliest bill of the year first
        for candidate in reversed(entry['year'][yr]):
            candidate_month = int(candidate['bill_date'][5:7])
            if yr > target_year or candidate_month > target_month:
                return candidate

    # failsafe: no later bill found
    return None
|
|
|
|
|
|
def find_previous_bill( bill_type, bill_info, bill_date ):
    """Return the most recent bill of ``bill_type`` that precedes ``bill_date``.

    ``bill_date`` is a ``YYYY-MM-DD`` string. Comparison is by month only: in
    the wanted year a bill qualifies when its month is strictly earlier than
    the wanted month. For an earlier year the first entry of that year's list
    is returned (the lists are expected to be newest-first, so that is the
    latest bill of the year). Estimated bills are considered too.

    Returns the bill dict, or ``None`` when the wanted year is before
    ``first_bill_year`` or no earlier bill exists.
    """
    target_year = int(bill_date[:4])
    target_month = int(bill_date[5:7])
    entry = bill_info[bill_type]

    # without a bill before this date there is nothing to base a price on
    if target_year < int(entry['first_bill_year']):
        return None

    # walk backwards from the wanted year down to (and including) the first
    # billing year; when both are equal the range holds just that one year
    for yr in range(target_year, entry['first_bill_year'] - 1, -1):
        if yr not in entry['year']:
            continue
        bills = entry['year'][yr]

        if yr < target_year:
            # any bill from an earlier year precedes the wanted date
            return bills[0]

        for candidate in bills:
            if target_month > int(candidate['bill_date'][5:7]):
                return candidate
    return None
|
|
|
|
|
|
def new_estimated_bill( bill_info, yr, bill_type, amt, new_date ):
    """Store an estimated bill in the DB and mirror it into ``bill_info``.

    ``new_date`` is a ``YYYY-MM-DD`` string and ``amt`` the estimated amount.
    The bill is written through ``new_bill`` with its last argument set to 1
    (the estimated flag), then a minimal dict with ``bill_date``, ``amount``
    and ``estimated`` is added to ``bill_info[bill_type]['year'][yr]`` so later
    estimates in the same run can build on it.

    The per-year lists are kept newest-first: ``find_previous_bill`` treats
    index 0 as the most recent bill of a year and the quarterly filler indexes
    the list positionally. The new bill is therefore inserted at its
    newest-first position rather than appended; appending left later estimates
    (e.g. one carrying annual growth) hidden behind older bills.
    """
    # add to DB
    new_bill( bill_type, amt, new_date, 1 )

    # patch this data back into bill_info so growth works in future
    bill = {'bill_date': new_date, 'amount': amt, 'estimated': 1}
    bills = bill_info[bill_type]['year'].setdefault(yr, [])

    # ISO dates compare correctly as strings; skip every bill newer than ours
    pos = 0
    while pos < len(bills) and str(bills[pos]['bill_date']) > str(new_date):
        pos += 1
    bills.insert(pos, bill)
    return
|
|
|
|
|
|
def add_missing_annual_bill_in_yr( bill_type, bill_info, yr ):
    """Add an estimated annual bill for ``yr`` based on the last known bill.

    The estimate reuses the month and day of ``last_bill`` and starts from its
    amount, compounding the bill type's ``growth`` percentage once for every
    year between ``last_bill_year`` and ``yr``. When ``yr`` is not after
    ``last_bill_year`` no growth is applied.

    NOTE: only ever called when there is a need to add a new bill.
    """
    entry = bill_info[bill_type]
    latest = entry['last_bill']
    month_day = latest['bill_date'][5:]
    estimate = latest['amount']

    # one round of growth per year separating the last bill from `yr`
    for _ in range(entry['last_bill_year'], yr):
        estimate = estimate + estimate * entry['growth'] / 100

    # recorded as an estimate (a prediction, not a real bill)
    new_estimated_bill(bill_info, yr, bill_type, estimate, f'{yr}-{month_day}')
    return
|
|
|
|
def add_missing_quarter_bills_in_yr( bill_type, bill_info, yr ):
    """Estimate the quarterly bills missing from ``yr`` from last year's bills.

    Nothing is added unless the previous year has bills. Two cases:

    * ``yr`` already has some bills: only the quarters after the most recent
      one (index 0 of the newest-first list) are filled in.
    * ``yr`` has no bills at all: every bill of the previous year is copied.

    Each estimate takes the month/day of the matching bill from the previous
    year and its amount increased by the bill type's ``growth`` percentage.

    The previous year's bill for quarter ``q`` is normally found positionally
    (``4 - q`` in a newest-first list of four). If the previous year holds
    fewer bills than that (e.g. a partial first year), the position does not
    exist; the bill is then looked up by its actual quarter, and the quarter is
    skipped if there is none, instead of raising ``IndexError``.
    """
    entry = bill_info[bill_type]
    years = entry['year']

    # every estimate is derived from the previous year
    if yr - 1 not in years:
        return

    prev_bills = years[yr - 1]
    uplift = 1 + entry['growth'] / 100

    if yr in years:
        # we can only hold the first few bills of `yr` here, never the last few
        have_q = qtr( years[yr][0]['bill_date'] )
        for q in range(have_q + 1, 5):
            idx = 4 - q     # newest-first: q4 is index 0, q3 index 1, ...
            if idx < len(prev_bills):
                template = prev_bills[idx]
            else:
                template = next((b for b in prev_bills if qtr(b['bill_date']) == q), None)
                if template is None:
                    continue
            mm_dd = template['bill_date'][5:]
            new_estimated_bill( bill_info, yr, bill_type, template['amount'] * uplift, f'{yr}-{mm_dd}' )
    else:
        # a completely new year: mirror last year's bills with annual growth
        for template in prev_bills:
            mm_dd = template['bill_date'][5:]
            new_estimated_bill( bill_info, yr, bill_type, template['amount'] * uplift, f'{yr}-{mm_dd}' )
    return
|
|
|
|
def add_missing_monthly_bills_in_yr( bill_type, bill_info, yr ):
    """Add an estimated bill for every month of ``yr`` that has none.

    Dates are built from the day-of-month of the first bill. In the first
    billing year only the months after the first bill are considered; in any
    other year all twelve are. A month counts as present only when a bill with
    exactly the generated date exists.

    A missing month takes the amount of the previous bill. Growth is applied
    only when there is no later bill and the month equals the month of the
    last bill; that grown amount is also stored as ``last_bill_amount``.

    If no previous bill can be found a message is printed and the function
    returns without processing the remaining months.

    NOTE: ALWAYS called for the first year, which never has bills before the
    first bill, so it looks incomplete even when nothing needs adding.
    """
    entry = bill_info[bill_type]
    first_date = entry['first_bill']['bill_date']
    day_part = first_date[8:]
    last_bill_month = entry['last_bill']['bill_date'][5:7]

    # first year: start with the month after the first bill; otherwise January
    first_month = int(first_date[5:7]) + 1 if entry['first_bill_year'] == yr else 1

    for month in range(first_month, 13):
        new_date = f'{yr}-{month:02d}-{day_part}'

        # skip months that already hold a bill on exactly this date
        already_present = any(
            str(existing['bill_date']) == new_date
            for existing in entry['year'].get(yr, ())
        )
        if already_present:
            continue

        pb = find_previous_bill(bill_type, bill_info, new_date)
        nb = find_next_bill(bill_type, bill_info, new_date)
        if not pb:
            print("Failed to find previous_bill, can't calculate missing bill - returning" )
            return

        amt = pb['amount']
        # With a later bill on record the previous amount is carried forward
        # unchanged (real bills are only entered where the price changes).
        # Without one, annual growth kicks in on the last bill's month.
        if not nb and month == int(last_bill_month):
            amt += amt * entry['growth']/100
            entry['last_bill_amount'] = amt

        # recorded as an estimate (a prediction, not a real bill)
        new_estimated_bill(bill_info, yr, bill_type, amt, new_date)
    return
|
|
|
|
def get_growth_value( bt, bill_type ):
    """Return the annual growth figure selected for a bill type.

    ``bt`` is an iterable of bill-type rows (dict-like, with ``id``,
    ``which_growth`` and the ``ann_growth_min`` / ``ann_growth_avg`` /
    ``ann_growth_max`` values). The row whose ``id`` equals ``bill_type`` is
    located and its ``which_growth`` picks the value: ``'avg'`` or ``'min'``
    select the matching figure, anything else selects the max.

    Raises:
        ValueError: no row in ``bt`` has the requested id (previously this
            surfaced as an unhelpful unbound-variable error).
    """
    for el in bt:
        if el['id'] != bill_type:
            continue
        which = el['which_growth']
        if which == 'avg':
            return el['ann_growth_avg']
        if which == 'min':
            return el['ann_growth_min']
        return el['ann_growth_max']

    raise ValueError(f"no bill type with id {bill_type!r} - cannot look up its growth")
|
|
|
|
|
|
def process_bill_data(bd, bt, bf):
    """Organise the raw bill rows, fill in missing bills and derive growth.

    ``bd`` holds the bill rows, ``bt`` the bill-type rows and ``bf`` the
    frequency rows (all dict-like). The bills are grouped into
    ``bill_info[bill_type]`` with these keys:

    * ``growth`` - the growth figure chosen by ``get_growth_value``
    * ``num_ann_bills`` - bills per annum for the type's frequency
    * ``last_bill`` / ``last_bill_year`` - the first row seen for the type
    * ``first_bill`` / ``first_bill_year`` - the last row seen for the type
    * ``last_real_bill_year`` - year of the first non-estimated row seen
    * ``year`` - dict of year -> list of that year's bills, in input order

    (The original comments state the rows arrive newest-first from SQL, which
    is why the first row is the "last" bill.)

    Then, per bill type, every year from the first bill's year up to
    ``END_YEAR`` that does not hold exactly ``num_ann_bills`` bills is handed
    to ``add_missing_bills_for_yr`` - missing bills arise because not every
    monthly bill is entered by hand, and always for future years - and finally
    ``derive_ann_growth`` is run for the type.
    """
    # bill type id -> frequency id (e.g. type #34 has frequency #2)
    bt_id_freq = {row["id"]: row["freq"] for row in bt}
    # frequency id -> bills per annum (e.g. id=2 means 4 bills per annum)
    bf_id_num = {row["id"]: row["num_bills_per_annum"] for row in bf}

    bill_info = {}
    for bill in bd:
        bill_type = bill['bill_type_id']
        yr = int(bill['bill_date'][:4])

        # new bill type: the first row we meet is treated as its last bill
        if bill_type not in bill_info:
            bill_info[bill_type] = {
                'growth': get_growth_value( bt, bill_type ),
                'num_ann_bills': bf_id_num[bt_id_freq[bill_type]],
                'last_bill': bill,
                'last_bill_year': yr,
                'year': {},
            }
        info = bill_info[bill_type]

        # keep overwriting, so the final row processed ends up as the first bill
        info['first_bill'] = bill
        info['first_bill_year'] = yr
        if 'last_real_bill_year' not in info and not bill['estimated']:
            info['last_real_bill_year'] = yr

        # add this bill to the list for its year
        info['year'].setdefault(yr, []).append(bill)

    # now fill gaps from the year of the first bill through END_YEAR
    for bill_type, info in bill_info.items():
        if 'last_bill' not in info:
            print(f"Cannot process bill_type={bill_type} - no bill info for it at all" )
            continue

        yr_min = int(info['first_bill']['bill_date'][:4])
        for yr in range( yr_min, END_YEAR+1 ):
            # we already have all the bills needed for yr
            if yr in info['year'] and len(info['year'][yr]) == info['num_ann_bills']:
                continue
            add_missing_bills_for_yr( bill_type, bill_info, yr )

        derive_ann_growth( bill_type, bill_info )
|
|
|
|
################################################################################
|
|
# add_missing_bills_for_yr -- wrapper to call right func based on bill freq
|
|
################################################################################
|
|
def add_missing_bills_for_yr( bill_type, bill_info, yr ):
    """Call the fill-in routine that matches the bill type's yearly frequency.

    1 bill per annum goes to the annual filler, 4 to the quarterly one and 12
    to the monthly one. Any other frequency is silently ignored.
    """
    fillers = {
        1: add_missing_annual_bill_in_yr,
        4: add_missing_quarter_bills_in_yr,
        12: add_missing_monthly_bills_in_yr,
    }
    filler = fillers.get(bill_info[bill_type]['num_ann_bills'])
    if filler is not None:
        filler(bill_type, bill_info, yr)
    return
|
|
|
|
def derive_ann_growth( bill_type, bill_info ):
    """Derive min/avg/max annual growth for a bill type and store it.

    Steps, all limited to the years from ``first_bill_year`` to the current
    year (later years only hold estimates):

    1. Quarterly types only: each bill's amount is turned into a cost per day
       over the span since the previous bill and spread over calendar quarters
       with ``allocate_by_quarter`` (irregular billing otherwise distorts the
       per-year totals).
    2. A total is built for every year holding at least ``num_ann_bills``
       bills. The last year with real bills is left out if it also contains
       estimates, as they can skew the growth.
    3. Where quarter allocations exist for a totalled year, their sum replaces
       the plain total.
    4. Year-on-year growth percentages are computed for consecutive totalled
       years and their min, mean and max collected.
    5. The figures are sanity-clamped (min not below 0, mean reset to 3 when
       outside 0-10, max capped at 9.99) and passed to
       ``set_bill_type_growth``. With no usable pair of years a message is
       printed instead.

    The sanity check is applied to the mean growth. It used to be applied to
    the running sum, so a few ordinary years (e.g. three at 4%) exceeded the
    10% threshold and were wrongly reset. Gap years, a missing
    ``last_real_bill_year``, zero-day billing spans and a zero previous-year
    total are skipped rather than raising.
    """
    info = bill_info[bill_type]
    years = info['year']
    first_yr = info['first_bill_year']

    # just do up to now so we stop before looking at purely estimated years
    now_yr = datetime.date.today().year
    yr_span = range(first_yr, now_yr + 1)

    # quarterly bills are not always evenly spaced (some arrive 6 times a
    # year), so re-apportion them by day into calendar quarters
    if info['num_ann_bills'] == 4:
        for yr in yr_span:
            for b in years.get(yr, []):
                pb = find_previous_bill( bill_type, bill_info, b['bill_date'] )
                if not pb:
                    continue

                pb_d = pb['bill_date']
                b_d = b['bill_date']
                date1 = date( int(pb_d[:4]), int(pb_d[5:7]), int(pb_d[8:]) )
                date2 = date( int(b_d[:4]), int(b_d[5:7]), int(b_d[8:]) )

                days = (date2 - date1).days
                if days <= 0:
                    # no positive span to spread the amount over
                    continue
                allocate_by_quarter( bill_info, bill_type, yr, date1, date2, b['amount'] / days )

    # per-year totals for years with a full set of bills
    total = {}
    last_real_yr = info.get('last_real_bill_year')
    for yr in yr_span:
        bills = years.get(yr)
        # not enough bills in this year (or none): the first year often is short
        if bills is None or len(bills) < info['num_ann_bills']:
            continue
        # don't mix estimated data into the last year of real data
        if yr == last_real_yr and any(b['estimated'] for b in bills):
            continue
        total[yr] = sum(b['amount'] for b in bills)

    # prefer the derived quarter totals where we have them, slightly more accurate
    qtr_totals = info.get('qtr', {})
    for yr in total:
        if yr in qtr_totals:
            total[yr] = sum(qtr_totals[yr][q] for q in range(1, 5))

    # once we have all yr totals, work out annual growth stats
    min_growth = 999
    max_growth = 0
    growth_sum = 0
    count = 0
    # start from the year after the first bill, comparing each year to the one before
    for yr in range(first_yr + 1, now_yr + 1):
        if yr - 1 not in total or yr not in total:
            continue
        if not total[yr - 1]:
            # a zero base year gives no meaningful percentage
            continue
        growth = (total[yr] - total[yr - 1]) / total[yr - 1] * 100
        growth_sum += growth
        count += 1
        if growth < min_growth:
            min_growth = growth
        if growth > max_growth:
            max_growth = growth

    if count:
        avg_growth = growth_sum / count
        print( f"Before sanity check, min={min_growth}, avg={avg_growth}, max_growth={max_growth}" )
        # HACK FOR SANITY SAKE NOW - bills won't normally decrease, and 10% is
        # unlikely for sustained growth
        if min_growth < 0:
            min_growth = 0
        if avg_growth < 0 or avg_growth > 10:
            avg_growth = 3.0
        if max_growth > 10:
            max_growth = 9.99
        set_bill_type_growth( bill_type, min_growth, avg_growth, max_growth )
    else:
        # failsafe (in case filling bills failed to add enough bills to average out)
        print( f"{bill_type}: Unable to calculate growth!" )
|