fixed a few logic bugs with quarterly data, now accurately puts bill proportions into relevant quarters and estimates future bills based on quarterly data - all works so far

This commit is contained in:
2025-08-25 18:46:24 +10:00
parent 65fc68e0bf
commit 89d58e4cd3

127
bills.py
View File

@@ -5,8 +5,8 @@ from datetime import date, timedelta
################################################################################ ################################################################################
# this finds start and end dates of a quarter for a given date # this finds start and end dates of a quarter for a given date
################################################################################
def quarter_bounds(d): def quarter_bounds(d):
q = (d.month-1)//3 q = (d.month-1)//3
start = date(d.year, 3*q+1, 1) start = date(d.year, 3*q+1, 1)
@@ -18,10 +18,18 @@ def quarter_bounds(d):
end = next_start - timedelta(days=1) end = next_start - timedelta(days=1)
return start, end return start, end
# this needs tweaking to be used to add to our total - but is close ################################################################################
def allocate_by_quarter( bill_info, bill_type, yr, prev_bill_date, curr_bill_date, amount, include_start=False, include_end=True): # takes a bill and its previous bill, works out days between and adds cost / day
start = prev_bill_date if include_start else prev_bill_date + timedelta(days=1) # to each quarter the bill covers from prev. to now. Usually means it splits
end = curr_bill_date if include_end else curr_bill_date - timedelta(days=1) # one bill in a previous and this qtr (or just puts it all into the current qtr)
################################################################################
def allocate_by_quarter( bill_info, bill_type, yr, prev_bill, bill):
start = date( int(prev_bill['bill_date'][:4]), int(prev_bill['bill_date'][5:7]), int(prev_bill['bill_date'][8:]))
end = date( int(bill['bill_date'][:4]), int(bill['bill_date'][5:7]), int(bill['bill_date'][8:]))
time_difference = end - start
days = time_difference.days
cost_per_day = bill['amount']/days
if end < start: if end < start:
return {} return {}
if not 'qtr' in bill_info[bill_type]: if not 'qtr' in bill_info[bill_type]:
@@ -38,24 +46,26 @@ def allocate_by_quarter( bill_info, bill_type, yr, prev_bill_date, curr_bill_dat
if overlap_end >= overlap_start: if overlap_end >= overlap_start:
days = (overlap_end - overlap_start).days + 1 days = (overlap_end - overlap_start).days + 1
q = (q_start.month-1)//3 + 1 q = (q_start.month-1)//3 + 1
# NEED LOGIC TO INIT IF ITS NOT HERE YET # initialise arrays if needed
if q_start.year not in bill_info[bill_type]['qtr']: if q_start.year not in bill_info[bill_type]['qtr']:
bill_info[bill_type]['qtr'][q_start.year] = {} bill_info[bill_type]['qtr'][q_start.year] = {}
for i in range(1,5): for i in range(1,5):
bill_info[bill_type]['qtr'][q_start.year][i]=0 bill_info[bill_type]['qtr'][q_start.year][i]=0
# print( f"^^^^^^ adding {days*amount} into q={q}, yr={q_start.year}, with pbd={prev_bill_date}, cbd={curr_bill_date}, cba={amount}" ) bill_info[bill_type]['qtr'][q_start.year][q] += days*cost_per_day
bill_info[bill_type]['qtr'][q_start.year][q] += days*amount
# next quarter # next quarter
cur = q_end + timedelta(days=1) cur = q_end + timedelta(days=1)
return return
################################################################################ ################################################################################
# given a bill date in format YYYY-MM-DD, return quarter (1-4)
# give a bill dat in format YYYY-MM-DD, return quarter (1-4) ################################################################################
def qtr(d): def qtr(d):
m = int(d[5:7]) m = int(d[5:7])
return ( (m-1)//3 + 1 ) return ( (m-1)//3 + 1 )
################################################################################
# find the bill just after the date given
################################################################################
def find_next_bill( bill_type, bill_info, bill_date ): def find_next_bill( bill_type, bill_info, bill_date ):
wanted_year = int(bill_date[:4]) wanted_year = int(bill_date[:4])
wanted_mm = int(bill_date[5:7]) wanted_mm = int(bill_date[5:7])
@@ -76,6 +86,7 @@ def find_next_bill( bill_type, bill_info, bill_date ):
return None return None
# find the bill just before the date given
def find_previous_bill( bill_type, bill_info, bill_date ): def find_previous_bill( bill_type, bill_info, bill_date ):
wanted_year = int(bill_date[:4]) wanted_year = int(bill_date[:4])
wanted_mm = int(bill_date[5:7]) wanted_mm = int(bill_date[5:7])
@@ -109,6 +120,9 @@ def find_previous_bill( bill_type, bill_info, bill_date ):
return None return None
# quick wrapper to add a new estimated bill - new estimates have the flag in
# the DB set, but also we update bill_info to reflect the new bill so future
# growth can build off this estimate too - e.g. 2030 can use 2029, etc
def new_estimated_bill( bill_info, yr, bill_type, amt, new_date ): def new_estimated_bill( bill_info, yr, bill_type, amt, new_date ):
# add to DB # add to DB
new_bill( bill_type, amt, new_date, 1 ) new_bill( bill_type, amt, new_date, 1 )
@@ -121,7 +135,18 @@ def new_estimated_bill( bill_info, yr, bill_type, amt, new_date ):
bill['amount']=amt bill['amount']=amt
bill['estimated']=1 bill['estimated']=1
# need this for find_previous_bill to work but only need the above 3 fields # need this for find_previous_bill to work but only need the above 3 fields
bill_info[bill_type]['year'][yr].append(bill) bill_info[bill_type]['year'][yr].insert(0,bill)
if bill_info[bill_type]['num_ann_bills'] == 4:
q = qtr( new_date )
if yr not in bill_info[bill_type]['qtr']:
bill_info[bill_type]['qtr'][yr]={}
pb = find_previous_bill( bill_type, bill_info, new_date )
if pb['estimated'] == 0:
print( f" FIXFIXFIX - have a prev real bill={pb['bill_date']} & this is first est - likely need to better apportion this bill into the quarters" )
allocate_by_quarter( bill_info, bill_type, yr, pb, bill )
bill_info[bill_type]['qtr'][yr][q]=amt
return return
@@ -135,32 +160,33 @@ def add_missing_annual_bill_in_yr( bill_type, bill_info, yr ):
for i in range( bill_info[bill_type]['last_bill_year'], yr ): for i in range( bill_info[bill_type]['last_bill_year'], yr ):
amt += amt * bill_info[bill_type]['growth']/100 amt += amt * bill_info[bill_type]['growth']/100
# last param is estimated (and this is an estimate for a future bill / not real)
new_estimated_bill( bill_info, yr, bill_type, amt, f'{yr}-{mm_dd}' ) new_estimated_bill( bill_info, yr, bill_type, amt, f'{yr}-{mm_dd}' )
return return
# missing quarterly bill, find date based on MM-DD and ??? - can have missing bills in first year # missing quarterly bill, find date based on MM-DD and ??? - can have missing bills in first year
# add growth (based on drop-down) for each future year # add growth (based on drop-down) for each future year
def add_missing_quarter_bills_in_yr( bill_type, bill_info, yr ): def add_missing_quarter_bills_in_yr( bill_type, bill_info, yr ):
# okay we have data for this year but some missing (wouldnt be here otherwise) # okay we have data for last year but some missing (in this year), lets fill in gaps
# and data from previous year... lets fill in gaps # could be called if only have data for q2 - q4 in first year and we dont have a previous years q1 data so don't try
if yr in bill_info[bill_type]['year'] and yr-1 in bill_info[bill_type]['year']: if 'qtr' in bill_info[bill_type] and yr-1 in bill_info[bill_type]['qtr']:
# if we do have data in this year, we have q1-q3 only, and want missing qtrs set range appropriately...
if yr in bill_info[bill_type]['qtr']:
# per if above, ONLY get here if we have first few bills of {yr}, cannot be last few # per if above, ONLY get here if we have first few bills of {yr}, cannot be last few
have_q = qtr( bill_info[bill_type]['year'][yr][0]['bill_date'] ) have_q = qtr( bill_info[bill_type]['year'][yr][0]['bill_date'] )
for q in range(have_q+1,5): r=range(have_q+1,5)
# use 5-q, as bills are in descending order in bill_info, e.g. q4 is 1st, else:
bill=bill_info[bill_type]['year'][yr-1][4-q] r=range(1,5)
mm_dd= bill['bill_date'][5:] for q in r:
amt = bill['amount']*(1+bill_info[bill_type]['growth']/100) # amt is total of last year's qtr bill proportion
new_date = f'{yr}-{mm_dd}' amt = bill_info[bill_type]['qtr'][yr-1][q]*(1+bill_info[bill_type]['growth']/100)
# just make new bills first of last month of a qtr (good as any date for GAS, they move anyway)
new_date = f'{yr}-{q*3:02d}-01'
# SANITY CHECK: we might be adding a bill estimate we already have (due to stupid gas bills /qtrly code)
if yr in bill_info[bill_type]['year']:
for b in bill_info[bill_type]['year'][yr]:
if b['bill_date'] == new_date:
return
new_estimated_bill( bill_info, yr, bill_type, amt, new_date ) new_estimated_bill( bill_info, yr, bill_type, amt, new_date )
# for now only add full new years based on last year with ann_growth (seasonal)
if yr not in bill_info[bill_type]['year'] and yr-1 in bill_info[bill_type]['year']:
for bill in bill_info[bill_type]['year'][yr-1]:
mm_dd= bill['bill_date'][5:]
amt = bill['amount']*(1+bill_info[bill_type]['growth']/100)
new_estimated_bill( bill_info, yr, bill_type, amt, f'{yr}-{mm_dd}' )
return return
# missing monthly bills, find date based on DD and put in each missing month # missing monthly bills, find date based on DD and put in each missing month
@@ -208,7 +234,6 @@ def add_missing_monthly_bills_in_yr( bill_type, bill_info, yr ):
if i == int(lb_mm): if i == int(lb_mm):
amt += amt * bill_info[bill_type]['growth']/100 amt += amt * bill_info[bill_type]['growth']/100
bill_info[bill_type]['last_bill_amount']=amt bill_info[bill_type]['last_bill_amount']=amt
# last param is estimated (and this is an estimate for a future bill / not real)
new_estimated_bill( bill_info, yr, bill_type, amt, new_date ) new_estimated_bill( bill_info, yr, bill_type, amt, new_date )
return return
@@ -227,10 +252,12 @@ def get_growth_value( bt, bill_type ):
return el['ann_growth_max'] return el['ann_growth_max']
################################################################################
# go through the bill data from the DB, put it into more friendly formats, then # go through the bill data from the DB, put it into more friendly formats, then
# work out and then add missing bill data (might be b/c we have monthly bills, # work out and then add missing bill data (might be b/c we have monthly bills,
# and I didn't want to input 12 of them at the same price), and it always # and I didn't want to input 12 of them at the same price), and it always
# occurs for future bills # occurs for future bills
################################################################################
def process_bill_data(bd, bt, bf): def process_bill_data(bd, bt, bf):
# this maps a bill id to a freq id (e.g. bill #34 - has a frequency of #2 (which might be quarterly) # this maps a bill id to a freq id (e.g. bill #34 - has a frequency of #2 (which might be quarterly)
bt_id_freq = {row["id"]: row["freq"] for row in bt} bt_id_freq = {row["id"]: row["freq"] for row in bt}
@@ -281,10 +308,12 @@ def process_bill_data(bd, bt, bf):
yr_min=int(bill_info[bill_type]['first_bill']['bill_date'][:4]) yr_min=int(bill_info[bill_type]['first_bill']['bill_date'][:4])
yr_max=int(bill_info[bill_type]['last_bill']['bill_date'][:4]) yr_max=int(bill_info[bill_type]['last_bill']['bill_date'][:4])
ProportionQtrlyData( bill_type, bill_info )
# go from first_bill year until reach end year # go from first_bill year until reach end year
for yr in range( yr_min, END_YEAR+1 ): for yr in range( yr_min, END_YEAR+1 ):
# we have all the bills needed for yr # we have all the bills needed for yr - but dont be cute with qtrly, gas bills suck can have missing with 4 bills
if yr in bill_info[bill_type]['year'] and len(bill_info[bill_type]['year'][yr]) == bill_info[bill_type]['num_ann_bills']: if yr in bill_info[bill_type]['year'] and len(bill_info[bill_type]['year'][yr]) == bill_info[bill_type]['num_ann_bills'] and bill_info[bill_type]['num_ann_bills'] !=4:
continue continue
add_missing_bills_for_yr( bill_type, bill_info, yr ) add_missing_bills_for_yr( bill_type, bill_info, yr )
derive_ann_growth( bill_type, bill_info ) derive_ann_growth( bill_type, bill_info )
@@ -302,10 +331,17 @@ def add_missing_bills_for_yr( bill_type, bill_info, yr ):
add_missing_monthly_bills_in_yr( bill_type, bill_info, yr ) add_missing_monthly_bills_in_yr( bill_type, bill_info, yr )
return return
def derive_ann_growth( bill_type, bill_info ): ################################################################################
# just do up to now so we stop earlier than looking at other estimated (just an optimisation) # Takes qtrly bills and start from 2nd year of bills (so we can estimate growth)
# and go through each bill allocating the proportion of each bill to each
# relevant quarter - to build more accurate totals. Would be mostly marginal
# except when Gas qtrly bills have 6 per year, and we need to guess say qtr4 in
# the future, we can't easily find corresponding bill from previous year, so
# this allows us to aggregate per quarter and use matching quarter
################################################################################
def ProportionQtrlyData( bill_type, bill_info ):
# just do up to now for the moment so that add_missing_bills later will have qtr data to use
now_yr = datetime.date.today().year now_yr = datetime.date.today().year
# FIX UP CRAPPY QUARTERLY BILLING PROPORTIONS (only useful as some gas bills are 6 / year!) # FIX UP CRAPPY QUARTERLY BILLING PROPORTIONS (only useful as some gas bills are 6 / year!)
if bill_info[bill_type]['num_ann_bills']==4: if bill_info[bill_type]['num_ann_bills']==4:
for yr in range( bill_info[bill_type]['first_bill_year'], now_yr+1): for yr in range( bill_info[bill_type]['first_bill_year'], now_yr+1):
@@ -313,16 +349,17 @@ def derive_ann_growth( bill_type, bill_info ):
pb = find_previous_bill( bill_type, bill_info, b['bill_date'] ) pb = find_previous_bill( bill_type, bill_info, b['bill_date'] )
if not pb: if not pb:
continue continue
allocate_by_quarter( bill_info, bill_type, yr, pb, b )
return
pb_d=pb['bill_date'] ################################################################################
b_d=b['bill_date'] # function to work out totals per year, and then calculates annual growth in
date1 = date( int(pb_d[:4]), int(pb_d[5:7]), int(pb_d[8:])) # terms of min/avg/max - uses qtr data for qtrly bills, or just normal totals
date2 = date( int(b_d[:4]), int(b_d[5:7]), int(b_d[8:])) # for other bill types
################################################################################
time_difference = date2 - date1 def derive_ann_growth( bill_type, bill_info ):
days = time_difference.days # just do up to now so we stop earlier than looking at other estimated (just an optimisation)
cost_per_day = b['amount']/days now_yr = datetime.date.today().year
allocate_by_quarter( bill_info, bill_type, yr, date1, date2, cost_per_day )
total={} total={}
for yr in range( bill_info[bill_type]['first_bill_year'], now_yr+1): for yr in range( bill_info[bill_type]['first_bill_year'], now_yr+1):
@@ -342,8 +379,8 @@ def derive_ann_growth( bill_type, bill_info ):
total[yr] = 0 total[yr] = 0
for b in bill_info[bill_type]['year'][yr]: for b in bill_info[bill_type]['year'][yr]:
total[yr] += b['amount'] total[yr] += b['amount']
#crazily we have more bills in this year than expected, so work out qtrly costs
# crazily we can have more bills in this year than expected, so work out qtrly costs, and patch that back into total array
for yr in range( bill_info[bill_type]['first_bill_year'], now_yr+1): for yr in range( bill_info[bill_type]['first_bill_year'], now_yr+1):
if 'qtr' in bill_info[bill_type] and yr in bill_info[bill_type]['qtr']: if 'qtr' in bill_info[bill_type] and yr in bill_info[bill_type]['qtr']:
tot=0 tot=0
@@ -372,7 +409,7 @@ def derive_ann_growth( bill_type, bill_info ):
if growth > max_growth: if growth > max_growth:
max_growth = growth max_growth = growth
if count: if count:
print( f"Before sanity check, min={min_growth}, avg={avg_growth/count}, max_growth={max_growth}" ) ## print( f"Before sanity check, min={min_growth}, avg={avg_growth/count}, max_growth={max_growth}" )
# HACK FOR SANITY SAKE NOW - bills wont decrease normally, and 10% is unlikely for sustained growth # HACK FOR SANITY SAKE NOW - bills wont decrease normally, and 10% is unlikely for sustained growth
if min_growth< 0: min_growth=0 if min_growth< 0: min_growth=0
if avg_growth< 0 or avg_growth > 10: avg_growth = 3*count if avg_growth< 0 or avg_growth > 10: avg_growth = 3*count