From 89d58e4cd340fccf690953096688a153468a0414 Mon Sep 17 00:00:00 2001 From: Damien De Paoli Date: Mon, 25 Aug 2025 18:46:24 +1000 Subject: [PATCH] fixed a few logic bugs with quarterly data, now accurately puts bill proportions into relevant quarters and estimates future bills based on quarterly data - all works so far --- bills.py | 131 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 84 insertions(+), 47 deletions(-) diff --git a/bills.py b/bills.py index 6ca1ffa..d38084c 100644 --- a/bills.py +++ b/bills.py @@ -5,8 +5,8 @@ from datetime import date, timedelta ################################################################################ - # this finds start and end dates of a quarter for a given date +################################################################################ def quarter_bounds(d): q = (d.month-1)//3 start = date(d.year, 3*q+1, 1) @@ -18,10 +18,18 @@ def quarter_bounds(d): end = next_start - timedelta(days=1) return start, end -# this needs tweaking to be used to add to our total - but is close -def allocate_by_quarter( bill_info, bill_type, yr, prev_bill_date, curr_bill_date, amount, include_start=False, include_end=True): - start = prev_bill_date if include_start else prev_bill_date + timedelta(days=1) - end = curr_bill_date if include_end else curr_bill_date - timedelta(days=1) +################################################################################ +# takes a bill and its previous bill, works out days between and adds cost / day +# to each quarter the bill covers from prev. to now. 
Usually means it splits +# one bill in a previous and this qtr (or just puts it all into the current qtr) +################################################################################ +def allocate_by_quarter( bill_info, bill_type, yr, prev_bill, bill): + start = date( int(prev_bill['bill_date'][:4]), int(prev_bill['bill_date'][5:7]), int(prev_bill['bill_date'][8:])) + end = date( int(bill['bill_date'][:4]), int(bill['bill_date'][5:7]), int(bill['bill_date'][8:])) + + time_difference = end - start + days = time_difference.days + cost_per_day = bill['amount']/days if end < start: return {} if not 'qtr' in bill_info[bill_type]: @@ -38,24 +46,26 @@ def allocate_by_quarter( bill_info, bill_type, yr, prev_bill_date, curr_bill_dat if overlap_end >= overlap_start: days = (overlap_end - overlap_start).days + 1 q = (q_start.month-1)//3 + 1 - # NEED LOGIC TO INIT IF ITS NOT HERE YET + # initialise arrays if needed if q_start.year not in bill_info[bill_type]['qtr']: bill_info[bill_type]['qtr'][q_start.year] = {} for i in range(1,5): bill_info[bill_type]['qtr'][q_start.year][i]=0 -# print( f"^^^^^^ adding {days*amount} into q={q}, yr={q_start.year}, with pbd={prev_bill_date}, cbd={curr_bill_date}, cba={amount}" ) - bill_info[bill_type]['qtr'][q_start.year][q] += days*amount + bill_info[bill_type]['qtr'][q_start.year][q] += days*cost_per_day # next quarter cur = q_end + timedelta(days=1) return ################################################################################ - -# give a bill dat in format YYYY-MM-DD, return quarter (1-4) +# given a bill date in format YYYY-MM-DD, return quarter (1-4) +################################################################################ def qtr(d): m = int(d[5:7]) return ( (m-1)//3 + 1 ) +################################################################################ +# find the bill just after the date given +################################################################################ def find_next_bill( bill_type, 
bill_info, bill_date ): wanted_year = int(bill_date[:4]) wanted_mm = int(bill_date[5:7]) @@ -76,6 +86,7 @@ def find_next_bill( bill_type, bill_info, bill_date ): return None +# find the bill just before the date given def find_previous_bill( bill_type, bill_info, bill_date ): wanted_year = int(bill_date[:4]) wanted_mm = int(bill_date[5:7]) @@ -109,6 +120,9 @@ def find_previous_bill( bill_type, bill_info, bill_date ): return None +# quick wrapper to add a new estimated bill - new estimates have the flag in +# the DB set, but also we update bill_info to reflect the new bill so future +# growth can build off this estimate too - e.g. 2030 can use 2029, etc def new_estimated_bill( bill_info, yr, bill_type, amt, new_date ): # add to DB new_bill( bill_type, amt, new_date, 1 ) @@ -121,7 +135,18 @@ def new_estimated_bill( bill_info, yr, bill_type, amt, new_date ): bill['amount']=amt bill['estimated']=1 # need this for find_previous_bill to work but only need the above 3 fields - bill_info[bill_type]['year'][yr].append(bill) + bill_info[bill_type]['year'][yr].insert(0,bill) + + if bill_info[bill_type]['num_ann_bills'] == 4: + q = qtr( new_date ) + if yr not in bill_info[bill_type]['qtr']: + bill_info[bill_type]['qtr'][yr]={} + pb = find_previous_bill( bill_type, bill_info, new_date ) + if pb['estimated'] == 0: + print( f" FIXFIXFIX - have a prev real bill={pb['bill_date']} & this is first est - likely need to better apportion this bill into the quarters" ) + allocate_by_quarter( bill_info, bill_type, yr, pb, bill ) + + bill_info[bill_type]['qtr'][yr][q]=amt return @@ -135,32 +160,33 @@ def add_missing_annual_bill_in_yr( bill_type, bill_info, yr ): for i in range( bill_info[bill_type]['last_bill_year'], yr ): amt += amt * bill_info[bill_type]['growth']/100 - # last param is estimated (and this is an estimate for a future bill / not real) new_estimated_bill( bill_info, yr, bill_type, amt, f'{yr}-{mm_dd}' ) return # missing quarterly bill, find date based on MM-DD and ???
- can have missing bilsl in first year # add growth (based on drop-down) for each future year def add_missing_quarter_bills_in_yr( bill_type, bill_info, yr ): - # okay we have data for this year but some missing (wouldnt be here otherwise) - # and data from previous year... lets fill in gaps - if yr in bill_info[bill_type]['year'] and yr-1 in bill_info[bill_type]['year']: - # per if above, ONLY get here if we have first few bills of {yr}, cannot be last few - have_q = qtr( bill_info[bill_type]['year'][yr][0]['bill_date'] ) - for q in range(have_q+1,5): - # use 5-q, as bills are in descending order in bill_info, e.g. q4 is 1st, - bill=bill_info[bill_type]['year'][yr-1][4-q] - mm_dd= bill['bill_date'][5:] - amt = bill['amount']*(1+bill_info[bill_type]['growth']/100) - new_date = f'{yr}-{mm_dd}' + # okay we have data for last year but some missing (in this year), lets fill in gaps + # could be called if only have data for q2 - q4 in first year and we dont have a previous years q1 data so don't try + if 'qtr' in bill_info[bill_type] and yr-1 in bill_info[bill_type]['qtr']: + # if we do have data in this year, we have q1-q3 only, and want missing qtrs set range appropriately... 
+ if yr in bill_info[bill_type]['qtr']: + # per if above, ONLY get here if we have first few bills of {yr}, cannot be last few + have_q = qtr( bill_info[bill_type]['year'][yr][0]['bill_date'] ) + r=range(have_q+1,5) + else: + r=range(1,5) + for q in r: + # amt is total of last year's qtr bill proportion + amt = bill_info[bill_type]['qtr'][yr-1][q]*(1+bill_info[bill_type]['growth']/100) + # just make new bills first of last month of a qtr (good as any date for GAS, they move anyway) + new_date = f'{yr}-{q*3:02d}-01' + # SANITY CHECK: we might be adding a bill estimate we already have (due to stupid gas bills /qtrly code) + if yr in bill_info[bill_type]['year']: + for b in bill_info[bill_type]['year'][yr]: + if b['bill_date'] == new_date: + return new_estimated_bill( bill_info, yr, bill_type, amt, new_date ) - - # for now only add full new years based on last year with ann_growth (seasonal) - if yr not in bill_info[bill_type]['year'] and yr-1 in bill_info[bill_type]['year']: - for bill in bill_info[bill_type]['year'][yr-1]: - mm_dd= bill['bill_date'][5:] - amt = bill['amount']*(1+bill_info[bill_type]['growth']/100) - new_estimated_bill( bill_info, yr, bill_type, amt, f'{yr}-{mm_dd}' ) return # missing monthly bills, find date based on DD and put in each missing month @@ -208,7 +234,6 @@ def add_missing_monthly_bills_in_yr( bill_type, bill_info, yr ): if i == int(lb_mm): amt += amt * bill_info[bill_type]['growth']/100 bill_info[bill_type]['last_bill_amount']=amt - # last param is estimated (and this is an estimate for a future bill / not real) new_estimated_bill( bill_info, yr, bill_type, amt, new_date ) return @@ -227,10 +252,12 @@ def get_growth_value( bt, bill_type ): return el['ann_growth_max'] +################################################################################ # go through the bill data from the DB, put it into more friendly formats, then # work out and then add missing bill data (might be b/c we have monthly bills, # and I didn't want to input 12 
of them at the same price), and it always # occurs for future bills +################################################################################ def process_bill_data(bd, bt, bf): # this maps a bill id to a freq id (e.g. bill #34 - has a frequency of #2 (which might be quarterly) bt_id_freq = {row["id"]: row["freq"] for row in bt} @@ -281,10 +308,12 @@ def process_bill_data(bd, bt, bf): yr_min=int(bill_info[bill_type]['first_bill']['bill_date'][:4]) yr_max=int(bill_info[bill_type]['last_bill']['bill_date'][:4]) + ProportionQtrlyData( bill_type, bill_info ) + # go from first_bill year until reach end year for yr in range( yr_min, END_YEAR+1 ): - # we have all the bills needed for yr - if yr in bill_info[bill_type]['year'] and len(bill_info[bill_type]['year'][yr]) == bill_info[bill_type]['num_ann_bills']: + # we have all the bills needed for yr - but dont be cute with qtrly, gas bills suck can have missing with 4 bills + if yr in bill_info[bill_type]['year'] and len(bill_info[bill_type]['year'][yr]) == bill_info[bill_type]['num_ann_bills'] and bill_info[bill_type]['num_ann_bills'] !=4: continue add_missing_bills_for_yr( bill_type, bill_info, yr ) derive_ann_growth( bill_type, bill_info ) @@ -302,10 +331,17 @@ def add_missing_bills_for_yr( bill_type, bill_info, yr ): add_missing_monthly_bills_in_yr( bill_type, bill_info, yr ) return -def derive_ann_growth( bill_type, bill_info ): - # just do up to now so we stop earlier than looking at other estimated (just an optimisation) +################################################################################ +# Takes qtrly bills and start from 2nd year of bills (so we can estimate growth) +# and go through each bill allocating hte proportion of each bill to each +# relevant quarter - to build more accurate totals. 
Would be mostly marginal +# except when Gas qtrly bills have 6 per year, and we need to guess say qtr4 in +# the future, we can't easily find corresponding bill from previous year, so +# this allows us to aggregate per quarter and use matching quarter +################################################################################ +def ProportionQtrlyData( bill_type, bill_info ): + # just do up to now for the moment so that add_missing_bills later will have qtr data to use now_yr = datetime.date.today().year - # FIX UP CRAPPY QUARTERLY BILLING PROPORTIONS (only useful as some gas bills are 6 / year!) if bill_info[bill_type]['num_ann_bills']==4: for yr in range( bill_info[bill_type]['first_bill_year'], now_yr+1): @@ -313,16 +349,17 @@ def derive_ann_growth( bill_type, bill_info ): pb = find_previous_bill( bill_type, bill_info, b['bill_date'] ) if not pb: continue + allocate_by_quarter( bill_info, bill_type, yr, pb, b ) + return - pb_d=pb['bill_date'] - b_d=b['bill_date'] - date1 = date( int(pb_d[:4]), int(pb_d[5:7]), int(pb_d[8:])) - date2 = date( int(b_d[:4]), int(b_d[5:7]), int(b_d[8:])) - - time_difference = date2 - date1 - days = time_difference.days - cost_per_day = b['amount']/days - allocate_by_quarter( bill_info, bill_type, yr, date1, date2, cost_per_day ) +################################################################################ +# function to work out totals per year, and then calculates annual growth in +# terms of min/avg/max - uses qtr data for qtrly bills, or just normal totals +# for other bill types +################################################################################ +def derive_ann_growth( bill_type, bill_info ): + # just do up to now so we stop earlier than looking at other estimated (just an optimisation) + now_yr = datetime.date.today().year total={} for yr in range( bill_info[bill_type]['first_bill_year'], now_yr+1): @@ -342,8 +379,8 @@ def derive_ann_growth( bill_type, bill_info ): total[yr] = 0 for b in
bill_info[bill_type]['year'][yr]: total[yr] += b['amount'] - #crazily we have more bills in this year than expected, so work out qtrly costs + # crazily we can have more bills in this year than expected, so work out qtrly costs, and patch that back into total array for yr in range( bill_info[bill_type]['first_bill_year'], now_yr+1): if 'qtr' in bill_info[bill_type] and yr in bill_info[bill_type]['qtr']: tot=0 @@ -372,7 +409,7 @@ def derive_ann_growth( bill_type, bill_info ): if growth > max_growth: max_growth = growth if count: - print( f"Before sanity check, min={min_growth}, avg={avg_growth/count}, max_growth={max_growth}" ) + ## print( f"Before sanity check, min={min_growth}, avg={avg_growth/count}, max_growth={max_growth}" ) # HACK FOR SANITY SAKE NOW - bills wont decrease normally, and 10% is unlikely for sustained growth if min_growth< 0: min_growth=0 if avg_growth< 0 or avg_growth > 10: avg_growth = 3*count