Major rewrite: took on board the thoughts in TODO and completely rewrote how we process bill_data and the subsequent growth. Much simpler now (although still complex) - most is now done in one loop that takes the DB data and reformats it into an in-memory data structure, then processes that a few different ways to find missing and future bills, and then calculates growth. Still much to go: I do calculate missing/future annual bills, but I am not actually adding them to the DB (I still want to distinguish them from the real bills in the DB); I am not yet calculating additional bills for monthly or quarterly frequencies (so not adding them to the DB either); and the interface would then need to show/hide real vs auto-filled bills. Note that growth only takes real bills into account, BUT it also only calculates growth on consecutive full-year data sets - e.g. years with quarterly bills for less than the full year are ignored for now.

This commit is contained in:
2025-08-18 17:49:36 +10:00
parent 232f16deba
commit 7ac7acf44c

182
bills.py
View File

@@ -1,41 +1,149 @@
from db import get_bill_data, get_bill_types, set_bill_type_growth from db import get_bill_data, get_bill_types, get_bill_freqs, set_bill_type_growth
from defines import END_YEAR
def derive_bill_data():
bd=get_bill_data()
bt=get_bill_types()
water_id = None
for t in bt:
if t['name'] == "Water":
water_id = t['id']
if not water_id: # give a bill dat in format YYYY-MM-DD, return quarter (1-4)
def qtr(d):
    """Return the calendar quarter (1-4) of a date string.

    Args:
        d: a date in ``YYYY-MM-DD`` format; only characters 5-6 (the month)
            are inspected.

    Returns:
        1 for Jan-Mar, 2 for Apr-Jun, 3 for Jul-Sep, 4 for Oct-Dec.

    Raises:
        ValueError: if the month field is not a number in 1..12.
    """
    month = int(d[5:7])
    # Without this check a malformed month would silently yield quarter 0 or 5+.
    if not 1 <= month <= 12:
        raise ValueError(f"invalid month in bill date: {d!r}")
    return (month - 1) // 3 + 1
# missing annual bill, find date based on MM-DD and add new year - given we start with first_bill anyway, will only be used for future bill predictions
# future only, so add ann_growth (based on drop-down) for each future year
def add_missing_annual_bill_in_yr( bill_type, bill_info, num, yr, growth_pct=5.26 ):
    """Report the annual bill that is missing for ``yr``, projected from the last bill.

    The projected bill reuses the MM-DD of the bill type's last bill and
    compounds that bill's amount by ``growth_pct`` once for every year between
    ``last_bill_year`` and ``yr``. Nothing is stored: the result is only
    printed.

    Args:
        bill_type: key into ``bill_info`` for the bill type being filled.
        bill_info: per-bill-type dict; this function reads
            ``['last_bill']['bill_date']``, ``['last_bill']['amount']`` and
            ``['last_bill_year']``.
        num: bills per annum for this bill type (unused here; kept so all the
            add_missing_* functions share one signature).
        yr: the year that has no annual bill.
        growth_pct: annual growth in percent applied per projected year.
            Defaults to 5.26, the rate that used to be hard-coded.

    Returns:
        None.
    """
    last_bill = bill_info[bill_type]['last_bill']
    # NOTE(review): a last bill dated 02-29 projects onto a date that does not
    # exist in non-leap years - confirm how that should be handled once these
    # bills are actually inserted.
    mm_dd = last_bill['bill_date'][5:]
    amount = last_bill['amount']
    # compound once per year elapsed since the last bill; a yr at or before
    # last_bill_year leaves the amount unchanged
    for _ in range( bill_info[bill_type]['last_bill_year'], yr ):
        amount += amount * growth_pct/100
    print( f"{bill_type}: So should insert bill as: ${amount:.02f} on '{yr}-{mm_dd}'")
    return
total={}
total[water_id]={}
for yr in [2022, 2023, 2024]:
print( f"water_id={water_id}")
total[water_id][yr] = 0
for b in bd:
if b['bill_type_id'] == water_id and str(yr) in b['bill_date']:
total[water_id][yr] += b['amount']
print( f"{yr} => {b['bill_date']} -- {b['amount']}" )
print( f"total for water in {yr} is {total[water_id][yr]}" )
# once we have all yr totals: # missing quarterly bill, find date based on MM-DD and ??? - can have missing bilsl in first year
growth = {} # add growth (based on drop-down) for each future year
growth[water_id] = {} def add_missing_quarter_bills_in_yr( bill_type, bill_info, num, yr ):
max_growth = {} # print( f"{bill_type}: Seems we are missing a quarterly bill in {yr}, use first_bill={bill_info[bill_type]['first_bill']['bill_date']} to add one" )
avg_growth = {} return
max_growth[water_id] = 0
avg_growth[water_id] = 0 # missing monthly bills, find date based on DD and put in each missing month
count = 0 # add growth (based on drop-down) for each future year
for yr in [2023, 2024]: def add_missing_monthly_bills_in_yr( bill_type, bill_info, num, yr ):
growth[water_id][yr] = (total[water_id][yr] - total[water_id][yr-1]) / total[water_id][yr-1] * 100 # print( f"{bill_type}: Seems we are missing a monthly bill in {yr}, use first_bill={bill_info[bill_type]['first_bill']['bill_date']} to add one" )
avg_growth[water_id] += growth[water_id][yr] return
count += 1
if growth[water_id][yr] > max_growth[water_id]:
max_growth[water_id] = growth[water_id][yr] # go through the bill data from the DB, put it into more friendly formats, then
print( f"growth from {yr} to {yr-1} = {growth}%") # work out and then add missing bill data (might be b/c we have monthly bills,
print( f"Max growth was: {max_growth[water_id]}" ) # and I didn't want to input 12 of them at the same price), and it always
print( f"Avg growth is: {avg_growth[water_id]/count}" ) # occurs for future bills
def process_bill_data(bd, bt, bf):
    """Regroup the DB bill rows per bill type and year, then fill gaps and derive growth.

    For every bill type that has at least one bill, each year from the year of
    its first bill up to END_YEAR that does not hold exactly the expected
    number of bills is passed to add_missing_bills_for_yr(); afterwards
    derive_ann_growth() is called for that bill type.

    Args:
        bd: bill rows; each needs 'bill_type_id' and 'bill_date' (a string
            starting with YYYY). The code treats the first row seen for a bill
            type as its last bill, so rows are presumably sorted newest-first
            - verify against get_bill_data().
        bt: bill type rows with 'id' and 'freq' (a frequency id).
        bf: bill frequency rows with 'id' and 'num_bills_per_annum'.

    Returns:
        None.

    Raises:
        KeyError: if a bill references a bill type missing from ``bt``, or a
            bill type references a frequency missing from ``bf``.
    """
    # maps a bill type id to a freq id (e.g. bill type #34 has frequency #2)
    bt_id_freq = {row["id"]: row["freq"] for row in bt}
    # maps a freq id to bills per annum (e.g. id=2 to 4 bills per annum)
    bf_id_num = {row["id"]: row["num_bills_per_annum"] for row in bf}

    # bill_info[bill_type] holds first_bill, last_bill, their years, and
    # 'year' -> {yr: [bills in that year]}
    bill_info = {}
    for bill in bd:
        bill_type = bill['bill_type_id']
        yr = int(bill['bill_date'][:4])
        if bill_type not in bill_info:
            # first row seen for this type is recorded as its last bill
            bill_info[bill_type] = {
                'first_bill': {},
                'last_bill': bill,
                'last_bill_year': yr,
                'year': {},
            }
        info = bill_info[bill_type]
        # keep overwriting: the final row seen for this type ends up as first_bill
        info['first_bill'] = bill
        info['first_bill_year'] = yr
        info['year'].setdefault(yr, []).append(bill)

    for bill_type, info in bill_info.items():
        # find freq id based on bill type id, then bills per annum by freq id
        num = bf_id_num[bt_id_freq[bill_type]]
        # defensive only: every entry built above carries 'last_bill'
        if 'last_bill' not in info:
            print(f"Cannot process bill_type={bill_type} - no bill info for it at all" )
            continue
        # go from the first bill's year until END_YEAR, skipping complete years
        for yr in range( info['first_bill_year'], END_YEAR+1 ):
            bills = info['year'].get(yr)
            if bills is not None and len(bills) == num:
                continue
            add_missing_bills_for_yr( bill_type, bill_info, num, yr )
        # with gaps reported, calculate annual growth for this bill type
        derive_ann_growth( bill_type, bill_info, num )
################################################################################
# add_missing_bills_for_yr -- wrapper to call right func based on bill freq
################################################################################
def add_missing_bills_for_yr( bill_type, bill_info, num, yr ):
    """Dispatch to the gap-filling function matching the bills-per-annum count.

    ``num`` of 1, 4 or 12 selects the annual, quarterly or monthly function
    respectively; any other value does nothing. All arguments are forwarded
    unchanged.
    """
    fill_by_freq = {
        1: add_missing_annual_bill_in_yr,
        4: add_missing_quarter_bills_in_yr,
        12: add_missing_monthly_bills_in_yr,
    }
    fill = fill_by_freq.get(num)
    if fill is not None:
        fill( bill_type, bill_info, num, yr )
    return
def derive_ann_growth( bill_type, bill_info, num ):
    """Derive min/avg/max year-on-year growth for a bill type and store the average.

    Growth is only measured between two consecutive years that each hold
    exactly ``num`` bills; years with fewer (or more) bills, and years with no
    bills at all, are ignored. When at least one such pair exists the min, avg
    and max growth percentages are printed and the average is passed to
    set_bill_type_growth(); otherwise a failure message is printed.

    Args:
        bill_type: key into ``bill_info`` for the bill type to analyse.
        bill_info: per-bill-type dict; this function reads
            ``bill_info[bill_type]['year']``, a mapping of year -> list of
            bills, each bill having an 'amount'.
        num: number of bills that make a year complete.

    Returns:
        None.
    """
    print(f"Derive annual growth on bill_type: {bill_type} " )

    # total per complete year only; incomplete years would skew the comparison
    totals = {
        yr: sum(b['amount'] for b in bills)
        for yr, bills in bill_info[bill_type]['year'].items()
        if len(bills) == num
    }

    growths = []
    for yr in sorted(totals):
        prev_total = totals.get(yr - 1)
        # need a complete previous year, and a non-zero base to divide by
        if prev_total is None or prev_total == 0:
            continue
        growths.append((totals[yr] - prev_total) / prev_total * 100)

    if not growths:
        # failsafe: not enough consecutive full years to compare
        print( f"Unable to calculate growth!" )
        return

    avg_growth = sum(growths) / len(growths)
    print( f"Min growth was: {min(growths)}" )
    print( f"Avg growth is: {avg_growth}" )
    print( f"Max growth was: {max(growths)}" )
    set_bill_type_growth( bill_type, avg_growth )