Major rewrite: took on board the thoughts in TODO and completely rewrote how we process bill_data and the subsequent growth. Much simpler now (although still complex) - most is now done in one loop that takes the DB data and reformats it into an in-memory data structure, then processes that a few different ways to find missing and future bills, and then calculates growth. Still much to do: I do calculate missing/future annual bills, but I am not actually adding them to the DB (I want to distinguish them from the real bills still in the DB); I am not yet calculating additional bills for monthly or quarterly frequencies (so not adding them to the DB either); and the interface will then need to show/hide real vs auto-filled bills. Note that growth only takes real bills into account, BUT it is also only calculated on consecutive full-year data sets - e.g. years with quarterly bills for less than the full year are ignored for now.
This commit is contained in:
172
bills.py
172
bills.py
@@ -1,41 +1,149 @@
|
||||
from db import get_bill_data, get_bill_types, set_bill_type_growth
|
||||
from db import get_bill_data, get_bill_types, get_bill_freqs, set_bill_type_growth
|
||||
from defines import END_YEAR
|
||||
|
||||
def derive_bill_data():
|
||||
bd=get_bill_data()
|
||||
bt=get_bill_types()
|
||||
water_id = None
|
||||
for t in bt:
|
||||
if t['name'] == "Water":
|
||||
water_id = t['id']
|
||||
|
||||
if not water_id:
|
||||
# given a bill date in format YYYY-MM-DD, return its quarter (1-4)
def qtr(d):
    """Return the calendar quarter for a date string.

    The month is read from characters 5-6 of ``d`` (the MM part of a
    YYYY-MM-DD string) and mapped 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
    """
    month = int(d[5:7])
    # (month + 2) // 3 is the same integer as (month - 1) // 3 + 1
    return (month + 2) // 3
|
||||
|
||||
# missing annual bill: reuse the MM-DD of the last real bill in the requested
# year and grow the last real amount once for every year after that bill
def add_missing_annual_bill_in_yr( bill_type, bill_info, num, yr, ann_growth_pct=5.26 ):
    """Report the annual bill that would have to be added for ``yr``.

    Nothing is stored; the projected bill is only printed.

    Args:
        bill_type: key into ``bill_info`` for the bill type being processed.
        bill_info: dict whose ``bill_info[bill_type]`` entry provides
            ``'last_bill'`` (with ``'bill_date'`` as YYYY-MM-DD text and a
            numeric ``'amount'``) and ``'last_bill_year'``.
        num: bills per annum for this type (unused here, kept so all the
            add_missing_* helpers share one signature).
        yr: year the bill is missing from.
        ann_growth_pct: yearly growth in percent, compounded once per year
            between ``last_bill_year`` and ``yr``. Defaults to 5.26, the
            value that was previously hard-coded.

    Returns:
        None.
    """
    last_bill = bill_info[bill_type]['last_bill']
    mm_dd = last_bill['bill_date'][5:]
    l_amt = last_bill['amount']

    # one compounding step per year after the last real bill; when yr is not
    # after last_bill_year the range is empty and the amount stays as-is
    for _ in range( bill_info[bill_type]['last_bill_year'], yr ):
        l_amt += l_amt * ann_growth_pct / 100

    print( f"{bill_type}: So should insert bill as: ${l_amt:.02f} on '{yr}-{mm_dd}'")
    return
|
||||
|
||||
# missing quarterly bills: dates still to be worked out (MM-DD based) - note
# that bills can be missing even in the first year
# growth (based on drop-down) is meant to be added for each future year
def add_missing_quarter_bills_in_yr( bill_type, bill_info, num, yr ):
    """Placeholder for filling in missing quarterly bills for ``yr``.

    Not implemented yet: the arguments are accepted so the signature matches
    the other add_missing_* helpers, but nothing is calculated or printed.

    Returns:
        None.
    """
    # TODO: derive the missing quarters (e.g. from first_bill's bill_date) and add them
    return None
|
||||
|
||||
# missing monthly bills: the plan is to take the DD and put a bill in each
# missing month
# growth (based on drop-down) is meant to be added for each future year
def add_missing_monthly_bills_in_yr( bill_type, bill_info, num, yr ):
    """Placeholder for filling in missing monthly bills for ``yr``.

    Not implemented yet: the arguments are accepted so the signature matches
    the other add_missing_* helpers, but nothing is calculated or printed.

    Returns:
        None.
    """
    # TODO: derive the missing months (e.g. from first_bill's bill_date) and add them
    return None
|
||||
|
||||
|
||||
# go through the bill data from the DB, put it into more friendly formats, then
# work out and then add missing bill data (might be b/c we have monthly bills,
# and I didn't want to input 12 of them at the same price), and it always
# occurs for future bills
def process_bill_data(bd, bt, bf):
    """Group bill rows per bill type and year, flag incomplete years, derive growth.

    Args:
        bd: iterable of bill rows; each row is indexed by ``'bill_type_id'``
            and ``'bill_date'`` (text whose first four characters are the year).
            NOTE(review): the first row seen for a type is treated as its
            *last* bill and the final row seen as its *first* bill, i.e. this
            assumes the query returns newest bills first - confirm against
            get_bill_data.
        bt: iterable of bill-type rows with ``'id'`` and ``'freq'``.
        bf: iterable of frequency rows with ``'id'`` and ``'num_bills_per_annum'``.

    Returns:
        None. For every bill type and every year from its first bill year up
        to END_YEAR that does not hold exactly the expected number of bills,
        add_missing_bills_for_yr is called; afterwards derive_ann_growth is
        called once per bill type.

    Raises:
        KeyError: if a bill's type id is not in ``bt`` or its frequency id is
            not in ``bf``.
    """
    # this maps a bill id to a freq id (e.g. bill #34 - has a frequency of #2 (which might be quarterly)
    bt_id_freq = {row["id"]: row["freq"] for row in bt}

    # this maps freq to bills per annum (e.g. id=2 to 4 bills per annum)
    bf_id_num = {row["id"]: row["num_bills_per_annum"] for row in bf}

    # want to process all bill data into an easier to manipulate structure, so make
    # a bill_info[bill_type] with first_bill, last_bill, and year[yr] -> list of bills
    bill_info = {}

    for bill in bd:
        bill_type = bill['bill_type_id']
        yr = int(bill['bill_date'][:4])

        # new bill type: due to sql sorting, this first instance is the last bill
        if bill_type not in bill_info:
            bill_info[bill_type] = {
                'first_bill': {},
                'last_bill': bill,
                'last_bill_year': yr,
                'year': {},
            }
        info = bill_info[bill_type]

        # keep updating first to this matching bill; the final row seen wins
        info['first_bill'] = bill
        info['first_bill_year'] = yr

        # add this bill to the list for this year
        info['year'].setdefault(yr, []).append(bill)

    # now process the bill_info from yr of first bill until END_YEAR
    for bill_type, info in bill_info.items():
        # find freq id based on bill_type id, then use that to find num bills by freq id
        num = bf_id_num[bt_id_freq[bill_type]]

        if 'last_bill' not in info:
            # defensive: nothing to work from, so skip rather than fail below
            print(f"Cannot process bill_type={bill_type} - no bill info for it at all" )
            continue

        for yr in range( info['first_bill_year'], END_YEAR+1 ):
            bills = info['year'].get(yr)
            if bills is not None and len(bills) == num:
                # this year already has the full set of bills
                continue
            add_missing_bills_for_yr( bill_type, bill_info, num, yr )

        # now should have missing bills, calculate ann growth properly
        derive_ann_growth( bill_type, bill_info, num )
|
||||
|
||||
################################################################################
# add_missing_bills_for_yr -- wrapper to call right func based on bill freq
################################################################################
def add_missing_bills_for_yr( bill_type, bill_info, num, yr ):
    """Forward to the helper that matches the number of bills per annum.

    ``num`` of 1, 4 or 12 selects the annual, quarterly or monthly helper
    respectively; any other value does nothing.

    Returns:
        None.
    """
    if num == 1:
        # one bill a year
        add_missing_annual_bill_in_yr( bill_type, bill_info, num, yr )
        return

    if num == 4:
        # four bills a year
        add_missing_quarter_bills_in_yr( bill_type, bill_info, num, yr )
        return

    if num == 12:
        # twelve bills a year
        add_missing_monthly_bills_in_yr( bill_type, bill_info, num, yr )
        return

    return
|
||||
|
||||
def derive_ann_growth( bill_type, bill_info, num ):
    """Derive min/avg/max year-on-year growth (in percent) for one bill type.

    Only bills present in ``bill_info`` are used, and a year only counts when
    it holds exactly ``num`` bills. Growth is calculated for each pair of
    consecutive years that both qualify.

    Args:
        bill_type: key into ``bill_info``.
        bill_info: dict whose ``bill_info[bill_type]['year']`` maps a year to
            the list of bills in that year; each bill has a numeric ``'amount'``.
        num: number of bills that make up a full year for this bill type.

    Returns:
        None. When at least one growth figure exists, min/avg/max are printed
        and the average is passed to set_bill_type_growth; otherwise a
        message is printed and nothing is passed on.
    """
    print(f"Derive annual growth on bill_type: {bill_type} " )

    # totals only for full years; years with no bills or a partial set are
    # left out so they can never be half of a growth pair
    total = {}
    for yr, bills in bill_info[bill_type]['year'].items():
        if len(bills) == num:
            total[yr] = sum(b['amount'] for b in bills)

    # once we have all yr totals: growth of each full year over the full year before it
    growths = []
    for yr in sorted(total):
        prev = total.get(yr - 1)
        if not prev:
            # previous year missing/incomplete, or totalled 0 (growth undefined)
            continue
        growths.append( (total[yr] - prev) / prev * 100 )

    if growths:
        avg_growth = sum(growths) / len(growths)
        print( f"Min growth was: {min(growths)}" )
        print( f"Avg growth is: {avg_growth}" )
        print( f"Max growth was: {max(growths)}" )
        set_bill_type_growth( bill_type, avg_growth )
    else:
        # failsafe (just in case there are not enough full consecutive years to average)
        print( f"Unable to calculate growth!" )
|
||||
|
||||
Reference in New Issue
Block a user