Over-thought version of the new move; about to radically simplify, committing so I don't lose it, just in case

This commit is contained in:
2022-01-07 15:08:44 +11:00
parent ef15fcff99
commit ff41164eff

View File

@@ -802,6 +802,7 @@ def AddDir(job, dirname, in_dir, rel_path, in_path ):
if dir:
e=session.query(Entry).get(dir.eid)
e.exists_on_fs=True
print("returning existing entry for AddDir" )
return dir
dir=Dir( last_import_date=0, rel_path=rel_path, in_path=in_path )
dtype=session.query(FileType).filter(FileType.name=='Directory').first()
@@ -810,8 +811,10 @@ def AddDir(job, dirname, in_dir, rel_path, in_path ):
# no in_dir occurs when we Add the actual Dir for the Path (top of the tree)
if in_dir:
e.in_dir=in_dir
print( f"DDP: set in_dir for {e.name} with in_dir={in_dir}" )
if DEBUG:
AddLogForJob(job, f"DEBUG: Process new dir: {dirname}, rel_path={rel_path}")
print( f"DDP: Process new dir: e={e}" )
session.add(e)
return dir
@@ -1008,6 +1011,144 @@ def MoveFileToRecycleBin(job,del_me):
CleanUpDirInDB(job, parent_dir_e)
return
####################################################################################################################################
# move_me: (single) ENTRY being moved to a different folder. It might be a FILE or a DIR
# dst_storage_path: the path move_me is going into (import/storage, etc.) -- *might* be new or the same
# dst_rel_path: the relative path in the (new?) path, that is the new location
#
# possible scenarios:
# 1: move_me is a File and it is being moved to a new folder (and maybe a new PATH)
# 2: move_me is a File and it is being moved to a 'new' folder (that already exists) and it is a duplicate by name
# 3: move_me is a Directory and it is being moved to a new folder (and maybe a new PATH)
# This then requires all sub_dirs of move_me to have their path/rel_path's reset
# 4: move_me is a Directory and it is being moved to a 'new' folder (that already exists) and some entry/entries have the same name(s)
# This then requires all sub_dirs of move_me to have their path/rel_path's reset
# BUT, if move_me has the same name as dst_storage_path/dst_rel_path:
# This then requires all sub_dirs of move_me to have their contents moved to the existing DIR in the DB for the destination
# AND this could also entail sub1/sub2 or sub1/sub2/file1.jpg needing to handle clashes
#
# So go entry by entry: if we are a Dir, recurse into it (and process it too); if we are a File, process it.
####################################################################################################################################
def MoveEntriesToOtherFolder(job, move_me, dst_storage_path, dst_rel_path):
    """Move the Entry move_me to dst_rel_path inside the Path dst_storage_path.

    The destination Dir is looked up by (path id, rel_path); when it is not in the DB,
    every component of dst_rel_path is passed through AddDir() and created on the
    filesystem.  A Directory has each of its direct sub-entries handed to
    MoveEntryToOtherFolder(); a File is handed over itself.  Finally CleanUpDirInDB()
    is called on the originating dir (move_me itself for a Directory, its in_dir for a File).

    job: job this move runs for (job.id is printed, job is passed to the helpers)
    move_me: the Entry being moved (File or Directory)
    dst_storage_path: Path object the entry is going into (uses .id and .path_prefix)
    dst_rel_path: '/'-separated relative path inside dst_storage_path
    Returns None.
    """
    print( f"DEBUG: MoveEntriesToOtherFolder( job={job.id}, move_me={move_me.name}, dst_storage_path={dst_storage_path.id}, dst_rel_path={dst_rel_path})")

    moving_a_dir = move_me.type.name == "Directory"
    # remember which dir to clean up at the end: a Directory cleans up itself, a File cleans up its parent
    orig_dir_id = move_me.id if moving_a_dir else move_me.in_dir.eid

    # is there already a Dir for dst_rel_path in the destination Path?
    existing_entry = (
        session.query(Entry)
        .join(Dir)
        .join(PathDirLink)
        .join(Path)
        .filter(Path.id==dst_storage_path.id)
        .filter(Dir.rel_path==dst_rel_path)
        .first()
    )
    if existing_entry:
        dst_dir = existing_entry.dir_details
    else:
        # walk dst_rel_path one component at a time (Dir1, then Dir1/Dir2, ...), calling AddDir
        # for each level and making the matching directory on the filesystem
        parent_dir = (
            session.query(Dir)
            .join(PathDirLink)
            .filter(PathDirLink.path_id==dst_storage_path.id)
            .first()
        )
        components = dst_rel_path.split("/")
        for depth, dirname in enumerate(components, start=1):
            part_rel_path = "/".join(components[:depth])
            if DEBUG:
                print( f"Should make/find a Dir in the DB for {dirname} with parent: {parent_dir}, prp={part_rel_path} in {dst_storage_path} path" )
            dst_dir = AddDir( job, dirname, parent_dir, part_rel_path, dst_storage_path )
            try:
                new_dirname = dst_storage_path.path_prefix + '/' + part_rel_path
                print( f"CREATE fs dir: {new_dirname}" )
                os.makedirs( new_dirname, mode=0o777, exist_ok=True )
            except Exception as e:
                # best effort: a failure is reported and the DB side carries on
                print( f"ERROR: Failed to make new dir(s) at new location on filesystem, err: {e}")
            parent_dir = dst_dir
        # NOTE(review): indentation was lost in the text this was rebuilt from; the commit is
        # assumed to run once after all levels have been added -- confirm against the real file
        session.commit()

    # by here, dst_dir is either the existing relevant DIR at the new location, or a new DIR there
    if moving_a_dir:
        # hand over each direct child of move_me one-by-one
        children = session.query(Entry).join(EntryDirLink).filter(EntryDirLink.dir_eid==move_me.id).all()
        for child in children:
            MoveEntryToOtherFolder( job, child, dst_dir )
    else:
        # just a file, so move it into dst_dir
        MoveEntryToOtherFolder( job, move_me, dst_dir )

    # hand the originating dir to CleanUpDirInDB now that the move is done
    old_dir = session.query(Entry).filter(Entry.id==orig_dir_id).first()
    CleanUpDirInDB(job, old_dir)
####################################################################################################################################
# This func is recursively called to move the single ENTRY: move_me to the new DIR: dst_dir
# No separate destination path is passed in: the destination Path is taken from dst_dir.in_path. When a moved Dir has no
# same-named Dir at the destination we do not make a new Dir in the DB; as an optimisation we reuse the (old) Dir by resetting its rel_path and Path
#
# This is where the ENTRY has its in_dir and in_path reset appropriately.
####################################################################################################################################
def MoveEntryToOtherFolder(job, move_me, dst_dir ):
    """Move the single Entry move_me (a File or a Directory) into the Dir dst_dir.

    Directory:
      * if the destination Path already has a Dir at dst_dir.rel_path + '/' + move_me.name,
        every direct sub-entry of move_me is moved (recursively) into that existing Dir and
        CleanUpDirInDB() is then called on move_me;
      * otherwise move_me is reused: its rel_path, in_dir and in_path are reset and the
        directory is moved on the filesystem with a single os.replace().
    File:
      * if dst_dir already holds an entry with the same name, the file is renamed to
        'Move of <name>'; its in_dir and in_path are reset and it is moved with os.replace().

    job: job this move runs for (job.id is printed, job is passed to CleanUpDirInDB)
    move_me: the Entry being moved
    dst_dir: destination Dir (uses .eid, .rel_path and .in_path)
    Returns None.  Exceptions raised by os.replace() are not caught here.
    """
    print( f"DEBUG: MoveEntryToOtherFolder( job={job.id}, move_me={move_me.name}, dst_dir={dst_dir.eid} )")
    orig_name=move_me.name
    # capture the current on-disk location before any DB fields that feed FullPathOnFS() are changed
    orig_fs_pos=move_me.FullPathOnFS()
    if move_me.type.name == "Directory":
        # get the contents of move_me now, before dst_dir is (potentially) re-pointed below.
        # FIX: this used to filter on dst_dir.eid, which selected the destination's own children
        # (including the clashing dir itself) instead of the entries that actually need moving
        sub_ents = session.query(Entry).join(EntryDirLink).filter(EntryDirLink.dir_eid==move_me.id).all()
        # need to check to see if existing dir in new location, (eg if TEST/T1/2018 -> TEST/2018 and we now are moving TEST/T1/2018/A ->TEST/2018/ - then A might exist in both)
        # at this point move_me is TEST/T1/2018/A, and dst_dir is TEST/2018, so look for a DIR with rel_path of dst_dir.rel_path + / + move_me.name
        existing_dir=session.query(Entry).join(Dir).join(PathDirLink).join(Path).filter(Path.id==dst_dir.in_path.id).filter(Dir.rel_path==dst_dir.rel_path+'/'+move_me.name).first()
        # new location has same named dir, so lets use it, rather than use move_me
        if existing_dir:
            dst_dir=existing_dir.dir_details
        else:
            # no need to create a new Dir object, we can use 'move_me' as it is unique to the new destination
            print( f"DDP: need to reset dst_dir's rel_path to new location!" )
            print( f"### DDP: dst_dir={dst_dir}" )
            print( f"### DDP: move_me={move_me}" )
            # reset this Dir's rel_path to its parents (dst_dir), plus its name
            move_me.dir_details.rel_path = dst_dir.rel_path + '/' + move_me.name
            move_me.in_dir = dst_dir
            move_me.in_path = dst_dir.in_path
            print( f"### DDP: move_me={move_me}" )
            session.add(move_me)
            # move the actual dir to its new location
            print( f"DDP: doing actual FS move for DIR - mv {orig_fs_pos} -> {move_me.FullPathOnFS()}" )
            os.replace( orig_fs_pos, move_me.FullPathOnFS() )
            return
        # duplicate dir name at the destination: merge move_me's contents into the existing dir
        for sub_ent in sub_ents:
            print( f"DDP: in Move, going through sub-ents, dst_dir={dst_dir}" )
            print( f"DDP: in Move, going through sub-ents, move_me={move_me}" )
            MoveEntryToOtherFolder( job, sub_ent, dst_dir )
        # given the else above returns, then the move_me directory was a duplicate, and we have moved its contents, so its now empty in the old fstree so remove it
        CleanUpDirInDB(job, move_me)
    else:
        # check for duplicate name in the destination dir
        clash=session.query(Entry).join(EntryDirLink).join(Dir).filter(Entry.name==move_me.name,Dir.eid==dst_dir.eid).first()
        if clash:
            print( f"Moving {move_me.name} and it is a duplicate of a another file (by name)" )
            # as we have original fs_pos, we can just rename new to be Move of XXX, and it will move it to that new name
            move_me.name = 'Move of ' + move_me.name
        # its a unique file in this new structure, so just make sure it is in the right DIR
        print( f"change orig_name={orig_name} -> {move_me} to {dst_dir}" )
        move_me.in_dir = dst_dir
        move_me.in_path = dst_dir.in_path
        session.add(move_me)
        print( f"move_me is now {move_me}" )
        # move the actual file to its new location
        print( f"DDP: doing actual FS move for FILE - mv {orig_fs_pos} -> {move_me.FullPathOnFS()}" )
        os.replace( orig_fs_pos, move_me.FullPathOnFS() )
    return
####################################################################################################################################
# Function that moves a file into a new folder in any path (usually from import to storage) - if needed it makes the folder on the FS,
# moves the file into the folder on the FS and then changes the path to the appropriate one
@@ -1015,19 +1156,33 @@ def MoveFileToRecycleBin(job,del_me):
def MoveFileToNewFolderInStorage(job,move_me, dst_storage_path, dst_rel_path):
orig_parent_dir_e=session.query(Entry).get(move_me.in_dir.eid)
if DEBUG:
print( f"MoveFileToNewFolderInStorage: {move_me} to {dst_storage_path} in new? folder: {dst_storage_path}")
try:
dst_dir=dst_storage_path.path_prefix + '/' + dst_rel_path
if DEBUG:
print( f"would make dir: {dst_dir}" )
os.makedirs( dst_dir,mode=0o777, exist_ok=True )
src=move_me.FullPathOnFS()
dst=dst_dir + '/' + move_me.name
os.replace( src, dst )
if DEBUG:
print( f"would mv {src} {dst}" )
except Exception as e:
print( f"ERROR: Failed to move file to new location on filesystem, err: {e}")
print( f"MoveFileToNewFolderInStorage: {move_me} to {dst_storage_path} in new? folder: {dst_rel_path}")
# NEED TO SEE IF dst_rel_path already exists in dst_storage_path
move_to_existing=None
dir=session.query(Entry).join(Dir).join(PathDirLink).join(Path).filter(Path.id==dst_storage_path.id).filter(Dir.rel_path==dst_rel_path).first()
if dir:
print( f"DEBUG: Okay, moving to an existing folder: ")
move_to_existing=dir
# IF it does, then don't move the folder, move its contents on the FS -- # RECURSIVELY!!! (as a sub_dir could also exist in new dst/sub_dir)
# IF it does, then don't just fudge the parent dir, reset the children entries to the existing Dir
if move_to_existing == None:
try:
dst_dir=dst_storage_path.path_prefix + '/' + dst_rel_path
if DEBUG:
print( f"would make dir: {dst_dir}" )
os.makedirs( dst_dir,mode=0o777, exist_ok=True )
src=move_me.FullPathOnFS()
dst=dst_dir + '/' + move_me.name
os.replace( src, dst )
if DEBUG:
print( f"would mv {src} {dst}" )
except Exception as e:
print( f"ERROR: Failed to move file to new location on filesystem, err: {e}")
else:
# might be able to do a cp -a, and then del old files? (would work in # theory)
print( "Not moving files on FS as yet" )
# need these for AddDir calls below to work
parent_dir=session.query(Dir).join(PathDirLink).filter(PathDirLink.path_id==dst_storage_path.id).first()
@@ -1039,12 +1194,14 @@ def MoveFileToNewFolderInStorage(job,move_me, dst_storage_path, dst_rel_path):
for dirname in dst_rel_path.split("/"):
part_rel_path += f"{dirname}"
if DEBUG:
print( f"Should make a Dir in the DB for {dirname} with parent: {parent_dir}, prp={part_rel_path} in storage path" )
print( f"Should make a Dir in the DB for {dirname} with parent: {parent_dir}, prp={part_rel_path} in {dst_storage_path} path" )
new_dir=AddDir( job, dirname, parent_dir, part_rel_path, dst_storage_path )
print("OKAY, we called AddDir, but hopefully it just used existing.")
parent_dir=new_dir
part_rel_path += "/"
if DEBUG:
print( f"now should change {move_me} in_dir to {new_dir} created above in {dst_storage_path}" )
move_me.in_dir = new_dir
move_me.in_path = dst_storage_path
if move_me.type.name == "Directory":
@@ -1052,16 +1209,37 @@ def MoveFileToNewFolderInStorage(job,move_me, dst_storage_path, dst_rel_path):
print( f"{move_me.name} is a dir, so reset its dir_details path to the new path too" )
move_me.dir_details.in_path = dst_storage_path
move_me.dir_details.rel_path = move_me.in_dir.rel_path+'/'+move_me.name
ResetAnySubdirPaths( move_me, dst_storage_path, move_me.dir_details.rel_path )
if move_to_existing:
# okay, need to take all direct children move them to existing dir instead
MoveContentToNewDirInDB( move_me, move_to_existing )
ResetAnySubdirPaths( move_to_existing, dst_storage_path, move_to_existing.dir_details.rel_path )
else:
ResetAnySubdirPaths( move_me, dst_storage_path, move_me.dir_details.rel_path )
if DEBUG:
print( f"DONE change of {move_me} in_dir to {new_dir} created above" )
session.add(move_me)
CleanUpDirInDB(job, orig_parent_dir_e)
# reset last_hash_date otherwise, the move resets ctime on the FS, and so scanning sees a 'new' file
move_me.file_details.last_hash_date = time.time()
if move_me.type.name != "Directory":
move_me.file_details.last_hash_date = time.time()
AddLogForJob(job, f"{move_me.name} - (moved to {os.path.dirname(move_me.FullPathOnFS())})" )
return
####################################################################################################################################
# moves all entries in 'old_dir' to 'new_dir' - At the moment only used when we move
# a directory from one part of the fs tree over to an existing part of the fs tree
####################################################################################################################################
def MoveContentToNewDirInDB( old_dir, new_dir ):
    """Re-parent, in the DB only, every entry directly inside old_dir so it sits in new_dir.

    old_dir: object whose .id identifies the directory being emptied
    new_dir: value assigned to each child's in_dir
    Nothing is touched on the filesystem and nothing is committed here.  Returns None.
    """
    children_query = (
        session.query(Entry)
        .join(FileType)
        .join(EntryDirLink)
        .filter(EntryDirLink.dir_eid==old_dir.id)
    )
    for child in children_query.all():
        child.in_dir = new_dir
        session.add(child)
####################################################################################################################################
# take a dir that is being moved, and reset its own and any sub dirs rel_paths,
# to the new PATH and relevant rel_path
####################################################################################################################################
def ResetAnySubdirPaths( moving_dir, dst_storage_path, parent_rel_path ):
if DEBUG:
print( f"ResetAnySubdirPaths( {moving_dir.name}, {dst_storage_path.path_prefix}, {parent_rel_path} )" )
@@ -1618,9 +1796,9 @@ def JobMoveFiles(job):
for jex in job.extra:
if 'eid-' in jex.name:
# move_me=session.query(Entry).join(File).filter(Entry.id==jex.value).first()
move_me=session.query(Entry).get(jex.value)
MoveFileToNewFolderInStorage(job, move_me, dst_storage_path, f"{prefix}{suffix}" )
MoveEntriesToOtherFolder( job, move_me, dst_storage_path, f"{prefix}{suffix}" )
# MoveFileToNewFolderInStorage(job, move_me, dst_storage_path, f"{prefix}{suffix}" )
now=datetime.now(pytz.utc)
next_job=Job(start_time=now, last_update=now, name="checkdups", state="New", wait_for=None, pa_job_state="New", current_file_num=0 )
session.add(next_job)