From 665f45c03e9767d027f54aa45d27923f778d7bb1 Mon Sep 17 00:00:00 2001 From: Damien De Paoli Date: Fri, 7 Jan 2022 17:32:23 +1100 Subject: [PATCH] working condensed / logical version of moving on Files system - works for multiple scenarios --- pa_job_manager.py | 248 ++++++++++++---------------------------------- 1 file changed, 61 insertions(+), 187 deletions(-) diff --git a/pa_job_manager.py b/pa_job_manager.py index fa07fa3..e1a04c6 100644 --- a/pa_job_manager.py +++ b/pa_job_manager.py @@ -1014,227 +1014,102 @@ def MoveFileToRecycleBin(job,del_me): #################################################################################################################################### # move_me: (single) ENTRY being moved to a different folder. It might be a FILE or a DIR -# dst_storage_path: the path move_me is going into (import/storage, etc.) -- *might* be new or the same +# dst_storage_path: the path move_me is going into (import/storage, etc.) # dst_rel_path: the relative path in the (new?) path, that is the new location +# (might contain any combo of existing/new dirs in the location) # # possible scenarios: # 1: move_me is a File and it is being moved to a new folder (and maybe a new PATH) -# 2: move_me is a File and it is being moved a 'new' folder (that already exists) and its is a duplicate by name +# 2: move_me is a File and it is being moved to an existing folder +# need to just make sure that there is not a duplicate name for this file in existing folder # 3: move_me is a Directory and it is being moved to a new folder (and maybe a new PATH) +# need to 'create' new Dir in new location (really just a logical move of the DB entry) # This then requires all sub_dirs of move_me to have their path/rel_path's reset -# 4: move_me is a Directory and it is being moved a 'new' folder (that already exists) and some entry/entries have the same name(s) +# 4: move_me is a Directory and it is being moved an existing folder +# need to move old Dir INTO existing folder # This then requires all sub_dirs of move_me to have their path/rel_path's reset -# BUT, if move_me has the same name as dst_storage_path/dst_rel_path: -# This then requires all sub_dirs of move_me to have their contents moved to the existing DIR in the DB for the destination -# AND this could also entail a sub1/sub2 or sub1/sub2/file1.jpg need to handle clashes # -# So go entry by entry, if we are a Dir, recurse into it (and process it too), if we are an File process it. #################################################################################################################################### def MoveEntriesToOtherFolder(job, move_me, dst_storage_path, dst_rel_path): - - print( f"DEBUG: MoveEntriesToOtherFolder( job={job.id}, move_me={move_me.name}, dst_storage_path={dst_storage_path.id}, dst_rel_path={dst_rel_path})") - # keep the dir of this entry to check if it is empty later on (for a file use in_dir, for a dir, choose itself, as in scen 3 it needs to be deleted) - if move_me.type.name == "Directory": - orig_dir_id = move_me.id - else: - orig_dir_id = move_me.in_dir.eid - - # see if there is an existing dir of new dst_rel_path already - dir=session.query(Entry).join(Dir).join(PathDirLink).join(Path).filter(Path.id==dst_storage_path.id).filter(Dir.rel_path==dst_rel_path).first() - if dir: - dst_dir=dir.dir_details - else: - - # okay, go through new relative path and AddDir any missing subdirs of this - # path (think Import/Dir1/Dir2) which b/c we have dst_storage_path in AddDir will - # create Storage/Dir1, Storage/Dir1/Dir2 - parent_dir=session.query(Dir).join(PathDirLink).filter(PathDirLink.path_id==dst_storage_path.id).first() - part_rel_path="" - for dirname in dst_rel_path.split("/"): - part_rel_path += f"{dirname}" - if DEBUG: - print( f"Should make/find a Dir in the DB for {dirname} with parent: {parent_dir}, prp={part_rel_path} in {dst_storage_path} path" ) - dst_dir=AddDir( job, dirname, parent_dir, part_rel_path, dst_storage_path ) - try: - new_dirname=dst_storage_path.path_prefix + '/' + part_rel_path - print( f"CREATE fs dir: {new_dirname}" ) - os.makedirs( new_dirname,mode=0o777, exist_ok=True ) - except Exception as e: - print( f"ERROR: Failed to make new dir(s) at new location on filesystem, err: {e}") - parent_dir=dst_dir - part_rel_path += "/" - session.commit() - - ### - # by here, dst_dir is either the existing relavant DIR at new location, or a new DIR at new location - ### - - # get entries that are in move_me DIR and deal with them one-by-one - if move_me.type.name == "Directory": - sub_ents = session.query(Entry).join(EntryDirLink).filter(EntryDirLink.dir_eid==move_me.id).all() - for sub_ent in sub_ents: - MoveEntryToOtherFolder( job, sub_ent, dst_dir ) - # just a file, so move me to new dst_dir - else: - MoveEntryToOtherFolder( job, move_me, dst_dir ) - - # if this is now empty, this will deal with it, if any content left, this will do nothing - old_dir = session.query(Entry).filter(Entry.id==orig_dir_id).first() - CleanUpDirInDB(job, old_dir) - return - - -#################################################################################################################################### -# This func is recursively called to move the single ENTRY: move_me entry to the new DIR: dst_dir -# dst_path is passed in as well as we can recurse into the original Dir structures that are in the (potentially) old path -# and instead of making a new Dir, etc. in the DB, for optimising, we just reuse the (old) Dir by resetting its rel_path and Path -# -# This is where the ENTRY has its in_dir and in_path reset appropriately. -#################################################################################################################################### -def MoveEntryToOtherFolder(job, move_me, dst_dir ): - print( f"DEBUG: MoveEntryToOtherFolder( job={job.id}, move_me={move_me.name}, dst_dir={dst_dir.eid} )") - + if DEBUG: + print( f"DEBUG: MoveEntriesToOtherFolder( job={job.id}, move_me={move_me.name}, dst_storage_path={dst_storage_path.id}, dst_rel_path={dst_rel_path})") orig_name=move_me.name orig_fs_pos=move_me.FullPathOnFS() if move_me.type.name == "Directory": - # get sub_ents now before we (potentially) mess with dst_dir - sub_ents = session.query(Entry).join(EntryDirLink).filter(EntryDirLink.dir_eid==dst_dir.eid).all() - - # need to check to see if existing dir in new location, (eg if TEST/T1/2018 -> TEST/2018 and we now are moving TEST/T1/2018/A ->TEST/2018/ - then A might exist in both) - # at this point move_me is TEST/T1/2018/A, and dst_dir is TEST/2018, so look for a DIR with rel_path of dst_dir.rel_path + / + move_me.name - dir=session.query(Entry).join(Dir).join(PathDirLink).join(Path).filter(Path.id==dst_dir.in_path.id).filter(Dir.rel_path==dst_dir.rel_path+'/'+move_me.name).first() - # new location has same named dir, so lets use it, rather than use move_me - if dir: - dst_dir=dir.dir_details - else: - # no need to create a new Dir object, we can use 'move_me' as it is # unique to the new destination - print( f"DDP: need to reset dst_dir's rel_path to new location!" ) - print( f"### DDP: dst_dir={dst_dir}" ) - print( f"### DDP: move_me={move_me}" ) - # reset this Dir's rel_path to its parents (dst_dir), plus its name + # see if there is an existing dir of new dst_rel_path already + parent_dir=session.query(Entry).join(Dir).join(PathDirLink).join(Path).filter(Path.id==dst_storage_path.id).filter(Dir.rel_path==dst_rel_path).first() + if parent_dir: + # scen 4 move move_me into existing dst_dir of requested location + dst_dir=parent_dir.dir_details + # remember, we are moving (move_me - a dir) into this existing dir (dst_dir), so add the name to dst_dir's rel_path... move_me.dir_details.rel_path = dst_dir.rel_path + '/' + move_me.name move_me.in_dir = dst_dir move_me.in_path = dst_dir.in_path - print( f"### DDP: move_me={move_me}" ) session.add(move_me) # move the actual dir to its new location - print( f"DDP: doing actual FS move for DIR - mv {orig_fs_pos} -> {move_me.FullPathOnFS()}" ) + AddLogForJob( job, f"INFO: move {orig_fs_pos} -> {move_me.FullPathOnFS()}" ) + os.replace( orig_fs_pos, move_me.FullPathOnFS() ) + # we use the new path to this new Dir with the full location (the old dir is put into the new location) + ResetAnySubdirPaths( move_me, dst_storage_path, move_me.dir_details.rel_path ) + else: + # scen 3 -- effectively renaming a dir + # we have a new path (dst_rel_path)... The last component of that is what we will rename move_me to. + # however, it couild be a path of existing/new or a combo up to the rename part, so use AddDir to find/create + # as needed to then "move" (rename) move_me to into the full dst_rel_path + part_rel_path="" + for dirname in os.path.dirname(dst_rel_path).split("/"): + part_rel_path += f"{dirname}" + parent_dir=AddDir( job, dirname, parent_dir, part_rel_path, dst_storage_path ) + part_rel_path += "/" + move_me.in_dir = parent_dir + move_me.dir_details.rel_path = dst_rel_path + move_me.name = os.path.basename(dst_rel_path) + ResetAnySubdirPaths( move_me, dst_storage_path, dst_rel_path ) + AddLogForJob( job, f"INFO: move {orig_fs_pos} -> {move_me.FullPathOnFS()}" ) os.replace( orig_fs_pos, move_me.FullPathOnFS() ) return - - for sub_ent in sub_ents: - print( f"DDP: in Move, going through sub-ents, dst_dir={dst_dir}" ) - print( f"DDP: in Move, going through sub-ents, move_me={move_me}" ) - MoveEntryToOtherFolder( job, sub_ent, dst_dir ) - - # given the else above returns, then the move_me directory was a duplicate, and we have moved its contents, so its now empty in the old fstree so remove it - CleanUpDirInDB(job, move_me) else: - # check for duplicate name? + # for a file, just get the top of the Path, and then we will use it to + # potentially make all the sub dirs for the new location + parent_dir=session.query(Dir).join(PathDirLink).join(Path).filter(Path.id==dst_storage_path.id).filter(Dir.rel_path=='').first() + + # + # TODO: this create Dirs in Path is everywhere, make it a function BUT + # also, the code above for the Dirs is very similar, could wrap them + # both with slightly different params and it would work... + # MAKE THIS A FUNCTION + # + part_rel_path="" + for dirname in dst_rel_path.split("/"): + part_rel_path += f"{dirname}" + parent_dir=AddDir( job, dirname, parent_dir, part_rel_path, dst_storage_path ) + part_rel_path += "/" + dst_dir=parent_dir + + # just make sure the Dir tree exists on the FS + os.makedirs( dst_storage_path.path_prefix + '/' + dst_rel_path, mode=0o777, exist_ok=True ) + + # check for duplicate name? (scen 2) e=session.query(Entry).join(EntryDirLink).join(Dir).filter(Entry.name==move_me.name,Dir.eid==dst_dir.eid).first() if e: - print( f"Moving {move_me.name} and it is a duplicate of a another file (by name)" ) + AddLogForJob( job, f"INFO: Moving {move_me.name} and it is a duplicate of a another file (by name), prepending 'Move of'" ) # as we have original fs_pos, we can just rename new to be Move of XXX, and it will move it to that new name move_me.name = 'Move of ' + move_me.name # its a unique file in this new structure, so just make sure it is in the right DIR - print( f"change orig_name={orig_name} -> {move_me} to {dst_dir}" ) + orig_dir_eid = move_me.in_dir.eid move_me.in_dir = dst_dir move_me.in_path = dst_dir.in_path session.add(move_me) - print( f"move_me is now {move_me}" ) # move the actual file to its new location - print( f"DDP: doing actual FS move for FILE - mv {orig_fs_pos} -> {move_me.FullPathOnFS()}" ) + AddLogForJob( job, f"DEBUG: move of FILE - {orig_fs_pos} -> {move_me.FullPathOnFS()}" ) os.replace( orig_fs_pos, move_me.FullPathOnFS() ) + + old_dir = session.query(Entry).filter(Entry.id==orig_dir_eid).first() + CleanUpDirInDB(job, old_dir) return -#################################################################################################################################### -# Function that moves a file into a new folder in any path (usually form import to storage) - if needed it makes the folder on the FS, -# moves the file into the folder on the FS and then changes the path to the appropriate one -#################################################################################################################################### -def MoveFileToNewFolderInStorage(job,move_me, dst_storage_path, dst_rel_path): - orig_parent_dir_e=session.query(Entry).get(move_me.in_dir.eid) - if DEBUG: - print( f"MoveFileToNewFolderInStorage: {move_me} to {dst_storage_path} in new? folder: {dst_rel_path}") - - # NEED TO SEE IF dst_rel_path already exists in dst_storage_path - move_to_existing=None - dir=session.query(Entry).join(Dir).join(PathDirLink).join(Path).filter(Path.id==dst_storage_path.id).filter(Dir.rel_path==dst_rel_path).first() - if dir: - print( f"DEBUG: Okay, moving to an existing folder: ") - move_to_existing=dir - # IF it does, then don't move the folder, move its contents on the FS -- # RECURSIVELY!!! (as a sub_dir could also exist in new dst/sub_dir) - # IF it does, then don't just fudge the parent dir, reset the children entries to the existing Dir - - if move_to_existing == None: - try: - dst_dir=dst_storage_path.path_prefix + '/' + dst_rel_path - if DEBUG: - print( f"would make dir: {dst_dir}" ) - os.makedirs( dst_dir,mode=0o777, exist_ok=True ) - src=move_me.FullPathOnFS() - dst=dst_dir + '/' + move_me.name - os.replace( src, dst ) - if DEBUG: - print( f"would mv {src} {dst}" ) - except Exception as e: - print( f"ERROR: Failed to move file to new location on filesystem, err: {e}") - else: - # might be able to do a cp -a, and then del old files? (would work in # theory) - print( "Not moving files on FS as yet" ) - - # need these for AddDir calls below to work - parent_dir=session.query(Dir).join(PathDirLink).filter(PathDirLink.path_id==dst_storage_path.id).first() - - # okay, go through new relative path and AddDir any missing subdirs of this - # path (think Import/Dir1/Dir2) which b/c we have dst_storage_path in AddDir will - # create Storage/Dir1, Storage/Dir1/Dir2 - part_rel_path="" - for dirname in dst_rel_path.split("/"): - part_rel_path += f"{dirname}" - if DEBUG: - print( f"Should make a Dir in the DB for {dirname} with parent: {parent_dir}, prp={part_rel_path} in {dst_storage_path} path" ) - new_dir=AddDir( job, dirname, parent_dir, part_rel_path, dst_storage_path ) - print("OKAY, we called AddDir, but hopefully it just used existing.") - parent_dir=new_dir - part_rel_path += "/" - if DEBUG: - print( f"now should change {move_me} in_dir to {new_dir} created above in {dst_storage_path}" ) - - move_me.in_dir = new_dir - move_me.in_path = dst_storage_path - if move_me.type.name == "Directory": - if DEBUG: - print( f"{move_me.name} is a dir, so reset its dir_details path to the new path too" ) - move_me.dir_details.in_path = dst_storage_path - move_me.dir_details.rel_path = move_me.in_dir.rel_path+'/'+move_me.name - if move_to_existing: - # okay, need to take all direct children move them to existing dir instead - MoveContentToNewDirInDB( move_me, move_to_existing ) - ResetAnySubdirPaths( move_to_existing, dst_storage_path, move_to_existing.dir_details.rel_path ) - else: - ResetAnySubdirPaths( move_me, dst_storage_path, move_me.dir_details.rel_path ) - if DEBUG: - print( f"DONE change of {move_me} in_dir to {new_dir} created above" ) - session.add(move_me) - CleanUpDirInDB(job, orig_parent_dir_e) - # reset last_hash_date otherwise, the move resets ctime on the FS, and so scanning sees a 'new' file - if move_me.type.name != "Directory": - move_me.file_details.last_hash_date = time.time() - AddLogForJob(job, f"{move_me.name} - (moved to {os.path.dirname(move_me.FullPathOnFS())})" ) - return - -#################################################################################################################################### -# moves all entries in 'old_dir' to 'new_dir' - At the moment only used when we move -# a directory from one part of the fs tree over to an existing part of the fs tree -#################################################################################################################################### -def MoveContentToNewDirInDB( old_dir, new_dir ): - sub_ents = session.query(Entry).join(FileType).join(EntryDirLink).filter(EntryDirLink.dir_eid==old_dir.id).all() - for sub_ent in sub_ents: - sub_ent.in_dir = new_dir - session.add(sub_ent) - return #################################################################################################################################### # take a dir that is being moved, and reset its own and any sub dirs rel_paths, @@ -1798,7 +1673,6 @@ def JobMoveFiles(job): if 'eid-' in jex.name: move_me=session.query(Entry).get(jex.value) MoveEntriesToOtherFolder( job, move_me, dst_storage_path, f"{prefix}{suffix}" ) -# MoveFileToNewFolderInStorage(job, move_me, dst_storage_path, f"{prefix}{suffix}" ) now=datetime.now(pytz.utc) next_job=Job(start_time=now, last_update=now, name="checkdups", state="New", wait_for=None, pa_job_state="New", current_file_num=0 ) session.add(next_job)