diff --git a/pa_job_manager.py b/pa_job_manager.py index 56a3fb4..fa07fa3 100644 --- a/pa_job_manager.py +++ b/pa_job_manager.py @@ -802,6 +802,7 @@ def AddDir(job, dirname, in_dir, rel_path, in_path ): if dir: e=session.query(Entry).get(dir.eid) e.exists_on_fs=True + print("returning existing entry for AddDir" ) return dir dir=Dir( last_import_date=0, rel_path=rel_path, in_path=in_path ) dtype=session.query(FileType).filter(FileType.name=='Directory').first() @@ -810,8 +811,10 @@ def AddDir(job, dirname, in_dir, rel_path, in_path ): # no in_dir occurs when we Add the actual Dir for the Path (top of the tree) if in_dir: e.in_dir=in_dir + print( f"DDP: set in_dir for {e.name} with in_dir={in_dir}" ) if DEBUG: AddLogForJob(job, f"DEBUG: Process new dir: {dirname}, rel_path={rel_path}") + print( f"DDP: Process new dir: e={e}" ) session.add(e) return dir @@ -1008,6 +1011,144 @@ def MoveFileToRecycleBin(job,del_me): CleanUpDirInDB(job, parent_dir_e) return + +#################################################################################################################################### +# move_me: (single) ENTRY being moved to a different folder. It might be a FILE or a DIR +# dst_storage_path: the path move_me is going into (import/storage, etc.) -- *might* be new or the same +# dst_rel_path: the relative path in the (new?) 
path, that is the new location +# +# possible scenarios: +# 1: move_me is a File and it is being moved to a new folder (and maybe a new PATH) +# 2: move_me is a File and it is being moved to a 'new' folder (that already exists) and it is a duplicate by name +# 3: move_me is a Directory and it is being moved to a new folder (and maybe a new PATH) +# This then requires all sub_dirs of move_me to have their path/rel_paths reset +# 4: move_me is a Directory and it is being moved to a 'new' folder (that already exists) and some entry/entries have the same name(s) +# This then requires all sub_dirs of move_me to have their path/rel_paths reset +# BUT, if move_me has the same name as dst_storage_path/dst_rel_path: +# This then requires all sub_dirs of move_me to have their contents moved to the existing DIR in the DB for the destination +# AND this could also mean that a sub1/sub2 or sub1/sub2/file1.jpg needs its name clash handled +# +# So go entry by entry: if we are a Dir, recurse into it (and process it too); if we are a File, process it. 
####################################################################################################################################
def MoveEntriesToOtherFolder(job, move_me, dst_storage_path, dst_rel_path):
    """Move the entry move_me (a file or a directory) under dst_rel_path in dst_storage_path.

    job:              job object, passed through to AddDir / MoveEntryToOtherFolder / CleanUpDirInDB
    move_me:          Entry being moved; move_me.type.name tells a "Directory" from anything else
    dst_storage_path: Path object of the destination (its .id and .path_prefix are used)
    dst_rel_path:     '/'-separated relative path of the destination dir inside dst_storage_path

    If a Dir with that rel_path already exists in the destination Path it is reused, otherwise
    every component of dst_rel_path is passed to AddDir and created on the filesystem.
    For a Directory, each direct child entry is handed to MoveEntryToOtherFolder; for anything
    else move_me itself is. Finally the original dir is passed to CleanUpDirInDB.
    """
    print( f"DEBUG: MoveEntriesToOtherFolder( job={job.id}, move_me={move_me.name}, dst_storage_path={dst_storage_path.id}, dst_rel_path={dst_rel_path})")

    moving_a_dir = move_me.type.name == "Directory"

    # remember which dir to hand to CleanUpDirInDB at the end: the containing dir for a
    # file, or the dir itself when a dir is being moved
    orig_dir_id = move_me.id if moving_a_dir else move_me.in_dir.eid

    # is there already a Dir with the wanted rel_path in the destination Path?
    existing_dir_e = (
        session.query(Entry).join(Dir).join(PathDirLink).join(Path)
        .filter(Path.id==dst_storage_path.id)
        .filter(Dir.rel_path==dst_rel_path)
        .first()
    )
    if existing_dir_e:
        dst_dir = existing_dir_e.dir_details
    else:
        # walk dst_rel_path one component at a time (think Dir1/Dir2): AddDir each level
        # below the Path's own Dir, and make sure it exists on the filesystem too
        parent_dir = (
            session.query(Dir).join(PathDirLink)
            .filter(PathDirLink.path_id==dst_storage_path.id)
            .first()
        )
        components_so_far = []
        for dirname in dst_rel_path.split("/"):
            components_so_far.append(dirname)
            part_rel_path = "/".join(components_so_far)
            if DEBUG:
                print( f"Should make/find a Dir in the DB for {dirname} with parent: {parent_dir}, prp={part_rel_path} in {dst_storage_path} path" )
            dst_dir = AddDir( job, dirname, parent_dir, part_rel_path, dst_storage_path )
            try:
                new_dirname = dst_storage_path.path_prefix + '/' + part_rel_path
                print( f"CREATE fs dir: {new_dirname}" )
                os.makedirs( new_dirname, mode=0o777, exist_ok=True )
            except Exception as e:
                # best effort: report and carry on
                print( f"ERROR: Failed to make new dir(s) at new location on filesystem, err: {e}")
            # the dir just added is the parent of the next component
            parent_dir = dst_dir
        session.commit()

    ###
    # by here, dst_dir is either the existing relevant DIR at the new location, or a new DIR at the new location
    ###

    if not moving_a_dir:
        # just a file, so move it into dst_dir
        MoveEntryToOtherFolder( job, move_me, dst_dir )
    else:
        # a dir: deal with each of its direct children one-by-one
        children = session.query(Entry).join(EntryDirLink).filter(EntryDirLink.dir_eid==move_me.id).all()
        for sub_ent in children:
            MoveEntryToOtherFolder( job, sub_ent, dst_dir )

    # hand the original dir to CleanUpDirInDB (expected to remove it only if it is now empty)
    old_dir = session.query(Entry).filter(Entry.id==orig_dir_id).first()
    CleanUpDirInDB(job, old_dir)
    return


####################################################################################################################################
# This func is recursively called to move the single ENTRY: move_me entry to the new DIR: dst_dir
# dst_path is passed in as well as we can recurse into the original Dir structures that are in the (potentially) old path
# and instead of making a new Dir, etc. in the DB, for optimising, we just reuse the (old) Dir by resetting its rel_path and Path
#
# This is where the ENTRY has its in_dir and in_path reset appropriately.
####################################################################################################################################
def MoveEntryToOtherFolder(job, move_me, dst_dir ):
    """Move the single entry move_me (file or directory) into the Dir dst_dir, in the DB and on the FS.

    job:     job object, passed through to CleanUpDirInDB and to the recursive calls
    move_me: Entry being moved
    dst_dir: Dir object that move_me should end up in (its eid, rel_path and in_path are used)

    Directory, no same-named dir at the destination:
        move_me is reused: its rel_path / in_dir / in_path are reset and the directory is
        moved on the filesystem with os.replace.
    Directory, same-named dir already at the destination:
        each direct child of move_me is moved (recursively) into that existing dir, then
        move_me is handed to CleanUpDirInDB.
    Anything else (a file):
        if an entry of the same name already exists in dst_dir, move_me is renamed to
        'Move of <name>'; then in_dir / in_path are reset and the file is moved with os.replace.

    os.replace errors are not caught here and propagate to the caller.
    """
    print( f"DEBUG: MoveEntryToOtherFolder( job={job.id}, move_me={move_me.name}, dst_dir={dst_dir.eid} )")

    # capture these before anything about move_me is changed below
    orig_name=move_me.name
    orig_fs_pos=move_me.FullPathOnFS()

    if move_me.type.name == "Directory":
        # the entries we may have to move are the children of the dir being moved (move_me).
        # This used to filter on dst_dir.eid, which returned the contents of the destination
        # dir instead, so a merge moved the wrong entries and left move_me's content behind.
        sub_ents = session.query(Entry).join(EntryDirLink).filter(EntryDirLink.dir_eid==move_me.id).all()

        # need to check to see if existing dir in new location, (eg if TEST/T1/2018 -> TEST/2018 and we now are moving TEST/T1/2018/A -> TEST/2018/ - then A might exist in both)
        # at this point move_me is TEST/T1/2018/A, and dst_dir is TEST/2018, so look for a DIR with rel_path of dst_dir.rel_path + / + move_me.name
        same_named_dir=session.query(Entry).join(Dir).join(PathDirLink).join(Path).filter(Path.id==dst_dir.in_path.id).filter(Dir.rel_path==dst_dir.rel_path+'/'+move_me.name).first()

        if not same_named_dir:
            # no need to create a new Dir object, we can use 'move_me' as it is unique to the new destination
            print( f"DDP: need to reset dst_dir's rel_path to new location!" )
            print( f"### DDP: dst_dir={dst_dir}" )
            print( f"### DDP: move_me={move_me}" )
            # reset this Dir's rel_path to its parent's (dst_dir), plus its name
            # NOTE(review): nothing here resets rel_path/in_path of dirs nested below move_me,
            # nor move_me.dir_details.in_path - confirm whether ResetAnySubdirPaths is needed
            move_me.dir_details.rel_path = dst_dir.rel_path + '/' + move_me.name
            move_me.in_dir = dst_dir
            move_me.in_path = dst_dir.in_path
            print( f"### DDP: move_me={move_me}" )
            session.add(move_me)
            # move the actual dir to its new location
            print( f"DDP: doing actual FS move for DIR - mv {orig_fs_pos} -> {move_me.FullPathOnFS()}" )
            os.replace( orig_fs_pos, move_me.FullPathOnFS() )
            return

        # new location has a same named dir, so use it as the destination rather than move_me
        dst_dir=same_named_dir.dir_details
        for sub_ent in sub_ents:
            print( f"DDP: in Move, going through sub-ents, dst_dir={dst_dir}" )
            print( f"DDP: in Move, going through sub-ents, move_me={move_me}" )
            MoveEntryToOtherFolder( job, sub_ent, dst_dir )

        # move_me was a duplicate dir and its contents have been moved, so it should now be empty in the old fs tree - remove it
        CleanUpDirInDB(job, move_me)
    else:
        # check for a duplicate name in the destination dir
        name_clash=session.query(Entry).join(EntryDirLink).join(Dir).filter(Entry.name==move_me.name,Dir.eid==dst_dir.eid).first()
        if name_clash:
            print( f"Moving {move_me.name} and it is a duplicate of a another file (by name)" )
            # as we have the original fs pos, renaming here means the FS move below lands on the new name
            move_me.name = 'Move of ' + move_me.name

        # by here the name is treated as unique in dst_dir, so just make sure it is in the right DIR
        print( f"change orig_name={orig_name} -> {move_me} to {dst_dir}" )
        move_me.in_dir = dst_dir
        move_me.in_path = dst_dir.in_path
        session.add(move_me)
        print( f"move_me is now {move_me}" )
        # NOTE(review): MoveFileToNewFolderInStorage also reset file_details.last_hash_date after
        # a move; this path does not - confirm whether that is intentional
        # move the actual file to its new location
        print( f"DDP: doing actual FS move for FILE - mv {orig_fs_pos} -> {move_me.FullPathOnFS()}" )
        os.replace( orig_fs_pos, move_me.FullPathOnFS() )
    return

####################################################################################################################################
# Function that
moves a file into a new folder in any path (usually form import to storage) - if needed it makes the folder on the FS, # moves the file into the folder on the FS and then changes the path to the appropriate one @@ -1015,19 +1156,33 @@ def MoveFileToRecycleBin(job,del_me): def MoveFileToNewFolderInStorage(job,move_me, dst_storage_path, dst_rel_path): orig_parent_dir_e=session.query(Entry).get(move_me.in_dir.eid) if DEBUG: - print( f"MoveFileToNewFolderInStorage: {move_me} to {dst_storage_path} in new? folder: {dst_storage_path}") - try: - dst_dir=dst_storage_path.path_prefix + '/' + dst_rel_path - if DEBUG: - print( f"would make dir: {dst_dir}" ) - os.makedirs( dst_dir,mode=0o777, exist_ok=True ) - src=move_me.FullPathOnFS() - dst=dst_dir + '/' + move_me.name - os.replace( src, dst ) - if DEBUG: - print( f"would mv {src} {dst}" ) - except Exception as e: - print( f"ERROR: Failed to move file to new location on filesystem, err: {e}") + print( f"MoveFileToNewFolderInStorage: {move_me} to {dst_storage_path} in new? folder: {dst_rel_path}") + + # NEED TO SEE IF dst_rel_path already exists in dst_storage_path + move_to_existing=None + dir=session.query(Entry).join(Dir).join(PathDirLink).join(Path).filter(Path.id==dst_storage_path.id).filter(Dir.rel_path==dst_rel_path).first() + if dir: + print( f"DEBUG: Okay, moving to an existing folder: ") + move_to_existing=dir + # IF it does, then don't move the folder, move its contents on the FS -- # RECURSIVELY!!! 
(as a sub_dir could also exist in new dst/sub_dir) + # IF it does, then don't just fudge the parent dir, reset the children entries to the existing Dir + + if move_to_existing == None: + try: + dst_dir=dst_storage_path.path_prefix + '/' + dst_rel_path + if DEBUG: + print( f"would make dir: {dst_dir}" ) + os.makedirs( dst_dir,mode=0o777, exist_ok=True ) + src=move_me.FullPathOnFS() + dst=dst_dir + '/' + move_me.name + os.replace( src, dst ) + if DEBUG: + print( f"would mv {src} {dst}" ) + except Exception as e: + print( f"ERROR: Failed to move file to new location on filesystem, err: {e}") + else: + # might be able to do a cp -a, and then del old files? (would work in # theory) + print( "Not moving files on FS as yet" ) # need these for AddDir calls below to work parent_dir=session.query(Dir).join(PathDirLink).filter(PathDirLink.path_id==dst_storage_path.id).first() @@ -1039,12 +1194,14 @@ def MoveFileToNewFolderInStorage(job,move_me, dst_storage_path, dst_rel_path): for dirname in dst_rel_path.split("/"): part_rel_path += f"{dirname}" if DEBUG: - print( f"Should make a Dir in the DB for {dirname} with parent: {parent_dir}, prp={part_rel_path} in storage path" ) + print( f"Should make a Dir in the DB for {dirname} with parent: {parent_dir}, prp={part_rel_path} in {dst_storage_path} path" ) new_dir=AddDir( job, dirname, parent_dir, part_rel_path, dst_storage_path ) + print("OKAY, we called AddDir, but hopefully it just used existing.") parent_dir=new_dir part_rel_path += "/" if DEBUG: print( f"now should change {move_me} in_dir to {new_dir} created above in {dst_storage_path}" ) + move_me.in_dir = new_dir move_me.in_path = dst_storage_path if move_me.type.name == "Directory": @@ -1052,16 +1209,37 @@ def MoveFileToNewFolderInStorage(job,move_me, dst_storage_path, dst_rel_path): print( f"{move_me.name} is a dir, so reset its dir_details path to the new path too" ) move_me.dir_details.in_path = dst_storage_path move_me.dir_details.rel_path = 
move_me.in_dir.rel_path+'/'+move_me.name - ResetAnySubdirPaths( move_me, dst_storage_path, move_me.dir_details.rel_path ) + if move_to_existing: + # okay, need to take all direct children move them to existing dir instead + MoveContentToNewDirInDB( move_me, move_to_existing ) + ResetAnySubdirPaths( move_to_existing, dst_storage_path, move_to_existing.dir_details.rel_path ) + else: + ResetAnySubdirPaths( move_me, dst_storage_path, move_me.dir_details.rel_path ) if DEBUG: print( f"DONE change of {move_me} in_dir to {new_dir} created above" ) session.add(move_me) CleanUpDirInDB(job, orig_parent_dir_e) # reset last_hash_date otherwise, the move resets ctime on the FS, and so scanning sees a 'new' file - move_me.file_details.last_hash_date = time.time() + if move_me.type.name != "Directory": + move_me.file_details.last_hash_date = time.time() AddLogForJob(job, f"{move_me.name} - (moved to {os.path.dirname(move_me.FullPathOnFS())})" ) return +#################################################################################################################################### +# moves all entries in 'old_dir' to 'new_dir' - At the moment only used when we move +# a directory from one part of the fs tree over to an existing part of the fs tree +#################################################################################################################################### +def MoveContentToNewDirInDB( old_dir, new_dir ): + sub_ents = session.query(Entry).join(FileType).join(EntryDirLink).filter(EntryDirLink.dir_eid==old_dir.id).all() + for sub_ent in sub_ents: + sub_ent.in_dir = new_dir + session.add(sub_ent) + return + +#################################################################################################################################### +# take a dir that is being moved, and reset its own and any sub dirs rel_paths, +# to the new PATH and relevant rel_path 
+#################################################################################################################################### def ResetAnySubdirPaths( moving_dir, dst_storage_path, parent_rel_path ): if DEBUG: print( f"ResetAnySubdirPaths( {moving_dir.name}, {dst_storage_path.path_prefix}, {parent_rel_path} )" ) @@ -1618,9 +1796,9 @@ def JobMoveFiles(job): for jex in job.extra: if 'eid-' in jex.name: -# move_me=session.query(Entry).join(File).filter(Entry.id==jex.value).first() move_me=session.query(Entry).get(jex.value) - MoveFileToNewFolderInStorage(job, move_me, dst_storage_path, f"{prefix}{suffix}" ) + MoveEntriesToOtherFolder( job, move_me, dst_storage_path, f"{prefix}{suffix}" ) +# MoveFileToNewFolderInStorage(job, move_me, dst_storage_path, f"{prefix}{suffix}" ) now=datetime.now(pytz.utc) next_job=Job(start_time=now, last_update=now, name="checkdups", state="New", wait_for=None, pa_job_state="New", current_file_num=0 ) session.add(next_job)