working condensed / logical version of moving on Files system - works for multiple scenarios

This commit is contained in:
2022-01-07 17:32:23 +11:00
parent e5f271c4bf
commit 665f45c03e

View File

@@ -1014,227 +1014,102 @@ def MoveFileToRecycleBin(job,del_me):
####################################################################################################################################
# move_me: (single) ENTRY being moved to a different folder. It might be a FILE or a DIR
# dst_storage_path: the path move_me is going into (import/storage, etc.) -- *might* be new or the same
# dst_storage_path: the path move_me is going into (import/storage, etc.)
# dst_rel_path: the relative path in the (new?) path, that is the new location
# (might contain any combo of existing/new dirs in the location)
#
# possible scenarios:
# 1: move_me is a File and it is being moved to a new folder (and maybe a new PATH)
# 2: move_me is a File and it is being moved to a 'new' folder (that already exists) and it is a duplicate by name
# 2: move_me is a File and it is being moved to an existing folder
# need to just make sure that there is not a duplicate name for this file in existing folder
# 3: move_me is a Directory and it is being moved to a new folder (and maybe a new PATH)
# need to 'create' new Dir in new location (really just a logical move of the DB entry)
# This then requires all sub_dirs of move_me to have their path/rel_path's reset
# 4: move_me is a Directory and it is being moved a 'new' folder (that already exists) and some entry/entries have the same name(s)
# 4: move_me is a Directory and it is being moved to an existing folder
# need to move old Dir INTO existing folder
# This then requires all sub_dirs of move_me to have their path/rel_path's reset
# BUT, if move_me has the same name as dst_storage_path/dst_rel_path:
# This then requires all sub_dirs of move_me to have their contents moved to the existing DIR in the DB for the destination
# AND this could also entail a sub1/sub2 or sub1/sub2/file1.jpg need to handle clashes
#
# So go entry by entry, if we are a Dir, recurse into it (and process it too), if we are an File process it.
####################################################################################################################################
def MoveEntriesToOtherFolder(job, move_me, dst_storage_path, dst_rel_path):
# Purpose: find (or create) the destination Dir for dst_rel_path inside dst_storage_path, hand the entry (or, for a
# Directory, each of its direct children) to MoveEntryToOtherFolder, then call CleanUpDirInDB on the original dir.
#
# job: passed through to AddDir / MoveEntryToOtherFolder / CleanUpDirInDB; job.id is printed
# move_me: the entry being moved; this code reads .name, .type.name, .id and .in_dir.eid from it
# dst_storage_path: destination path object; this code reads .id and .path_prefix from it
# dst_rel_path: '/'-separated relative path inside dst_storage_path
#
# NOTE(review): indentation is not visible in this view, so the nesting of the statements below (e.g. whether
# session.commit() sits inside the else branch) cannot be confirmed from here -- TODO confirm against the real file
# NOTE(review): the first print is not guarded by DEBUG, unlike the one inside the loop
print( f"DEBUG: MoveEntriesToOtherFolder( job={job.id}, move_me={move_me.name}, dst_storage_path={dst_storage_path.id}, dst_rel_path={dst_rel_path})")
# keep the dir of this entry to check if it is empty later on (for a file use in_dir, for a dir, choose itself, as in scen 3 it needs to be deleted)
if move_me.type.name == "Directory":
orig_dir_id = move_me.id
else:
orig_dir_id = move_me.in_dir.eid
# see if there is an existing dir of new dst_rel_path already
# NOTE(review): the local name 'dir' shadows the Python builtin of the same name
dir=session.query(Entry).join(Dir).join(PathDirLink).join(Path).filter(Path.id==dst_storage_path.id).filter(Dir.rel_path==dst_rel_path).first()
if dir:
dst_dir=dir.dir_details
else:
# okay, go through new relative path and AddDir any missing subdirs of this
# path (think Import/Dir1/Dir2) which b/c we have dst_storage_path in AddDir will
# create Storage/Dir1, Storage/Dir1/Dir2
# starting parent is the first Dir linked to dst_storage_path (presumably the root Dir of that path; the query
# has no ordering or rel_path filter -- TODO confirm)
parent_dir=session.query(Dir).join(PathDirLink).filter(PathDirLink.path_id==dst_storage_path.id).first()
part_rel_path=""
for dirname in dst_rel_path.split("/"):
part_rel_path += f"{dirname}"
if DEBUG:
print( f"Should make/find a Dir in the DB for {dirname} with parent: {parent_dir}, prp={part_rel_path} in {dst_storage_path} path" )
dst_dir=AddDir( job, dirname, parent_dir, part_rel_path, dst_storage_path )
# a failure to create the directory on the filesystem is only printed; nothing is re-raised here
try:
new_dirname=dst_storage_path.path_prefix + '/' + part_rel_path
print( f"CREATE fs dir: {new_dirname}" )
os.makedirs( new_dirname,mode=0o777, exist_ok=True )
except Exception as e:
print( f"ERROR: Failed to make new dir(s) at new location on filesystem, err: {e}")
# the Dir just returned becomes the parent for the next path component
parent_dir=dst_dir
part_rel_path += "/"
session.commit()
###
# by here, dst_dir is either the existing relevant DIR at new location, or a new DIR at new location
###
# get entries that are in move_me DIR and deal with them one-by-one
if move_me.type.name == "Directory":
sub_ents = session.query(Entry).join(EntryDirLink).filter(EntryDirLink.dir_eid==move_me.id).all()
for sub_ent in sub_ents:
MoveEntryToOtherFolder( job, sub_ent, dst_dir )
# just a file, so move me to new dst_dir
else:
MoveEntryToOtherFolder( job, move_me, dst_dir )
# if this is now empty, this will deal with it, if any content left, this will do nothing
old_dir = session.query(Entry).filter(Entry.id==orig_dir_id).first()
CleanUpDirInDB(job, old_dir)
return
####################################################################################################################################
# This func is recursively called to move the single ENTRY: move_me entry to the new DIR: dst_dir
# dst_path is passed in as well as we can recurse into the original Dir structures that are in the (potentially) old path
# and instead of making a new Dir, etc. in the DB, for optimising, we just reuse the (old) Dir by resetting its rel_path and Path
#
# This is where the ENTRY has its in_dir and in_path reset appropriately.
####################################################################################################################################
def MoveEntryToOtherFolder(job, move_me, dst_dir ):
# Purpose (as far as this view shows): move the single entry move_me into dst_dir, updating its in_dir / in_path
# (and rel_path for a Directory) and renaming it on the filesystem with os.replace.
#
# job: used for job.id in debug output and passed to AddLogForJob / AddDir / CleanUpDirInDB
# move_me: the entry being moved; may be a Directory (move_me.type.name == "Directory") or a file
# dst_dir: destination Dir; this code reads .eid, .rel_path and .in_path from it
#
# NOTE(review): this span looks like a rendered diff whose +/- markers and indentation were lost. Several statements
# below read dst_storage_path / dst_rel_path, which are NOT parameters of this def, and the DEBUG print names
# MoveEntriesToOtherFolder. Treat the body as old and new lines interleaved rather than one runnable function
# -- TODO confirm against the repository before relying on any of it
print( f"DEBUG: MoveEntryToOtherFolder( job={job.id}, move_me={move_me.name}, dst_dir={dst_dir.eid} )")
if DEBUG:
print( f"DEBUG: MoveEntriesToOtherFolder( job={job.id}, move_me={move_me.name}, dst_storage_path={dst_storage_path.id}, dst_rel_path={dst_rel_path})")
# remember the name and on-disk location before any attribute of move_me is changed
orig_name=move_me.name
orig_fs_pos=move_me.FullPathOnFS()
if move_me.type.name == "Directory":
# get sub_ents now before we (potentially) mess with dst_dir
# NOTE(review): this selects entries whose dir_eid is dst_dir.eid, not move_me.id; if the intent is to walk the
# contents of the directory being moved, this looks like the wrong id -- TODO confirm
sub_ents = session.query(Entry).join(EntryDirLink).filter(EntryDirLink.dir_eid==dst_dir.eid).all()
# need to check to see if existing dir in new location, (eg if TEST/T1/2018 -> TEST/2018 and we now are moving TEST/T1/2018/A ->TEST/2018/ - then A might exist in both)
# at this point move_me is TEST/T1/2018/A, and dst_dir is TEST/2018, so look for a DIR with rel_path of dst_dir.rel_path + / + move_me.name
# NOTE(review): the local name 'dir' shadows the Python builtin of the same name
dir=session.query(Entry).join(Dir).join(PathDirLink).join(Path).filter(Path.id==dst_dir.in_path.id).filter(Dir.rel_path==dst_dir.rel_path+'/'+move_me.name).first()
# new location has same named dir, so lets use it, rather than use move_me
if dir:
dst_dir=dir.dir_details
else:
# no need to create a new Dir object, we can use 'move_me' as it is # unique to the new destination
print( f"DDP: need to reset dst_dir's rel_path to new location!" )
print( f"### DDP: dst_dir={dst_dir}" )
print( f"### DDP: move_me={move_me}" )
# reset this Dir's rel_path to its parents (dst_dir), plus its name
# see if there is an existing dir of new dst_rel_path already
parent_dir=session.query(Entry).join(Dir).join(PathDirLink).join(Path).filter(Path.id==dst_storage_path.id).filter(Dir.rel_path==dst_rel_path).first()
if parent_dir:
# scen 4 move move_me into existing dst_dir of requested location
dst_dir=parent_dir.dir_details
# remember, we are moving (move_me - a dir) into this existing dir (dst_dir), so add the name to dst_dir's rel_path...
move_me.dir_details.rel_path = dst_dir.rel_path + '/' + move_me.name
move_me.in_dir = dst_dir
move_me.in_path = dst_dir.in_path
print( f"### DDP: move_me={move_me}" )
session.add(move_me)
# move the actual dir to its new location
print( f"DDP: doing actual FS move for DIR - mv {orig_fs_pos} -> {move_me.FullPathOnFS()}" )
AddLogForJob( job, f"INFO: move {orig_fs_pos} -> {move_me.FullPathOnFS()}" )
os.replace( orig_fs_pos, move_me.FullPathOnFS() )
# we use the new path to this new Dir with the full location (the old dir is put into the new location)
ResetAnySubdirPaths( move_me, dst_storage_path, move_me.dir_details.rel_path )
else:
# scen 3 -- effectively renaming a dir
# we have a new path (dst_rel_path)... The last component of that is what we will rename move_me to.
# however, it could be a path of existing/new or a combo up to the rename part, so use AddDir to find/create
# as needed to then "move" (rename) move_me to into the full dst_rel_path
# NOTE(review): on this branch parent_dir came back falsy from the query above and is then passed to AddDir as
# the parent of the first component -- TODO confirm AddDir accepts that
part_rel_path=""
for dirname in os.path.dirname(dst_rel_path).split("/"):
part_rel_path += f"{dirname}"
parent_dir=AddDir( job, dirname, parent_dir, part_rel_path, dst_storage_path )
part_rel_path += "/"
move_me.in_dir = parent_dir
move_me.dir_details.rel_path = dst_rel_path
move_me.name = os.path.basename(dst_rel_path)
ResetAnySubdirPaths( move_me, dst_storage_path, dst_rel_path )
AddLogForJob( job, f"INFO: move {orig_fs_pos} -> {move_me.FullPathOnFS()}" )
os.replace( orig_fs_pos, move_me.FullPathOnFS() )
return
for sub_ent in sub_ents:
print( f"DDP: in Move, going through sub-ents, dst_dir={dst_dir}" )
print( f"DDP: in Move, going through sub-ents, move_me={move_me}" )
# recurse: each collected sub-entry is moved into the (possibly replaced) dst_dir
MoveEntryToOtherFolder( job, sub_ent, dst_dir )
# given the else above returns, then the move_me directory was a duplicate, and we have moved its contents, so its now empty in the old fstree so remove it
CleanUpDirInDB(job, move_me)
else:
# check for duplicate name?
# for a file, just get the top of the Path, and then we will use it to
# potentially make all the sub dirs for the new location
parent_dir=session.query(Dir).join(PathDirLink).join(Path).filter(Path.id==dst_storage_path.id).filter(Dir.rel_path=='').first()
#
# TODO: this create Dirs in Path is everywhere, make it a function BUT
# also, the code above for the Dirs is very similar, could wrap them
# both with slightly different params and it would work...
# MAKE THIS A FUNCTION
#
part_rel_path=""
for dirname in dst_rel_path.split("/"):
part_rel_path += f"{dirname}"
parent_dir=AddDir( job, dirname, parent_dir, part_rel_path, dst_storage_path )
part_rel_path += "/"
# after the loop parent_dir is the Dir returned by the last AddDir call, used as the destination
dst_dir=parent_dir
# just make sure the Dir tree exists on the FS
os.makedirs( dst_storage_path.path_prefix + '/' + dst_rel_path, mode=0o777, exist_ok=True )
# check for duplicate name? (scen 2)
e=session.query(Entry).join(EntryDirLink).join(Dir).filter(Entry.name==move_me.name,Dir.eid==dst_dir.eid).first()
if e:
print( f"Moving {move_me.name} and it is a duplicate of a another file (by name)" )
AddLogForJob( job, f"INFO: Moving {move_me.name} and it is a duplicate of a another file (by name), prepending 'Move of'" )
# as we have original fs_pos, we can just rename new to be Move of XXX, and it will move it to that new name
# NOTE(review): no second check is made that the 'Move of ...' name is itself free in dst_dir
move_me.name = 'Move of ' + move_me.name
# its a unique file in this new structure, so just make sure it is in the right DIR
print( f"change orig_name={orig_name} -> {move_me} to {dst_dir}" )
# capture the old parent's id before in_dir is reassigned, for the clean-up call at the end
orig_dir_eid = move_me.in_dir.eid
move_me.in_dir = dst_dir
move_me.in_path = dst_dir.in_path
session.add(move_me)
print( f"move_me is now {move_me}" )
# move the actual file to its new location
print( f"DDP: doing actual FS move for FILE - mv {orig_fs_pos} -> {move_me.FullPathOnFS()}" )
AddLogForJob( job, f"DEBUG: move of FILE - {orig_fs_pos} -> {move_me.FullPathOnFS()}" )
os.replace( orig_fs_pos, move_me.FullPathOnFS() )
old_dir = session.query(Entry).filter(Entry.id==orig_dir_eid).first()
CleanUpDirInDB(job, old_dir)
return
####################################################################################################################################
# Function that moves a file into a new folder in any path (usually from import to storage) - if needed it makes the folder on the FS,
# moves the file into the folder on the FS and then changes the path to the appropriate one
####################################################################################################################################
def MoveFileToNewFolderInStorage(job,move_me, dst_storage_path, dst_rel_path):
# Purpose: move move_me into dst_rel_path within dst_storage_path: create the folder and rename the file on the
# filesystem when no Dir with that rel_path exists yet, then find/create the Dir chain via AddDir and repoint
# move_me's in_dir / in_path at it.
#
# job: passed to AddDir, CleanUpDirInDB and AddLogForJob
# move_me: the entry being moved; a file or a Directory (checked via move_me.type.name)
# dst_storage_path: destination path object; this code reads .id and .path_prefix from it
# dst_rel_path: '/'-separated relative path inside dst_storage_path
#
# NOTE(review): indentation is not visible in this view, so the nesting of some statements (for example whether the
# 'if move_to_existing:' block belongs to the Directory branch) cannot be confirmed from here -- TODO confirm
# remember the original parent so it can be handed to CleanUpDirInDB once the entry has been re-parented
orig_parent_dir_e=session.query(Entry).get(move_me.in_dir.eid)
if DEBUG:
print( f"MoveFileToNewFolderInStorage: {move_me} to {dst_storage_path} in new? folder: {dst_rel_path}")
# NEED TO SEE IF dst_rel_path already exists in dst_storage_path
move_to_existing=None
# NOTE(review): the local name 'dir' shadows the Python builtin of the same name
dir=session.query(Entry).join(Dir).join(PathDirLink).join(Path).filter(Path.id==dst_storage_path.id).filter(Dir.rel_path==dst_rel_path).first()
if dir:
print( f"DEBUG: Okay, moving to an existing folder: ")
move_to_existing=dir
# IF it does, then don't move the folder, move its contents on the FS -- # RECURSIVELY!!! (as a sub_dir could also exist in new dst/sub_dir)
# IF it does, then don't just fudge the parent dir, reset the children entries to the existing Dir
# NOTE(review): 'is None' is the idiomatic comparison here
if move_to_existing == None:
# filesystem errors are caught and only printed; nothing is re-raised here
try:
dst_dir=dst_storage_path.path_prefix + '/' + dst_rel_path
if DEBUG:
print( f"would make dir: {dst_dir}" )
os.makedirs( dst_dir,mode=0o777, exist_ok=True )
src=move_me.FullPathOnFS()
dst=dst_dir + '/' + move_me.name
os.replace( src, dst )
if DEBUG:
print( f"would mv {src} {dst}" )
except Exception as e:
print( f"ERROR: Failed to move file to new location on filesystem, err: {e}")
else:
# might be able to do a cp -a, and then del old files? (would work in # theory)
# NOTE(review): on this branch nothing is moved on the filesystem; only the message below is printed
print( "Not moving files on FS as yet" )
# need these for AddDir calls below to work
parent_dir=session.query(Dir).join(PathDirLink).filter(PathDirLink.path_id==dst_storage_path.id).first()
# okay, go through new relative path and AddDir any missing subdirs of this
# path (think Import/Dir1/Dir2) which b/c we have dst_storage_path in AddDir will
# create Storage/Dir1, Storage/Dir1/Dir2
part_rel_path=""
for dirname in dst_rel_path.split("/"):
part_rel_path += f"{dirname}"
if DEBUG:
print( f"Should make a Dir in the DB for {dirname} with parent: {parent_dir}, prp={part_rel_path} in {dst_storage_path} path" )
new_dir=AddDir( job, dirname, parent_dir, part_rel_path, dst_storage_path )
print("OKAY, we called AddDir, but hopefully it just used existing.")
parent_dir=new_dir
part_rel_path += "/"
if DEBUG:
print( f"now should change {move_me} in_dir to {new_dir} created above in {dst_storage_path}" )
# new_dir here is the Dir returned by the last AddDir call in the loop above
move_me.in_dir = new_dir
move_me.in_path = dst_storage_path
if move_me.type.name == "Directory":
if DEBUG:
print( f"{move_me.name} is a dir, so reset its dir_details path to the new path too" )
move_me.dir_details.in_path = dst_storage_path
move_me.dir_details.rel_path = move_me.in_dir.rel_path+'/'+move_me.name
if move_to_existing:
# okay, need to take all direct children move them to existing dir instead
MoveContentToNewDirInDB( move_me, move_to_existing )
ResetAnySubdirPaths( move_to_existing, dst_storage_path, move_to_existing.dir_details.rel_path )
else:
ResetAnySubdirPaths( move_me, dst_storage_path, move_me.dir_details.rel_path )
if DEBUG:
print( f"DONE change of {move_me} in_dir to {new_dir} created above" )
session.add(move_me)
CleanUpDirInDB(job, orig_parent_dir_e)
# reset last_hash_date otherwise, the move resets ctime on the FS, and so scanning sees a 'new' file
if move_me.type.name != "Directory":
move_me.file_details.last_hash_date = time.time()
AddLogForJob(job, f"{move_me.name} - (moved to {os.path.dirname(move_me.FullPathOnFS())})" )
return
####################################################################################################################################
# moves all entries in 'old_dir' to 'new_dir' - At the moment only used when we move
# a directory from one part of the fs tree over to an existing part of the fs tree
####################################################################################################################################
def MoveContentToNewDirInDB( old_dir, new_dir ):
    # Re-parent every entry linked directly to old_dir so that it sits in new_dir.
    # Only in_dir is changed on each entry here; nothing is touched on the filesystem
    # and no commit is issued by this function.
    children_query = session.query(Entry).join(FileType).join(EntryDirLink)
    children_query = children_query.filter(EntryDirLink.dir_eid==old_dir.id)
    for child in children_query.all():
        child.in_dir = new_dir
        session.add(child)
    return
####################################################################################################################################
# take a dir that is being moved, and reset its own and any sub dirs rel_paths,
@@ -1798,7 +1673,6 @@ def JobMoveFiles(job):
if 'eid-' in jex.name:
move_me=session.query(Entry).get(jex.value)
MoveEntriesToOtherFolder( job, move_me, dst_storage_path, f"{prefix}{suffix}" )
# MoveFileToNewFolderInStorage(job, move_me, dst_storage_path, f"{prefix}{suffix}" )
now=datetime.now(pytz.utc)
next_job=Job(start_time=now, last_update=now, name="checkdups", state="New", wait_for=None, pa_job_state="New", current_file_num=0 )
session.add(next_job)