Alternative way to do atomic writing

This is a hopefully more robust way of doing atomic writing of a file. Previously, we wrote a new temporary file on each write, and renamed it over the target file on success. This is technically neat, but constantly recreating the file causes problems with some network filesystems, and with sync tools like Dropbox. The new approach copies the old file contents to a temporary file, overwrites the target using standard open() and write() calls, and then removes the temporary file. In case of a failure during writing, the temporary file is renamed over the target, which should preserve the old data. This way, we're only using a new inode in case of a write failure, which is hopefully rare, instead of on successful writes.
11 years ago · bdc226a638
parent ecc5f52c66
commit bdc226a638
1 changed files with 19 additions and 29 deletions
--- a/jupyter_notebook/services/contents/fileio.py
+++ b/jupyter_notebook/services/contents/fileio.py
@ -11,7 +11,6 @@ import errno
 import io
 import os
 import shutil
-import tempfile

 from tornado.web import HTTPError

@ -39,14 +38,10 @@ def _copy_metadata(src, dst):
 def atomic_writing(path, text=True, encoding='utf-8', **kwargs):
    """Context manager to write to a file only if the entire write is successful.
    
-    This works by creating a temporary file in the same directory, and renaming
-    it over the old file if the context is exited without an error. If other
-    file names are hard linked to the target file, this relationship will not be
-    preserved.
-    
-    On Windows, there is a small chink in the atomicity: the target file is
-    deleted before renaming the temporary file over it. This appears to be
-    unavoidable.
+    This works by copying the previous file contents to a temporary file in the
+    same directory, and renaming that file back to the target if the context
+    exits with an error. If the context is successful, the new data is synced to
+    disk and the temporary file is removed.
    
    Parameters
    ----------
@ -70,40 +65,35 @@ def atomic_writing(path, text=True, encoding='utf-8', **kwargs):
        path = os.path.join(os.path.dirname(path), os.readlink(path))

    dirname, basename = os.path.split(path)
-    tmp_dir = tempfile.mkdtemp(prefix=basename, dir=dirname)
-    tmp_path = os.path.join(tmp_dir, basename)
+    # The .~ prefix will make Dropbox ignore the temporary file.
+    tmp_path = os.path.join(dirname, '.~'+basename)
+    if os.path.isfile(path):
+        shutil.copy2(path, tmp_path)
+
    if text:
-        fileobj = io.open(tmp_path, 'w', encoding=encoding, **kwargs)
+        fileobj = io.open(path, 'w', encoding=encoding, **kwargs)
    else:
-        fileobj = io.open(tmp_path, 'wb', **kwargs)
+        fileobj = io.open(path, 'wb', **kwargs)

    try:
        yield fileobj
    except:
+        # Failed! Move the backup file back to the real path to avoid corruption
        fileobj.close()
-        shutil.rmtree(tmp_dir)
+        if os.name == 'nt' and os.path.exists(path):
+            # Rename over existing file doesn't work on Windows
+            os.remove(path)
+        os.rename(tmp_path, path)
        raise

    # Flush to disk
    fileobj.flush()
    os.fsync(fileobj.fileno())
-
-    # Written successfully, now rename it
    fileobj.close()

-    # Copy permission bits, access time, etc.
-    try:
-        _copy_metadata(path, tmp_path)
-    except OSError:
-        # e.g. the file didn't already exist. Ignore any failure to copy metadata
-        pass
-
-    if os.name == 'nt' and os.path.exists(path):
-        # Rename over existing file doesn't work on Windows
-        os.remove(path)
-
-    os.rename(tmp_path, path)
-    shutil.rmtree(tmp_dir)
+    # Written successfully, now remove the backup copy
+    if os.path.isfile(tmp_path):
+        os.remove(tmp_path)


 class FileManagerMixin(object):