Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix/gzip #35

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
181 changes: 69 additions & 112 deletions pywebdav/lib/WebDAVServer.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,59 @@ class DAVRequestHandler(AuthServer.AuthRequestHandler, LockManager):
server_version = "DAV/" + __version__
encode_threshold = 1400 # common MTU

def data_to_bytes_iterator(self, data):
if isinstance(data, (six.string_types, six.binary_type)):
log.debug("Not using iterator, string type")
data = (data,)
elif self._config.DAV.getboolean('http_response_use_iterator'):
# Use iterator to reduce using memory
log.debug("Use iterator")
else:
# Don't use iterator, it's a compatibility option
data = (data.read(),)
log.debug("Don't use iterator")

for buf in data:
yield buf if isinstance(buf, six.binary_type) else str(buf).encode('utf-8')
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be safer as:

Suggested change
yield buf if isinstance(buf, six.binary_type) else str(buf).encode('utf-8')
yield buf if isinstance(buf, six.binary_type) else bytes(buf, 'utf-8')

Using str(buf) could cause an undesired output if buf happens to be something that doesn't implicitely convert to the str cleanly.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should just throw exception in such case? This should be str or bytes anyway.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well I don't think it's an expected error, throwing an exception is ugly useful if you're going to catch and deal with it somewhere.

Without tracing the code through I could see it potentially being a bytearray, or a future change might want to change it to one...

This was more of a best practices suggestion, if you want to convert the object to a bytes it's better to convert in one step, rather than convert to str first and then to bytes.


def send_body_encoded(self, headers, data, content_type):
self.send_header('Date', rfc1123_date())
self._send_dav_version()

for a, v in headers.items():
self.send_header(a, v)

if not data:
self.send_header('Content-Length', "0")
self.end_headers()
return True

try:
is_gzip = ('gzip' in self.headers.get('Accept-Encoding', '').split(',')
and len(data) > self.encode_threshold)
if is_gzip:
buffer = io.BytesIO()
output = gzip.GzipFile(mode='wb', fileobj=buffer)
for buf in self.data_to_bytes_iterator(data):
output.write(buf)
output.close()
buffer.seek(0)
data = buffer.getvalue()
except Exception as ex:
is_gzip = False
log.exception(ex)

self.send_header('Content-Length', str(len(data)))
self.send_header('Content-Type', content_type)

if is_gzip:
self.send_header('Content-Encoding', 'gzip')
self.end_headers()
if data:
self.wfile.write(data)

return is_gzip

def send_body(self, DATA, code=None, msg=None, desc=None,
ctype='application/octet-stream', headers={}):
""" send a body in one part """
Expand All @@ -64,59 +117,12 @@ def send_body(self, DATA, code=None, msg=None, desc=None,
self.send_response(code, message=msg)
self.send_header("Connection", "close")
self.send_header("Accept-Ranges", "bytes")
self.send_header('Date', rfc1123_date())

self._send_dav_version()

for a, v in headers.items():
self.send_header(a, v)

if DATA:
try:
if 'gzip' in self.headers.get('Accept-Encoding', '').split(',') \
and len(DATA) > self.encode_threshold:
buffer = io.BytesIO()
output = gzip.GzipFile(mode='wb', fileobj=buffer)
if isinstance(DATA, str) or isinstance(DATA, six.text_type):
output.write(DATA)
else:
for buf in DATA:
output.write(buf)
output.close()
buffer.seek(0)
DATA = buffer.getvalue()
self.send_header('Content-Encoding', 'gzip')

self.send_header('Content-Length', len(DATA))
self.send_header('Content-Type', ctype)
except Exception as ex:
log.exception(ex)
else:
self.send_header('Content-Length', 0)

self.end_headers()
if DATA:
if isinstance(DATA, str):
DATA = DATA.encode('utf-8')
if isinstance(DATA, six.text_type) or isinstance(DATA, bytes):
log.debug("Don't use iterator")
self.wfile.write(DATA)
else:
if self._config.DAV.getboolean('http_response_use_iterator'):
# Use iterator to reduce using memory
log.debug("Use iterator")
for buf in DATA:
self.wfile.write(buf)
self.wfile.flush()
else:
# Don't use iterator, it's a compatibility option
log.debug("Don't use iterator")
res = DATA.read()
if isinstance(res,bytes):
self.wfile.write(res)
else:
self.wfile.write(res.encode('utf8'))
return None
if not self.send_body_encoded(headers, DATA, ctype):
self.end_headers()
for buf in self.data_to_bytes_iterator(DATA):
self.wfile.write(buf)
self.wfile.flush()

def send_body_chunks_if_http11(self, DATA, code, msg=None, desc=None,
ctype='text/xml; encoding="utf-8"',
Expand All @@ -133,69 +139,18 @@ def send_body_chunks(self, DATA, code, msg=None, desc=None,

self.responses[207] = (msg, desc)
self.send_response(code, message=msg)
self.send_header("Content-type", ctype)
self.send_header("Transfer-Encoding", "chunked")
self.send_header('Date', rfc1123_date())

self._send_dav_version()

for a, v in headers.items():
self.send_header(a, v)
if not self.send_body_encoded(headers, DATA, ctype):
self.send_header("Transfer-Encoding", "chunked")
self.end_headers()

GZDATA = None
if DATA:
if ('gzip' in self.headers.get('Accept-Encoding', '').split(',')
and len(DATA) > self.encode_threshold):
buffer = io.BytesIO()
output = gzip.GzipFile(mode='wb', fileobj=buffer)
if isinstance(DATA, bytes):
output.write(DATA)
else:
for buf in DATA:
buf = buf.encode() if isinstance(buf, six.text_type) else buf
output.write(buf)
output.close()
buffer.seek(0)
GZDATA = buffer.getvalue()
self.send_header('Content-Encoding', 'gzip')

self.send_header('Content-Length', len(DATA))
self.send_header('Content-Type', ctype)

else:
self.send_header('Content-Length', 0)

self.end_headers()

if GZDATA:
self.wfile.write(GZDATA)

elif DATA:
DATA = DATA.encode() if isinstance(DATA, six.text_type) else DATA
if isinstance(DATA, six.binary_type):
self.wfile.write(b"%s\r\n" % hex(len(DATA))[2:].encode())
self.wfile.write(DATA)
self.wfile.write(b"\r\n")
self.wfile.write(b"0\r\n")
for buf in self.data_to_bytes_iterator(DATA):
self.wfile.write(b"%x\r\n" % len(buf))
self.wfile.write(buf)
self.wfile.write(b"\r\n")
else:
if self._config.DAV.getboolean('http_response_use_iterator'):
# Use iterator to reduce using memory
for buf in DATA:
buf = buf.encode() if isinstance(buf, six.text_type) else buf
self.wfile.write((hex(len(buf))[2:] + "\r\n").encode())
self.wfile.write(buf)
self.wfile.write(b"\r\n")

self.wfile.write(b"0\r\n")
self.wfile.write(b"\r\n")
else:
# Don't use iterator, it's a compatibility option
self.wfile.write((hex(len(DATA))[2:] + "\r\n").encode())
self.wfile.write(DATA.read())
self.wfile.write(b"\r\n")
self.wfile.write(b"0\r\n")
self.wfile.write(b"\r\n")

self.wfile.write(b"0\r\n")
self.wfile.write(b"\r\n")

def _send_dav_version(self):
if self._config.DAV.getboolean('lockemulation'):
Expand Down Expand Up @@ -250,6 +205,8 @@ def _HEAD_GET(self, with_body=False):
content_type = 'text/html;charset=utf-8'
else:
content_type = dc.get_prop(uri, "DAV:", "getcontenttype")
if content_type == 'httpd/unix-directory':
content_type = 'text/html;charset=utf-8'
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What inspired this change?
I'm not sure what the webdav spec suggests, however a google of webdav mime "httpd/unix-directory" gets a lot of hits; it seems to be pretty common?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a WebDAV response, but a standard HTTP GET/HEAD response returning the list of files as a HTML. Serving the HTML file as httpd/unix-directory makes no sense. It breaks the webbrowser pointed at that URL, while WebDAV clients would nevertheless use the WebDAV method PROPFIND and not the GET/HEAD.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah ok, that makes sense. I didn't actually realise the server would serve "regular" http requests as well as webdav ones.

except DAV_NotFound:
content_type = "application/octet-stream"

Expand Down
24 changes: 16 additions & 8 deletions pywebdav/server/fshandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

log = logging.getLogger(__name__)

BUFFER_SIZE = 128 * 1000
BUFFER_SIZE = 128 * 1000
# include magic support to correctly determine mimetypes
MAGIC_AVAILABLE = False
try:
Expand Down Expand Up @@ -53,10 +53,10 @@ def read(self, length = 0):

data = self.__fp.read(length)
return data


class FilesystemHandler(dav_interface):
"""
"""
Model a filesystem for DAV

This class models a regular filesystem for the DAV server
Expand All @@ -68,6 +68,7 @@ class FilesystemHandler(dav_interface):
to /tmp/gfx/pix

"""
index_files = ()

def __init__(self, directory, uri, verbose=False):
self.setDirectory(directory)
Expand Down Expand Up @@ -155,6 +156,16 @@ def get_data(self,uri, range = None):

path=self.uri2local(uri)
if os.path.exists(path):
if os.path.isdir(path):
for filename in self.index_files:
new_path = os.path.join(path, filename)
if os.path.isfile(new_path):
path = new_path
break
else:
msg = self._get_listing(path)
return Resource(StringIO(msg), len(msg))
Comment on lines +159 to +167
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I gather this index file handing is quite non-standard for webdav servers?

Does this mean that if it's enabled and you try to view a folder that's got an index style file in it, you get served that file rather than the directory listing?

by default is disabled

Looking at the code, is an end user expected to edit this python file to add filenames to index_files = () above?
Or have to create a wrapper script that imports this, injects the names, then start the server programatically?
For this to be included I think it'd need at least some docs, if not a command line argument to enable?

Copy link
Author

@jaboja jaboja Mar 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. WebDAV allows that behavior:
    http://www.webdav.org/specs/rfc4918.html#rfc.section.9.4

  2. I intended it to be extended via inheritance, like this:

class CustomFilesystemHandler(FilesystemHandler):
    mimecheck = True
    index_files = ("index.html",)

However I see it would be better if I write some docs for that feature.


if os.path.isfile(path):
file_size = os.path.getsize(path)
if range is None:
Expand Down Expand Up @@ -182,9 +193,6 @@ def get_data(self,uri, range = None):
fp.seek(range[0])
log.info('Serving range %s -> %s content of %s' % (range[0], range[1], uri))
return Resource(fp, range[1] - range[0])
elif os.path.isdir(path):
msg = self._get_listing(path)
return Resource(StringIO(msg), len(msg))
else:
# also raise an error for collections
# don't know what should happen then..
Expand Down Expand Up @@ -319,7 +327,7 @@ def rmcol(self,uri):
shutil.rmtree(path)
except OSError:
raise DAV_Forbidden # forbidden

return 204

def rm(self,uri):
Expand Down Expand Up @@ -353,7 +361,7 @@ def delone(self,uri):
return delone(self,uri)

def deltree(self,uri):
""" delete a collection
""" delete a collection

You have to return a result dict of the form
uri:error_code
Expand Down