Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(artifacts): correctly handle artifacts with 0-byte files with wandb-core #7321

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 9 additions & 0 deletions core/internal/filetransfer/file_transfer_default.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package filetransfer

import (
"bytes"
"fmt"
"io"
"math"
Expand Down Expand Up @@ -94,6 +95,14 @@ func (ft *DefaultFileTransfer) Upload(task *Task) error {
if err != nil {
return err
}
if task.Size == 0 {
// *Sometimes* on *some* platforms, req.Body is set to a non-nil io.Reader that
// would return 0 bytes if read, but net/http/Request.outgoingLength takes the
// presence of a body to indicate that length > 0 and "corrects" it to -1 to
// indicate unknown. If we don't fix it here we get 411 Length Required errors.
req.Body = io.NopCloser(bytes.NewReader([]byte{}))
task.Headers = append(task.Headers, "Content-Length:0")
}
Comment on lines +98 to +105
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i don't think you need to do it here, we are uploading empty files regularly when we use wandb.save pretty sure it is something artifact specific

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can repro outside wandb just using the net/http library... How do you do it in wandb.save()?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that the above fix doesn't even work. It sometimes does which is even weirder, but the net/http library is where the content-length gets rewritten and I haven't figured out how to force it to send a correct content length.

for _, header := range task.Headers {
parts := strings.SplitN(header, ":", 2)
if len(parts) != 2 {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1535,6 +1535,18 @@ def test_cache_cleanup_allows_upload(wandb_init, tmp_path, monkeypatch):
assert cache.cleanup(0) == 2**20


def test_artifact_with_empty_file_ok(wandb_init, tmp_path):
artifact = wandb.Artifact(type="dataset", name="my-arty")
path = tmp_path / "empty.txt"
path.touch()
artifact.add_file(path)
with wandb_init() as run:
run.log_artifact(artifact)
artifact.wait()

assert artifact.digest == "555f3aae5f5deeec705ad5e62f515bd5"


def test_artifact_ttl_setter_getter():
art = wandb.Artifact("test", type="test")
with pytest.raises(ArtifactNotLoggedError):
Expand Down
3 changes: 0 additions & 3 deletions tests/pytest_tests/system_tests/test_launch/test_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,6 @@ def test_create_job_artifact(runner, user, wandb_init, test_settings):
assert str(job._input_types) == "{'input1': Number}"


@pytest.mark.skip(
reason="This test is failing because it uploads an empty to file in an artifact"
)
def test_create_git_job(runner, user, wandb_init, test_settings, monkeypatch):
proj = "test-p99999"
settings = test_settings({"project": proj})
Expand Down