-
Notifications
You must be signed in to change notification settings - Fork 334
Description
Before Asking 在提问之前
-
I have pulled the latest code of main branch to run again and the problem still existed. 我已经拉取了主分支上最新的代码,重新运行之后,问题仍不能解决。
Search before asking 先搜索,再提问
Question
我使用的是 WSL2 下的 Docker,pull 的镜像版本为 v1.4.4,使用的是本地数据集。
在运行 analyzer 结束时出现了下面这个问题:
────────────────────────── Traceback (most recent call last) ───────────────────────────
/usr/local/lib/python3.10/dist-packages/streamlit/runtime/scriptrunner/exec_code.py:
129 in exec_func_with_error_handling
/usr/local/lib/python3.10/dist-packages/streamlit/runtime/scriptrunner/script_runner
.py:669 in code_to_exec
/data-juicer/app.py:697 in <module>
694
695
696 if __name__ == "__main__":
❱ 697 │ main()
698
/data-juicer/app.py:693 in main
690
691
692 def main():
❱ 693 │ Visualize.visualize()
694
695
696 if __name__ == "__main__":
/data-juicer/app.py:687 in visualize
684 │ │ Visualize.setup()
685 │ │ Visualize.parser()
686 │ │ Visualize.analyze_process()
❱ 687 │ │ Visualize.filter()
688 │ │ Visualize.diversity()
689 │ │ Visualize.auxiliary()
690
/data-juicer/app.py:655 in filter
652 │ │ with st.expander("Effect of Filter OPs", expanded=False):
653 │ │ │ dataset = st.session_state.get("dataset", None)
654 │ │ │ if dataset:
❱ 655 │ │ │ │ Visualize.filter_dataset(dataset)
656 │ │ │ else:
657 │ │ │ │ st.warning("Please analyze original data first")
658
/data-juicer/app.py:339 in filter_dataset
336 │ │ else:
337 │ │ │ all_conds = np.all([list(cond.values())[0] for cond in conds], axi
338 │ │ ds = pd.DataFrame(dataset)
❱ 339 │ │ Visualize.display_dataset(ds, all_conds, show_num, "Retained samples",
340 │ │ st.download_button(
341 │ │ │ "Download Retained data as JSONL", data=convert_to_jsonl(ds.loc[al
342 │ │ )
/data-juicer/app.py:512 in display_dataset
509 │
510 │ @staticmethod
511 │ def display_dataset(dataframe, cond, show_num, desp, type, all=True):
❱ 512 │ │ examples = dataframe.loc[cond]
513 │ │ if all or len(examples) > 0:
514 │ │ │ st.subheader(
515 │ │ │ │ f"{desp}: :red[{len(examples)}] of "
/usr/local/lib/python3.10/dist-packages/pandas/core/indexing.py:1191 in __getitem__
1188 │ │ │
1189 │ │ │ maybe_callable = com.apply_if_callable(key, self.obj)
1190 │ │ │ maybe_callable = self._check_deprecated_callable_usage(key, maybe
❱ 1191 │ │ │ return self._getitem_axis(maybe_callable, axis=axis)
1192 │
1193 │ def _is_scalar_access(self, key: tuple):
1194 │ │ raise NotImplementedError()
/usr/local/lib/python3.10/dist-packages/pandas/core/indexing.py:1431 in
_getitem_axis
1428 │ │
1429 │ │ # fall thru to straight lookup
1430 │ │ self._validate_key(key, axis)
❱ 1431 │ │ return self._get_label(key, axis=axis)
1432 │
1433 │ def _get_slice_axis(self, slice_obj: slice, axis: AxisInt):
1434 │ │ """
/usr/local/lib/python3.10/dist-packages/pandas/core/indexing.py:1381 in _get_label
1378 │
1379 │ def _get_label(self, label, axis: AxisInt):
1380 │ │ # GH#5567 this will fail if the label is not present in the axis.
❱ 1381 │ │ return self.obj.xs(label, axis=axis)
1382 │
1383 │ def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
1384 │ │ # we have an axis0 multi-index, handle or raise
/usr/local/lib/python3.10/dist-packages/pandas/core/generic.py:4301 in xs
4298 │ │ │ │ else:
4299 │ │ │ │ │ new_index = index[loc]
4300 │ │ else:
❱ 4301 │ │ │ loc = index.get_loc(key)
4302 │ │ │
4303 │ │ │ if isinstance(loc, np.ndarray):
4304 │ │ │ │ if loc.dtype == np.bool_:
/usr/local/lib/python3.10/dist-packages/pandas/core/indexes/range.py:417 in get_loc
414 │ │ │ except ValueError as err:
415 │ │ │ │ raise KeyError(key) from err
416 │ │ if isinstance(key, Hashable):
❱ 417 │ │ │ raise KeyError(key)
418 │ │ self._check_indexing_error(key)
419 │ │ raise KeyError(key)
420
────────────────────────────────────────────────────────────────────────────────────────
KeyError: True
Additional 额外信息
还有一点我的挂载信息为-v D:/data-juicer-main/data-juicer-main:/data-juicer
data的配置为{"images": ["tests/ops/data/img1.png"], "text": "<__dj__image> A comfortable bed."}
{"images": ["tests/ops/data/img2.jpg"], "text": "<__dj__image> A bus."}
{"images": ["tests/ops/data/img3.jpg"], "text": "<__dj__image> Black and white photograph of a woman holding an umbrella."}
{"images": ["tests/ops/data/img4.png"], "text": "<__dj__image> A comfortable bed."}
数据经算子清洗后为{"images":["tests/ops/data/img2.jpg"],"text":"<__dj__image> A bus."}
process中会出现 image_shape_filter │ <class 'FileNotFoundError'> │ [Errno 2] No such file or directory: '/data-juicer/outputs/demo-analyzer-image_1/tests/ops/data/img2.jpg'
这个问题我应该如何解决?使用绝对路径吗?但是挂载到 docker 后路径还是会被拼接。或者需要将这些信息(包括数据)都放在同一个文件夹下吗?