Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed axis labels being cut off in interactive histograms #617

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 52 additions & 55 deletions datascience/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -5349,15 +5349,14 @@ def hist(self, *columns, overlay=True, bins=None, bin_column=None, unit=None, co
>>> t.hist('value', group='category') # doctest: +SKIP
<two overlaid histograms of the data [1, 2, 3] and [2, 5]>
"""

# Matplotlib has deprecated the normed keyword.
# TODO consider changing this function to use density= instead too
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the justification for removing this TODO?

if 'normed' not in vargs and 'density' not in vargs:
vargs['density'] = True
elif 'normed' in vargs and 'density' not in vargs:
vargs['density'] = vargs.pop('normed')
elif 'normed' in vargs and 'density' in vargs:
raise ValueError("You can't specify both normed and density. "
"Use one or the other.")
raise ValueError("You can't specify both normed and density. Use one or the other.")

global _INTERACTIVE_PLOTS
if _INTERACTIVE_PLOTS:
Expand All @@ -5366,18 +5365,18 @@ def hist(self, *columns, overlay=True, bins=None, bin_column=None, unit=None, co

return self.ihist(
*columns,
overlay = overlay,
bins = bins,
bin_column = bin_column,
unit = unit,
counts = counts,
group = group,
side_by_side = side_by_side,
left_end = left_end,
right_end = right_end,
width = width,
height = height,
rug = rug,
overlay=overlay,
bins=bins,
bin_column=bin_column,
unit=unit,
counts=counts,
group=group,
side_by_side=side_by_side,
left_end=left_end,
right_end=right_end,
width=width,
height=height,
rug=rug,
**vargs
)

Expand All @@ -5389,7 +5388,7 @@ def hist(self, *columns, overlay=True, bins=None, bin_column=None, unit=None, co

if counts is not None and bin_column is None:
warnings.warn("counts arg of hist is deprecated; use bin_column")
bin_column=counts
bin_column = counts
if columns:
columns_included = list(columns)
if bin_column is not None:
Expand All @@ -5399,19 +5398,15 @@ def hist(self, *columns, overlay=True, bins=None, bin_column=None, unit=None, co
self = self.select(*columns_included)
if group is not None:
if bin_column is not None:
raise ValueError("Using bin_column and group together is "
"currently unsupported.")
raise ValueError("Using bin_column and group together is currently unsupported.")
if len(columns) > 1:
raise ValueError("Using group with multiple histogram value "
"columns is currently unsupported.")
raise ValueError("Using group with multiple histogram value columns is currently unsupported.")

# Check for non-numerical values and raise a ValueError if any found
for col in self:
if col != group and any(isinstance(cell, np.flexible) for cell in self[col]):
raise ValueError("The column '{0}' contains non-numerical "
"values. A histogram cannot be drawn for this table."
.format(col))

raise ValueError("The column '{0}' contains non-numerical values. A histogram cannot be drawn for this table."
.format(col))

if bin_column is not None and bins is None:
bins = np.unique(self.column(bin_column))
Expand All @@ -5431,20 +5426,19 @@ def prepare_hist_with_group(group):
grouped = self.group(group, np.array)
if grouped.num_rows > 20:
warnings.warn("It looks like you're making a grouped histogram with "
"a lot of groups ({:d}), which is probably incorrect."
.format(grouped.num_rows))
"a lot of groups ({:d}), which is probably incorrect."
.format(grouped.num_rows))
return [("{}={}".format(group, k), (v[0][1],)) for k, v in grouped.index_by(group).items()]

# Populate values_dict: An ordered dict from column name to singleton
# tuple of array of values or a (values, weights) pair of arrays. If
# any values have weights, they all must have weights.
# Populate values_dict: An ordered dict from column name to data arrays
if bin_column is not None:
values_dict = prepare_hist_with_bin_column(bin_column)
elif group is not None:
values_dict = prepare_hist_with_group(group)
else:
values_dict = [(k, (self.column(k),)) for k in self.labels]
values_dict = collections.OrderedDict(values_dict)

if left_end is not None or right_end is not None:
if left_end is None:
if bins is not None and bins[0]:
Expand All @@ -5458,26 +5452,23 @@ def prepare_hist_with_group(group):
right_end = max([max(self.column(k)) for k in self.labels if np.issubdtype(self.column(k).dtype, np.number)])

def draw_hist(values_dict):
# Check if np.printoptions is set to legacy. Throw UserWarning if not
if np.get_printoptions()['legacy'] != '1.13':
warnings.warn("We've detected you're not using the '1.13' legacy setting for `np.printoptions`. "
"This may cause excessive error terms in your plots. We recommend solving this by running the "
"following code: `np.set_printoptions(legacy='1.13')`", UserWarning)
# This code is factored as a function for clarity only.
warnings.warn("We've detected you're not using the '1.13' legacy setting for `np.printoptions`. "
"This may cause excessive error terms in your plots. We recommend solving this by running the "
"following code: `np.set_printoptions(legacy='1.13')`", UserWarning)
n = len(values_dict)
colors = [rgb_color + (self.default_alpha,) for rgb_color in
itertools.islice(itertools.cycle(self.chart_colors), n)]
itertools.islice(itertools.cycle(self.chart_colors), n)]
hist_names = list(values_dict.keys())
values = [v[0] for v in values_dict.values()]
weights = [v[1] for v in values_dict.values() if len(v) > 1]
if n > len(weights) > 0:
raise ValueError("Weights were provided for some columns, but not "
" all, and that's not supported.")
raise ValueError("Weights were provided for some columns, but not all, and that's not supported.")
if rug and overlay and n > 1:
warnings.warn("Cannot plot overlaid rug plots; rug=True ignored", UserWarning)
if vargs['density']:
y_label = 'Percent per ' + (unit if unit else 'unit')
percentage = plt.FuncFormatter(lambda x, _: "{:g}".format(100*x))
percentage = plt.FuncFormatter(lambda x, _: "{:g}".format(100 * x))
else:
y_label = 'Count'

Expand All @@ -5490,32 +5481,31 @@ def draw_hist(values_dict):
vargs['weights'] = weights
if not side_by_side:
vargs.setdefault('histtype', 'stepfilled')
figure = plt.figure(figsize=(width, height))
plt.hist(values, color=colors, **vargs)
# if rug:
# plt.scatter(values, np.zeros_like(values), marker="|", color=colors)
axis = figure.get_axes()[0]
_vertical_x(axis)
axis.set_ylabel(y_label)
# Added to ensure proper default dimensions
plt.figure(figsize=(width, height))
plt.hist(values, color=colors, label=hist_names, **vargs)
plt.legend(loc=2, bbox_to_anchor=(1.05, 1))
_vertical_x(plt.gca())
plt.ylabel(y_label)
if vargs['density']:
axis.yaxis.set_major_formatter(percentage)
plt.gca().yaxis.set_major_formatter(percentage)
x_unit = ' (' + unit + ')' if unit else ''
if group is not None and len(self.labels) == 2:
#There's a grouping in place but we're only plotting one column's values
# There's a grouping in place but we're only plotting one column's values
label_not_grouped = [l for l in self.labels if l != group][0]
axis.set_xlabel(label_not_grouped + x_unit, fontsize=16)
plt.xlabel(label_not_grouped + x_unit, fontsize=16)
else:
axis.set_xlabel(x_unit, fontsize=16)
plt.legend(hist_names, loc=2, bbox_to_anchor=(1.05, 1))
type(self).plots.append(axis)
plt.xlabel(x_unit, fontsize=16)
type(self).plots.append(plt.gca())
else:
_, axes = plt.subplots(n, 1, figsize=(width, height * n))
num_plots = n
fig, axes = plt.subplots(num_plots, 1, figsize=(width, height * num_plots))
if 'bins' in vargs:
bins = vargs['bins']
if isinstance(bins, numbers.Integral) and bins > 76 or hasattr(bins, '__len__') and len(bins) > 76:
# Use stepfilled when there are too many bins
vargs.setdefault('histtype', 'stepfilled')
if n == 1:
if num_plots == 1:
axes = [axes]
for i, (axis, hist_name, values_for_hist, color) in enumerate(zip(axes, hist_names, values, colors)):
axis.set_ylabel(y_label)
Expand All @@ -5529,14 +5519,21 @@ def draw_hist(values_dict):
if left_end is not None and right_end is not None:
x_shade, height_shade, width_shade = _compute_shading(heights, bins.copy(), left_end, right_end)
axis.bar(x_shade, height_shade, width=width_shade,
color=self.chart_colors[1], align="edge")
color=self.chart_colors[1], align="edge")
_vertical_x(axis)
if rug:
axis.scatter(values_for_hist, np.zeros_like(values_for_hist), marker="|",
color="black", s=100, zorder=10)
color="black", s=100, zorder=10)
type(self).plots.append(axis)

# Added to ensure proaper axis label dimensions
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo: "proaper" should be "proper".

plt.tight_layout()

draw_hist(values_dict)
# Added to make sure graph displays
plt.show()



def hist_of_counts(self, *columns, overlay=True, bins=None, bin_column=None,
group=None, side_by_side=False, width=None, height=None, **vargs):
Expand Down