Skip to content

Commit

Permalink
Add handling for special date format on csv
Browse files Browse the repository at this point in the history
  • Loading branch information
dyang415 committed Oct 3, 2023
1 parent ecfea94 commit e7e652c
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 16 deletions.
3 changes: 2 additions & 1 deletion backend/app/data_source/file/file_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from app import app
from app.data_source.models import Field, DateField, FileSchema
from app.insight.services.utils import load_df_from_csv
from config import ConfigKey


Expand Down Expand Up @@ -30,7 +31,7 @@ def __init__(self, file_name):

def load_schema(self) -> FileSchema:
logger.info("Loading file")
df = pl.read_csv(f"{self.temp_file_path}/{self.file_name}", try_parse_dates=True)
df = load_df_from_csv(f"{self.temp_file_path}/{self.file_name}")

logger.info("Calculating distinct values")
column_to_num_distinct_values = df.select(
Expand Down
13 changes: 7 additions & 6 deletions backend/app/insight/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from app.insight.services.insight_builders import DFBasedInsightBuilder
from app.insight.services.metrics import AggregateMethod, SingleColumnMetric, DualColumnMetric, CombineMethod, DimensionValuePair, Filter
from app.insight.services.segment_insight_builder import get_related_segments, get_segment_insight
from app.insight.services.utils import load_df_from_csv


class InsightApi(BaseApi):
Expand Down Expand Up @@ -134,8 +135,8 @@ def get_segment_insight(self):
filtering_clause = filtering_clause & (pl.col(
sub_key['dimension']).cast(str).eq(pl.lit(sub_key['value'])))

df = pl.read_csv(f'/tmp/dsensei/{file_id}') \
.with_columns(pl.col(date_column).str.slice(0, 10).str.to_date().alias("date")) \
df = load_df_from_csv(f'/tmp/dsensei/{file_id}') \
.with_columns(pl.col(date_column).cast(pl.Utf8).str.slice(0, 10).str.to_date().alias("date")) \
.filter(filtering_clause)

return orjson.dumps(
Expand All @@ -160,8 +161,8 @@ def get_related_segments(self):

file_id = data['fileId']
logger.info('Reading file')
df = pl.read_csv(f'/tmp/dsensei/{file_id}') \
.with_columns(pl.col(date_column).str.slice(0, 10).str.to_date().alias("date"))
df = load_df_from_csv(f'/tmp/dsensei/{file_id}') \
.with_columns(pl.col(date_column).cast(pl.Utf8).str.slice(0, 10).str.to_date().alias("date"))

return orjson.dumps(
get_related_segments(
Expand All @@ -187,8 +188,8 @@ def get_insight(self):

try:
logger.info('Reading file')
df = pl.read_csv(f'/tmp/dsensei/{file_id}') \
.with_columns(pl.col(date_column).str.slice(0, 10).str.to_date(strict=False).alias("date"))
df = load_df_from_csv(f'/tmp/dsensei/{file_id}') \
.with_columns(pl.col(date_column).cast(pl.Utf8).str.slice(0, 10).str.to_date(strict=False).alias("date"))

logger.info('File loaded')
insight_builder = DFBasedInsightBuilder(
Expand Down
17 changes: 17 additions & 0 deletions backend/app/insight/services/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,20 @@ def get_filter_expression(filters: list[Filter]) -> Expr:
filter_expr = filter_expr & expr

return filter_expr


def load_df_from_csv(path: str) -> "pl.DataFrame":
    """Load a CSV file into a polars DataFrame, parsing date columns where possible.

    The file is first read with ``try_parse_dates=True``. Every column that is
    still a string (``pl.Utf8``) afterwards and holds at least one non-empty
    value is then tentatively parsed with the format ``%-m/%-d/%y %k:%M``
    (unpadded month/day, two-digit year, hour:minute; e.g. ``10/3/23 9:05``).
    A column whose values do not match that format is left unchanged.

    Args:
        path: Location of the CSV file to read.

    Returns:
        The loaded DataFrame, with every string column that matched the
        special format replaced by its parsed date values.
    """
    # Local import keeps this function self-contained regardless of which
    # logging helper the surrounding module exposes.
    import logging

    date_format = "%-m/%-d/%y %k:%M"

    df = pl.read_csv(path, try_parse_dates=True)
    for column, d_type in zip(df.columns, df.dtypes):
        if d_type != pl.Utf8:
            continue

        non_empty_count = (
            df.filter(pl.col(column).str.lengths().gt(0) & pl.col(column).is_not_null())
            .select(pl.col(column).count())
            .row(0)[0]
        )
        if non_empty_count == 0:
            # Nothing to parse; converting would only change the column dtype.
            continue

        try:
            df = df.with_columns(
                pl.col(column).str.to_date(date_format).alias(column)
            )
        except Exception:
            # Best effort: most string columns are not dates, so a parse
            # failure is expected and the column is simply kept as text.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            logging.getLogger(__name__).debug(
                "Column %r does not match date format %r; keeping it as a string",
                column,
                date_format,
            )
    return df
32 changes: 23 additions & 9 deletions frontend/src/components/uploader/DatePicker.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {
} from "@tremor/react";
import moment from "moment";
import { useEffect, useState } from "react";
import { createNewDateWithBrowserTimeZone } from "../../common/utils";

export interface DateRangeStats {
numDays?: number;
Expand Down Expand Up @@ -63,13 +64,13 @@ function DatePicker({
const [baseDateRangeMode, setBaseDateRangeMode] =
useState<BaseDateMode>("previous");

let minDate, maxDate;
let minDate: Date | undefined, maxDate: Date | undefined;
if (countByDate) {
const dates = Object.keys(countByDate).sort(
(d1, d2) => new Date(d1).getTime() - new Date(d2).getTime()
);
minDate = new Date(dates[0]);
maxDate = new Date(dates[dates.length - 1]);
minDate = createNewDateWithBrowserTimeZone(dates[0]);
maxDate = createNewDateWithBrowserTimeZone(dates[dates.length - 1]);
}

function getComparisonDateRangePreviousPeriodDateRange(
Expand All @@ -85,9 +86,21 @@ function DatePicker({
const previousPeriodToDate = new Date(fromDate);
previousPeriodToDate.setDate(fromDate.getDate() - 1);

const sanitizedPreviousPeriodFromDate = moment(
previousPeriodFromDate
).isBefore(moment(minDate))
? minDate
: previousPeriodFromDate;

const sanitizedPreviousPeriodToDate = moment(
previousPeriodToDate
).isBefore(moment(sanitizedPreviousPeriodFromDate))
? sanitizedPreviousPeriodFromDate
: previousPeriodToDate;

return {
from: previousPeriodFromDate,
to: previousPeriodToDate,
from: sanitizedPreviousPeriodFromDate,
to: sanitizedPreviousPeriodToDate,
};
}

Expand Down Expand Up @@ -119,7 +132,7 @@ function DatePicker({
let numRows = 0;
if (countByDate) {
while (
moment(date).format("YYYY-MM-DD") !==
moment(date).format("YYYY-MM-DD") <=
moment(toDate).format("YYYY-MM-DD")
) {
numRows += countByDate[moment(date).format("YYYY-MM-DD")] ?? 0;
Expand All @@ -136,7 +149,7 @@ function DatePicker({
return {};
}

function updateBaseDateRangeToIfNecessary(
function updateBaseDateRangeIfNecessary(
baseDateMode: BaseDateMode,
comparisonDateRange: DateRangePickerValue
) {
Expand All @@ -148,6 +161,7 @@ function DatePicker({
getComparisonDateRangePreviousPeriodDateRange(comparisonDateRange);

if (previousPeriodDateRange.to && previousPeriodDateRange.from) {
console.log(previousPeriodDateRange);
setBaseDateRangeData({
range: previousPeriodDateRange,
stats: getStatsForDateRange(previousPeriodDateRange),
Expand All @@ -160,7 +174,7 @@ function DatePicker({
range: value,
stats: getStatsForDateRange(value),
});
updateBaseDateRangeToIfNecessary(baseDateRangeMode, value);
updateBaseDateRangeIfNecessary(baseDateRangeMode, value);
}

function onBaseDateRangeChange(value: DateRangePickerValue) {
Expand All @@ -186,7 +200,7 @@ function DatePicker({

function onBaseDateModeChange(value: BaseDateMode) {
setBaseDateRangeMode(value);
updateBaseDateRangeToIfNecessary(value, comparisonDateRangeData.range);
updateBaseDateRangeIfNecessary(value, comparisonDateRangeData.range);
}

function shouldDisplayWarning() {
Expand Down

0 comments on commit e7e652c

Please sign in to comment.