diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 431afcc30..9723be5ce 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -42,7 +42,6 @@ if TYPE_CHECKING: from datafusion.context import SessionContext - __all__ = [ "abs", "acos", @@ -268,13 +267,18 @@ "sum", "tan", "tanh", + "to_char", + "to_date", "to_hex", + "to_local_time", + "to_time", "to_timestamp", "to_timestamp_micros", "to_timestamp_millis", "to_timestamp_nanos", "to_timestamp_seconds", "to_unixtime", + "today", "translate", "trim", "trunc", @@ -1010,6 +1014,56 @@ def now() -> Expr: return Expr(f.now()) +def to_char(arg: Expr, formatter: Expr) -> Expr: + """Returns a string representation of a date, time, timestamp or duration. + + For usage of ``formatter`` see the ``strftime`` module of the Rust chrono crate. + + [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) + """ + return Expr(f.to_char(arg.expr, formatter.expr)) + + +def _unwrap_exprs(args: tuple[Expr, ...]) -> list: + return [arg.expr for arg in args] + + +def to_date(arg: Expr, *formatters: Expr) -> Expr: + """Converts a value to a date (YYYY-MM-DD). + + Supports strings, numeric and timestamp types as input. + Integers and doubles are interpreted as days since the Unix epoch. + Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') + if ``formatters`` are not provided. + + For usage of ``formatters`` see the ``strftime`` module of the Rust chrono crate. + + [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) + """ + return Expr(f.to_date(arg.expr, *_unwrap_exprs(formatters))) + + +def to_local_time(*args: Expr) -> Expr: + """Converts a timestamp with a timezone to a timestamp without a timezone. + + This function handles daylight saving time changes. + """ + return Expr(f.to_local_time(*_unwrap_exprs(args))) + + +def to_time(arg: Expr, *formatters: Expr) -> Expr: + """Converts a value to a time. 
Supports strings and timestamps as input. + + If ``formatters`` is not provided, strings are parsed as HH:MM:SS, HH:MM or + HH:MM:SS.nnnnnnnnn. + + For usage of ``formatters`` see the ``strftime`` module of the Rust chrono crate. + + [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) + """ + return Expr(f.to_time(arg.expr, *_unwrap_exprs(formatters))) + + def to_timestamp(arg: Expr, *formatters: Expr) -> Expr: """Converts a string and optional formats to a ``Timestamp`` in nanoseconds. @@ -1017,11 +1071,7 @@ def to_timestamp(arg: Expr, *formatters: Expr) -> Expr: [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) """ - if formatters is None: - return f.to_timestamp(arg.expr) - - formatters = [f.expr for f in formatters] - return Expr(f.to_timestamp(arg.expr, *formatters)) + return Expr(f.to_timestamp(arg.expr, *_unwrap_exprs(formatters))) def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr: @@ -1029,8 +1079,7 @@ def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ - formatters = [f.expr for f in formatters] - return Expr(f.to_timestamp_millis(arg.expr, *formatters)) + return Expr(f.to_timestamp_millis(arg.expr, *_unwrap_exprs(formatters))) def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr: @@ -1038,8 +1087,7 @@ def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ - formatters = [f.expr for f in formatters] - return Expr(f.to_timestamp_micros(arg.expr, *formatters)) + return Expr(f.to_timestamp_micros(arg.expr, *_unwrap_exprs(formatters))) def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr: @@ -1047,8 +1095,7 @@ def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. 
""" - formatters = [f.expr for f in formatters] - return Expr(f.to_timestamp_nanos(arg.expr, *formatters)) + return Expr(f.to_timestamp_nanos(arg.expr, *_unwrap_exprs(formatters))) def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr: @@ -1056,14 +1103,12 @@ def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ - formatters = [f.expr for f in formatters] - return Expr(f.to_timestamp_seconds(arg.expr, *formatters)) + return Expr(f.to_timestamp_seconds(arg.expr, *_unwrap_exprs(formatters))) def to_unixtime(string: Expr, *format_arguments: Expr) -> Expr: """Converts a string and optional formats to a Unixtime.""" - args = [f.expr for f in format_arguments] - return Expr(f.to_unixtime(string.expr, *args)) + return Expr(f.to_unixtime(string.expr, *_unwrap_exprs(format_arguments))) def current_date() -> Expr: @@ -1071,6 +1116,9 @@ def current_date() -> Expr: return Expr(f.current_date()) +today = current_date + + def current_time() -> Expr: """Returns current UTC time as a Time64 value.""" return Expr(f.current_time()) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 7b3332ed7..8dc1ce780 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import math -from datetime import datetime, timezone +from datetime import date, datetime, time, timezone import numpy as np import pyarrow as pa @@ -952,6 +952,12 @@ def test_temporal_functions(df): f.to_timestamp_nanos( literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") ), + f.to_time(literal("12:30:45")), + f.to_time(literal("12-30-45"), literal("%H-%M-%S")), + f.to_date(literal("2017-05-31")), + f.to_date(literal("2017-05-31"), literal("%Y-%m-%d")), + f.to_local_time(column("d")), + f.to_char(column("d"), literal("%d-%m-%Y")), ) result = df.collect() assert len(result) == 1 @@ -1026,6 +1032,73 @@ def test_temporal_functions(df): [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, type=pa.timestamp("ns"), ) + assert result.column(17) == pa.array( + [time(12, 30, 45)] * 3, + type=pa.time64("ns"), + ) + assert result.column(18) == pa.array( + [time(12, 30, 45)] * 3, + type=pa.time64("ns"), + ) + assert result.column(19) == pa.array( + [date(2017, 5, 31)] * 3, + type=pa.date32(), + ) + assert result.column(20) == pa.array( + [date(2017, 5, 31)] * 3, + type=pa.date32(), + ) + assert result.column(21) == pa.array( + [ + datetime(2022, 12, 31, tzinfo=DEFAULT_TZ), + datetime(2027, 6, 26, tzinfo=DEFAULT_TZ), + datetime(2020, 7, 2, tzinfo=DEFAULT_TZ), + ], + type=pa.timestamp("us"), + ) + + assert result.column(22) == pa.array( + [ + "31-12-2022", + "26-06-2027", + "02-07-2020", + ], + type=pa.string(), + ) + + +def test_to_time_invalid_input(df): + with pytest.raises(Exception, match=r"Error parsing 'not-a-time' as time"): + df.select(f.to_time(literal("not-a-time"))).collect() + + +def test_to_time_mismatched_formatter(df): + with pytest.raises(Exception, match=r"Error parsing '12:30:45' as time"): + df.select(f.to_time(literal("12:30:45"), literal("%Y-%m-%d"))).collect() + + +def test_to_date_invalid_input(df): + with pytest.raises(Exception, match=r"Date32"): + df.select(f.to_date(literal("not-a-date"))).collect() + + +def 
test_temporal_formatter_requires_expr(): + with pytest.raises(AttributeError, match="'str' object has no attribute 'expr'"): + f.to_time(literal("12:30:45"), "not-an-expr") + + +def test_today_returns_date32(df): + result = df.select(f.today().alias("today")).collect()[0] + assert result.column(0).type == pa.date32() + + +def test_today_alias_matches_current_date(df): + result = df.select( + f.current_date().alias("current_date"), + f.today().alias("today"), + ).collect()[0] + + assert result.column(0) == result.column(1) def test_arrow_cast(df): diff --git a/src/functions.rs b/src/functions.rs index 90b3a0a4b..c32134054 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -601,6 +601,9 @@ expr_fn!( "Converts the number to its equivalent hexadecimal representation." ); expr_fn!(now); +expr_fn_vec!(to_date); +expr_fn_vec!(to_local_time); +expr_fn_vec!(to_time); expr_fn_vec!(to_timestamp); expr_fn_vec!(to_timestamp_millis); expr_fn_vec!(to_timestamp_nanos); @@ -613,6 +616,7 @@ expr_fn!(date_part, part date); expr_fn!(date_trunc, part date); expr_fn!(date_bin, stride source origin); expr_fn!(make_date, year month day); +expr_fn!(to_char, datetime format); expr_fn!(translate, string from to, "Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted."); expr_fn_vec!( @@ -1045,6 +1049,10 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(tan))?; m.add_wrapped(wrap_pyfunction!(tanh))?; m.add_wrapped(wrap_pyfunction!(to_hex))?; + m.add_wrapped(wrap_pyfunction!(to_char))?; + m.add_wrapped(wrap_pyfunction!(to_date))?; + m.add_wrapped(wrap_pyfunction!(to_local_time))?; + m.add_wrapped(wrap_pyfunction!(to_time))?; m.add_wrapped(wrap_pyfunction!(to_timestamp))?; m.add_wrapped(wrap_pyfunction!(to_timestamp_millis))?; m.add_wrapped(wrap_pyfunction!(to_timestamp_nanos))?;