diff --git a/src/ast/query.rs b/src/ast/query.rs index 159f02a6c..0b6f1e04a 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2860,6 +2860,8 @@ impl fmt::Display for OrderBy { pub struct OrderByExpr { /// The expression to order by. pub expr: Expr, + /// Optional PostgreSQL `USING ` clause. + pub using_operator: Option, /// Ordering options such as `ASC`/`DESC` and `NULLS` behavior. pub options: OrderByOptions, /// Optional `WITH FILL` clause (ClickHouse extension) which specifies how to fill gaps. @@ -2870,6 +2872,7 @@ impl From for OrderByExpr { fn from(ident: Ident) -> Self { OrderByExpr { expr: Expr::Identifier(ident), + using_operator: None, options: OrderByOptions::default(), with_fill: None, } @@ -2878,7 +2881,15 @@ impl From for OrderByExpr { impl fmt::Display for OrderByExpr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}{}", self.expr, self.options)?; + write!(f, "{}", self.expr)?; + if let Some(using_operator) = &self.using_operator { + if using_operator.0.len() > 1 { + write!(f, " USING OPERATOR({using_operator})")?; + } else { + write!(f, " USING {using_operator}")?; + } + } + write!(f, "{}", self.options)?; if let Some(ref with_fill) = self.with_fill { write!(f, " {with_fill}")? } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 0b95c3ed7..466aaf254 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -2087,6 +2087,7 @@ impl Spanned for OrderByExpr { fn span(&self) -> Span { let OrderByExpr { expr, + using_operator: _, options: _, with_fill, } = self; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index bea566bbe..6bc791a4b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -508,10 +508,10 @@ impl<'a> Parser<'a> { Token::EOF => break, // end of statement - Token::Word(word) => { - if expecting_statement_delimiter && word.keyword == Keyword::END { - break; - } + Token::Word(word) + if expecting_statement_delimiter && word.keyword == Keyword::END => + { + break; } _ => {} } @@ -1298,41 +1298,40 @@ impl<'a> Parser<'a> { let next_token = self.next_token(); match next_token.token { - t @ (Token::Word(_) | Token::SingleQuotedString(_)) => { - if self.peek_token_ref().token == Token::Period { - let mut id_parts: Vec = vec![match t { - Token::Word(w) => w.into_ident(next_token.span), - Token::SingleQuotedString(s) => Ident::with_quote('\'', s), - _ => { - return Err(ParserError::ParserError( - "Internal parser error: unexpected token type".to_string(), - )) + t @ (Token::Word(_) | Token::SingleQuotedString(_)) + if self.peek_token_ref().token == Token::Period => + { + let mut id_parts: Vec = vec![match t { + Token::Word(w) => w.into_ident(next_token.span), + Token::SingleQuotedString(s) => Ident::with_quote('\'', s), + _ => { + return Err(ParserError::ParserError( + "Internal parser error: unexpected token type".to_string(), + )) + } + }]; + + while self.consume_token(&Token::Period) { + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => id_parts.push(w.into_ident(next_token.span)), + Token::SingleQuotedString(s) => { + // SQLite has single-quoted identifiers + id_parts.push(Ident::with_quote('\'', s)) } - }]; - - while self.consume_token(&Token::Period) { - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => id_parts.push(w.into_ident(next_token.span)), - Token::SingleQuotedString(s) => { - // SQLite has single-quoted identifiers - id_parts.push(Ident::with_quote('\'', s)) - } - Token::Placeholder(s) => { - // Snowflake uses $1, $2, etc. for positional column references - // in staged data queries like: SELECT t.$1 FROM @stage t - id_parts.push(Ident::new(s)) - } - Token::Mul => { - return Ok(Expr::QualifiedWildcard( - ObjectName::from(id_parts), - AttachedToken(next_token), - )); - } - _ => { - return self - .expected("an identifier or a '*' after '.'", next_token); - } + Token::Placeholder(s) => { + // Snowflake uses $1, $2, etc. for positional column references + // in staged data queries like: SELECT t.$1 FROM @stage t + id_parts.push(Ident::new(s)) + } + Token::Mul => { + return Ok(Expr::QualifiedWildcard( + ObjectName::from(id_parts), + AttachedToken(next_token), + )); + } + _ => { + return self.expected("an identifier or a '*' after '.'", next_token); } } } @@ -4990,10 +4989,10 @@ impl<'a> Parser<'a> { loop { match &self.peek_nth_token_ref(0).token { Token::EOF => break, - Token::Word(w) => { - if w.quote_style.is_none() && terminal_keywords.contains(&w.keyword) { - break; - } + Token::Word(w) + if w.quote_style.is_none() && terminal_keywords.contains(&w.keyword) => + { + break; } _ => {} } @@ -8173,70 +8172,60 @@ impl<'a> Parser<'a> { Keyword::LINES, Keyword::NULL, ]) { - Some(Keyword::FIELDS) => { - if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) { + Some(Keyword::FIELDS) + if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) => + { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::FieldsTerminatedBy, + char: self.parse_identifier()?, + }); + + if self.parse_keywords(&[Keyword::ESCAPED, Keyword::BY]) { row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::FieldsTerminatedBy, + delimiter: HiveDelimiter::FieldsEscapedBy, char: self.parse_identifier()?, }); - - if self.parse_keywords(&[Keyword::ESCAPED, Keyword::BY]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::FieldsEscapedBy, - char: self.parse_identifier()?, - }); - } - } else { - break; } } - Some(Keyword::COLLECTION) => { + Some(Keyword::COLLECTION) if self.parse_keywords(&[ Keyword::ITEMS, Keyword::TERMINATED, Keyword::BY, - ]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::CollectionItemsTerminatedBy, - char: self.parse_identifier()?, - }); - } else { - break; - } + ]) => + { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::CollectionItemsTerminatedBy, + char: self.parse_identifier()?, + }); } - Some(Keyword::MAP) => { + Some(Keyword::MAP) if self.parse_keywords(&[ Keyword::KEYS, Keyword::TERMINATED, Keyword::BY, - ]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::MapKeysTerminatedBy, - char: self.parse_identifier()?, - }); - } else { - break; - } + ]) => + { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::MapKeysTerminatedBy, + char: self.parse_identifier()?, + }); } - Some(Keyword::LINES) => { - if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::LinesTerminatedBy, - char: self.parse_identifier()?, - }); - } else { - break; - } + Some(Keyword::LINES) + if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) => + { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::LinesTerminatedBy, + char: self.parse_identifier()?, + }); } - Some(Keyword::NULL) => { - if self.parse_keywords(&[Keyword::DEFINED, Keyword::AS]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::NullDefinedAs, - char: self.parse_identifier()?, - }); - } else { - break; - } + Some(Keyword::NULL) + if self.parse_keywords(&[Keyword::DEFINED, Keyword::AS]) => + { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::NullDefinedAs, + char: self.parse_identifier()?, + }); } _ => { break; @@ -18083,7 +18072,32 @@ impl<'a> Parser<'a> { None }; - let options = self.parse_order_by_options()?; + let using_operator = if !with_operator_class + && dialect_of!(self is PostgreSqlDialect) + && self.parse_keyword(Keyword::USING) + { + Some(self.parse_order_by_using_operator()?) + } else { + None + }; + + let options = if using_operator.is_some() { + if self + .peek_one_of_keywords(&[Keyword::ASC, Keyword::DESC]) + .is_some() + { + return parser_err!( + "ASC/DESC cannot be used together with USING in ORDER BY".to_string(), + self.peek_token_ref().span.start + ); + } + OrderByOptions { + asc: None, + nulls_first: self.parse_order_by_nulls_first_last(), + } + } else { + self.parse_order_by_options()? + }; let with_fill = if self.dialect.supports_with_fill() && self.parse_keywords(&[Keyword::WITH, Keyword::FILL]) @@ -18096,6 +18110,7 @@ impl<'a> Parser<'a> { Ok(( OrderByExpr { expr, + using_operator, options, with_fill, }, @@ -18103,16 +18118,68 @@ impl<'a> Parser<'a> { )) } - fn parse_order_by_options(&mut self) -> Result { - let asc = self.parse_asc_desc(); + fn parse_order_by_using_operator(&mut self) -> Result { + if self.parse_keyword(Keyword::OPERATOR) { + self.expect_token(&Token::LParen)?; + let operator_name = self.parse_operator_name()?; + let Some(last_part) = operator_name.0.last() else { + return self.expected_ref("an operator name", self.peek_token_ref()); + }; + let operator = last_part.to_string(); + if !Self::is_valid_order_by_using_operator_symbol(&operator) { + return self.expected_ref("an operator name", self.peek_token_ref()); + } + self.expect_token(&Token::RParen)?; + return Ok(operator_name); + } - let nulls_first = if self.parse_keywords(&[Keyword::NULLS, Keyword::FIRST]) { + let token = self.next_token(); + let operator = token.token.to_string(); + if Self::is_valid_order_by_using_operator_symbol(&operator) { + Ok(ObjectName::from(vec![Ident::new(operator)])) + } else { + self.expected_ref("an ordering operator after USING", &token) + } + } + + fn is_valid_order_by_using_operator_symbol(symbol: &str) -> bool { + !symbol.is_empty() + && symbol.chars().all(|c| { + matches!( + c, + '+' | '-' + | '*' + | '/' + | '<' + | '>' + | '=' + | '~' + | '!' + | '@' + | '#' + | '%' + | '^' + | '&' + | '|' + | '`' + | '?' + ) + }) + } + + fn parse_order_by_nulls_first_last(&mut self) -> Option { + if self.parse_keywords(&[Keyword::NULLS, Keyword::FIRST]) { Some(true) } else if self.parse_keywords(&[Keyword::NULLS, Keyword::LAST]) { Some(false) } else { None - }; + } + } + + fn parse_order_by_options(&mut self) -> Result { + let asc = self.parse_asc_desc(); + let nulls_first = self.parse_order_by_nulls_first_last(); Ok(OrderByOptions { asc, nulls_first }) } @@ -20309,6 +20376,7 @@ mod tests { asc: None, nulls_first: None, }, + using_operator: None, with_fill: None, }, operator_class: None, diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index ce962cb80..208dee652 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -2721,6 +2721,7 @@ fn test_export_data() { asc: None, nulls_first: None, }, + using_operator: None, with_fill: None, },]), interpolate: None, @@ -2827,6 +2828,7 @@ fn test_export_data() { asc: None, nulls_first: None, }, + using_operator: None, with_fill: None, },]), interpolate: None, diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 82f79577b..69624fabe 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -334,6 +334,7 @@ fn parse_alter_table_add_projection() { asc: None, nulls_first: None, }, + using_operator: None, with_fill: None, }]), interpolate: None, @@ -1162,6 +1163,7 @@ fn parse_select_order_by_with_fill_interpolate() { asc: Some(true), nulls_first: Some(true), }, + using_operator: None, with_fill: Some(WithFill { from: Some(Expr::value(number("10"))), to: Some(Expr::value(number("20"))), @@ -1174,6 +1176,7 @@ fn parse_select_order_by_with_fill_interpolate() { asc: Some(false), nulls_first: Some(false), }, + using_operator: None, with_fill: Some(WithFill { from: Some(Expr::value(number("30"))), to: Some(Expr::value(number("40"))), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 982bf1088..86d6a66f6 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2575,6 +2575,7 @@ fn parse_select_order_by() { asc: Some(true), nulls_first: None, }, + using_operator: None, with_fill: None, }, OrderByExpr { @@ -2583,6 +2584,7 @@ fn parse_select_order_by() { asc: Some(false), nulls_first: None, }, + using_operator: None, with_fill: None, }, OrderByExpr { @@ -2591,6 +2593,7 @@ fn parse_select_order_by() { asc: None, nulls_first: None, }, + using_operator: None, with_fill: None, }, ]), @@ -2616,6 +2619,7 @@ fn parse_select_order_by_limit() { asc: Some(true), nulls_first: None, }, + using_operator: None, with_fill: None, }, OrderByExpr { @@ -2624,6 +2628,7 @@ fn parse_select_order_by_limit() { asc: Some(false), nulls_first: None, }, + using_operator: None, with_fill: None, }, ]), @@ -2737,6 +2742,7 @@ fn parse_select_order_by_not_support_all() { asc: None, nulls_first: None, }, + using_operator: None, with_fill: None, }]), ), @@ -2748,6 +2754,7 @@ fn parse_select_order_by_not_support_all() { asc: Some(true), nulls_first: Some(true), }, + using_operator: None, with_fill: None, }]), ), @@ -2759,6 +2766,7 @@ fn parse_select_order_by_not_support_all() { asc: Some(false), nulls_first: Some(false), }, + using_operator: None, with_fill: None, }]), ), @@ -2782,6 +2790,7 @@ fn parse_select_order_by_nulls_order() { asc: Some(true), nulls_first: Some(true), }, + using_operator: None, with_fill: None, }, OrderByExpr { @@ -2790,6 +2799,7 @@ fn parse_select_order_by_nulls_order() { asc: Some(false), nulls_first: Some(false), }, + using_operator: None, with_fill: None, }, ]), @@ -3012,6 +3022,7 @@ fn parse_select_qualify() { asc: None, nulls_first: None, }, + using_operator: None, with_fill: None, }], window_frame: None, @@ -3457,6 +3468,7 @@ fn parse_listagg() { asc: None, nulls_first: None, }, + using_operator: None, with_fill: None, }, OrderByExpr { @@ -3469,6 +3481,7 @@ fn parse_listagg() { asc: None, nulls_first: None, }, + using_operator: None, with_fill: None, }, ] @@ -5728,6 +5741,7 @@ fn parse_window_functions() { asc: Some(false), nulls_first: None, }, + using_operator: None, with_fill: None, }], window_frame: None, @@ -5954,6 +5968,7 @@ fn test_parse_named_window() { asc: None, nulls_first: None, }, + using_operator: None, with_fill: None, }], window_frame: None, @@ -9415,6 +9430,7 @@ fn parse_create_index() { operator_class: None, column: OrderByExpr { expr: Expr::Identifier(Ident::new("name")), + using_operator: None, with_fill: None, options: OrderByOptions { asc: None, @@ -9426,6 +9442,7 @@ fn parse_create_index() { operator_class: None, column: OrderByExpr { expr: Expr::Identifier(Ident::new("age")), + using_operator: None, with_fill: None, options: OrderByOptions { asc: Some(false), @@ -9461,6 +9478,7 @@ fn test_create_index_with_using_function() { operator_class: None, column: OrderByExpr { expr: Expr::Identifier(Ident::new("name")), + using_operator: None, with_fill: None, options: OrderByOptions { asc: None, @@ -9472,6 +9490,7 @@ fn test_create_index_with_using_function() { operator_class: None, column: OrderByExpr { expr: Expr::Identifier(Ident::new("age")), + using_operator: None, with_fill: None, options: OrderByOptions { asc: Some(false), @@ -9522,6 +9541,7 @@ fn test_create_index_with_with_clause() { asc: None, nulls_first: None, }, + using_operator: None, with_fill: None, }, operator_class: None, @@ -13148,6 +13168,7 @@ fn test_match_recognize() { asc: None, nulls_first: None, }, + using_operator: None, with_fill: None, }], measures: vec![ diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 1b0948518..1c9e114a2 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -174,6 +174,7 @@ fn create_table_with_clustered_by() { asc: Some(true), nulls_first: None, }, + using_operator: None, with_fill: None, }, OrderByExpr { @@ -182,6 +183,7 @@ fn create_table_with_clustered_by() { asc: Some(false), nulls_first: None, }, + using_operator: None, with_fill: None, }, ]), diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 30405623d..046604310 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -682,6 +682,7 @@ fn table_constraint_unique_primary_ctor( asc: None, nulls_first: None, }, + using_operator: None, with_fill: None, }, operator_class: None, @@ -2751,6 +2752,7 @@ fn parse_delete_with_order_by() { asc: Some(false), nulls_first: None, }, + using_operator: None, with_fill: None, }], order_by diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 7c19f51e5..792cd9687 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2686,6 +2686,7 @@ fn parse_create_indices_with_operator_classes() { asc: None, nulls_first: None, }, + using_operator: None, with_fill: None, }, operator_class: expected_operator_class.clone(), @@ -2750,6 +2751,7 @@ fn parse_create_indices_with_operator_classes() { asc: None, nulls_first: None, }, + using_operator: None, with_fill: None, }, operator_class: None @@ -5685,6 +5687,85 @@ fn parse_array_agg() { pg().verified_stmt(sql4); } +#[test] +fn parse_pg_aggregate_order_by_using_operator() { + let sql = "SELECT aggfns(DISTINCT a, a, c ORDER BY c USING ~<~, a) FROM t"; + let select = pg().verified_only_select(sql); + let SelectItem::UnnamedExpr(Expr::Function(Function { + args: FunctionArguments::List(FunctionArgumentList { clauses, .. }), + .. + })) = &select.projection[0] + else { + unreachable!("expected aggregate function in projection"); + }; + + let Some(FunctionArgumentClause::OrderBy(order_by_exprs)) = clauses + .iter() + .find(|clause| matches!(clause, FunctionArgumentClause::OrderBy(_))) + else { + unreachable!("expected ORDER BY clause in aggregate function argument list"); + }; + + assert_eq!( + order_by_exprs[0].using_operator, + Some(ObjectName::from(vec!["~<~".into()])) + ); + assert_eq!(order_by_exprs[1].using_operator, None); +} + +#[test] +fn parse_pg_order_by_using_operator_syntax() { + pg().one_statement_parses_to( + "SELECT a FROM t ORDER BY a USING OPERATOR(<)", + "SELECT a FROM t ORDER BY a USING <", + ); + + let query = + pg().verified_query("SELECT a FROM t ORDER BY a USING OPERATOR(pg_catalog.<) NULLS LAST"); + let order_by = query.order_by.expect("expected ORDER BY clause"); + let OrderByKind::Expressions(exprs) = order_by.kind else { + unreachable!("expected ORDER BY expressions"); + }; + + assert_eq!( + exprs[0].using_operator, + Some(ObjectName::from(vec![ + Ident::new("pg_catalog"), + Ident::new("<"), + ])) + ); + assert_eq!(exprs[0].options.asc, None); + assert_eq!(exprs[0].options.nulls_first, Some(false)); +} + +#[test] +fn parse_pg_order_by_using_operator_invalid_cases() { + let err = pg() + .parse_sql_statements("SELECT a FROM t ORDER BY a USING ;") + .unwrap_err(); + assert!( + matches!(err, ParserError::ParserError(msg) if msg.contains("an ordering operator after USING")) + ); + + let err = pg() + .parse_sql_statements("SELECT a FROM t ORDER BY a USING OPERATOR();") + .unwrap_err(); + assert!(matches!(err, ParserError::ParserError(msg) if msg.contains("an operator name"))); + + let err = pg() + .parse_sql_statements("SELECT a FROM t ORDER BY a USING < DESC;") + .unwrap_err(); + assert!( + matches!(err, ParserError::ParserError(msg) if msg.contains("ASC/DESC cannot be used together with USING in ORDER BY")) + ); + + // `USING` in ORDER BY is PostgreSQL-specific and should not parse in GenericDialect. + let generic = TestedDialects::new(vec![Box::new(GenericDialect {})]); + assert!(generic + .parse_sql_statements("SELECT a FROM t ORDER BY a USING <;") + .is_err()); +} + #[test] fn parse_mat_cte() { let sql = r#"WITH cte AS MATERIALIZED (SELECT id FROM accounts) SELECT id FROM cte"#;