Skip to content

[SPARK-21102][SQL] Refresh command is too aggressive in parsing #18368

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ statement
| (DESC | DESCRIBE) TABLE? option=(EXTENDED | FORMATTED)?
tableIdentifier partitionSpec? describeColName? #describeTable
| REFRESH TABLE tableIdentifier #refreshTable
| REFRESH .*? #refreshResource
| REFRESH (STRING | .*?) #refreshResource
| CACHE LAZY? TABLE tableIdentifier (AS? query)? #cacheTable
| UNCACHE TABLE (IF EXISTS)? tableIdentifier #uncacheTable
| CLEAR CACHE #clearCache
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,11 +230,25 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
}

/**
* Create a [[RefreshTable]] logical plan.
* Create a [[RefreshResource]] logical plan.
*/
override def visitRefreshResource(ctx: RefreshResourceContext): LogicalPlan = withOrigin(ctx) {
val resourcePath = remainder(ctx.REFRESH.getSymbol).trim
RefreshResource(resourcePath)
val path = if (ctx.STRING != null) string(ctx.STRING) else extractUnquotedResourcePath(ctx)
RefreshResource(path)
}

private def extractUnquotedResourcePath(ctx: RefreshResourceContext): String = withOrigin(ctx) {
val unquotedPath = remainder(ctx.REFRESH.getSymbol).trim
validate(
unquotedPath != null && !unquotedPath.isEmpty,
"Resource paths cannot be empty in REFRESH statements. Use / to match everything",
ctx)
val forbiddenSymbols = Seq(" ", "\n", "\r", "\t")
validate(
!forbiddenSymbols.exists(unquotedPath.contains(_)),
"REFRESH statements cannot contain ' ', '\\n', '\\r', '\\t' inside unquoted resource paths",
ctx)
unquotedPath
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.{Ascending, Concat, SortOrder}
import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, RepartitionByExpression, Sort}
import org.apache.spark.sql.execution.command._
import org.apache.spark.sql.execution.datasources.CreateTable
import org.apache.spark.sql.execution.datasources.{CreateTable, RefreshResource}
import org.apache.spark.sql.internal.{HiveSerDe, SQLConf}
import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType}

Expand Down Expand Up @@ -66,6 +66,25 @@ class SparkSqlParserSuite extends AnalysisTest {
}
}

test("refresh resource") {
assertEqual("REFRESH prefix_path", RefreshResource("prefix_path"))
assertEqual("REFRESH /", RefreshResource("/"))
assertEqual("REFRESH /path///a", RefreshResource("/path///a"))
assertEqual("REFRESH pat1h/112/_1a", RefreshResource("pat1h/112/_1a"))
assertEqual("REFRESH pat1h/112/_1a/a-1", RefreshResource("pat1h/112/_1a/a-1"))
assertEqual("REFRESH path-with-dash", RefreshResource("path-with-dash"))
assertEqual("REFRESH \'path with space\'", RefreshResource("path with space"))
assertEqual("REFRESH \"path with space 2\"", RefreshResource("path with space 2"))
intercept("REFRESH a b", "REFRESH statements cannot contain")
intercept("REFRESH a\tb", "REFRESH statements cannot contain")
intercept("REFRESH a\nb", "REFRESH statements cannot contain")
intercept("REFRESH a\rb", "REFRESH statements cannot contain")
intercept("REFRESH a\r\nb", "REFRESH statements cannot contain")
intercept("REFRESH @ $a$", "REFRESH statements cannot contain")
intercept("REFRESH ", "Resource paths cannot be empty in REFRESH statements")
intercept("REFRESH", "Resource paths cannot be empty in REFRESH statements")
}

test("show functions") {
assertEqual("show functions", ShowFunctionsCommand(None, None, true, true))
assertEqual("show all functions", ShowFunctionsCommand(None, None, true, true))
Expand Down