Access and Query Amazon Athena via DBI/JDBC
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

jdbc.r 5.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  #' AthenaDriver S4 class
  #'
  #' Driver class used for Athena connections. It extends `JDBCDriver` and
  #' declares the `identifier.quote` (character) and `jdrv` (`jobjRef`) slots.
  #'
  #' @export
  setClass(
    Class = "AthenaDriver",
    contains = "JDBCDriver",
    slots = c(identifier.quote = "character", jdrv = "jobjRef")
  )
  #' Create an Athena JDBC driver
  #'
  #' Builds a JDBC driver for `com.amazonaws.athena.jdbc.AthenaDriver` using the
  #' `AthenaJDBC41-1.1.0.jar` file looked up in the `metis` package, then
  #' coerces it to an `AthenaDriver`.
  #'
  #' @param identifier.quote identifier quote character handed to `JDBC()`
  #' @return an `AthenaDriver` object
  #' @export
  Athena <- function(identifier.quote='`') {
    jar_path <- system.file("AthenaJDBC41-1.1.0.jar", package = "metis")
    jdbc_drv <- JDBC(
      driverClass = "com.amazonaws.athena.jdbc.AthenaDriver",
      classPath = jar_path,
      identifier.quote = identifier.quote
    )
    as(jdbc_drv, "AthenaDriver")
  }
  #' Connect to Amazon Athena
  #'
  #' Builds the `jdbc:awsathena://` connection URL from `region` and
  #' `schema_name`, forwards the remaining settings as named connection
  #' properties to the inherited `JDBCDriver` method, and coerces the result to
  #' an `AthenaConnection`.
  #'
  #' @param drv an `AthenaDriver` (see [Athena()])
  #' @param provider JDBC auth provider (ideally leave default)
  #' @param region AWS region the Athena tables are in
  #' @param s3_staging_dir A write-able bucket on S3 that you have permissions for
  #' @param schema_name LOL if only this actually worked with Amazon's hacked Presto driver
  #' @param max_error_retries,connection_timeout,socket_timeout,retry_base_delay,retry_max_backoff_time
  #'     technical connection info that you should only muck with if you know what you're doing.
  #' @param log_path,log_level The Athena JDBC driver can (shockingly) provide a decent bit
  #'     of data in logs. Set this to a temporary directory or something log4j can use.
  #'     Both are optional: each one is only sent to the driver when it is supplied.
  #' @param ... additional named connection properties forwarded to the
  #'     `JDBCDriver` connect method
  #' @return an `AthenaConnection`
  #' @references <https://docs.aws.amazon.com/athena/latest/ug/connect-with-jdbc.html>
  #' @export
  setMethod(
    "dbConnect",
    "AthenaDriver",
    def = function(drv,
                   provider = "com.amazonaws.athena.jdbc.shaded.com.amazonaws.auth.DefaultAWSCredentialsProviderChain",
                   region = "us-east-1",
                   s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
                   schema_name = "default",
                   max_error_retries = 10,
                   connection_timeout = 10000,
                   socket_timeout = 10000,
                   retry_base_delay = 100,
                   retry_max_backoff_time = 1000,
                   log_path,
                   log_level,
                   ...) {

      conn_string <- sprintf(
        'jdbc:awsathena://athena.%s.amazonaws.com:443/%s', region, schema_name
      )

      conn_args <- list(
        drv, conn_string,
        s3_staging_dir = s3_staging_dir,
        schema_name = schema_name,
        max_error_retries = max_error_retries,
        connection_timeout = connection_timeout,
        socket_timeout = socket_timeout,
        retry_base_delay = retry_base_delay,
        retry_max_backoff_time = retry_max_backoff_time
      )

      # log_path / log_level have no defaults; forwarding them unconditionally
      # made every call that omitted them fail with "argument is missing" once
      # the inherited method evaluated its arguments. Only send what was given.
      if (!missing(log_path)) conn_args[["log_path"]] <- log_path
      if (!missing(log_level)) conn_args[["log_level"]] <- log_level

      conn_args[["aws_credentials_provider_class"]] <- provider
      conn_args <- c(conn_args, list(...))

      # The argument list is now built dynamically, so call the inherited
      # JDBCDriver method directly rather than through callNextMethod().
      jdbc_connect <- methods::selectMethod("dbConnect", "JDBCDriver")
      jc <- do.call(jdbc_connect, conn_args)

      as(jc, "AthenaConnection")

    }
  )
  #' AthenaConnection S4 class
  #'
  #' Connection class for Athena; extends `JDBCConnection` without adding slots.
  #'
  #' @export
  setClass(
    Class = "AthenaConnection",
    contains = "JDBCConnection"
  )
  #' AthenaResult S4 class
  #'
  #' Result class for Athena queries; extends `JDBCResult` without adding slots.
  #'
  #' @export
  setClass(
    Class = "AthenaResult",
    contains = "JDBCResult"
  )
  #' Send a query to Athena
  #'
  #' Delegates to the inherited `dbSendQuery` method and coerces what it returns
  #' to an `AthenaResult`.
  #'
  #' @param conn Athena connection
  #' @param statement SQL statement
  #' @param ... unused
  #' @return an `AthenaResult`
  #' @export
  setMethod(
    "dbSendQuery",
    signature(conn="AthenaConnection", statement="character"),
    def = function(conn, statement, ...) {
      jdbc_result <- callNextMethod()
      as(jdbc_result, "AthenaResult")
    }
  )
  #' Run a query against Athena and fetch the result
  #'
  #' Sends `statement` with `dbSendQuery()`, fetches every row (in blocks of
  #' 1000) and returns the result as a tibble. The underlying JDBC statement is
  #' closed when the function exits.
  #'
  #' @param conn Athena connection
  #' @param statement SQL statement
  #' @param type_convert if `TRUE`, run the fetched result through
  #'     `readr::type_convert()` before returning it (default `FALSE`)
  #' @param ... passed on to `dbSendQuery()`
  #' @return a tibble holding the query result
  #' @export
  setMethod(
    "dbGetQuery",
    signature(conn="AthenaConnection", statement="character"),
    def = function(conn, statement, type_convert=FALSE, ...) {
      r <- dbSendQuery(conn, statement, ...)
      # add = TRUE so this cleanup never replaces another exit handler
      on.exit(.jcall(r@stat, "V", "close"), add = TRUE)
      # dplyr::tbl_df() is deprecated and warns on every call; as_tibble() is
      # its documented replacement
      res <- dplyr::as_tibble(fetch(r, -1, block=1000))
      if (type_convert) res <- readr::type_convert(res)
      res
    }
  )
  #' List the tables in an Athena schema
  #'
  #' Runs `SHOW TABLES IN <schema>` and returns the `tab_name` column of the
  #' result. When `pattern` is supplied explicitly it is quoted with
  #' `dbQuoteString()` and appended to the statement; when it is omitted no
  #' filter is sent.
  #'
  #' @param conn Athena connection
  #' @param pattern table name pattern
  #' @param schema Athena schema name
  #' @param ... unused
  #' @export
  setMethod(
    "dbListTables",
    signature(conn="AthenaConnection"),
    def = function(conn, pattern='*', schema, ...) {

      show_stmt <- if (missing(pattern)) {
        sprintf("SHOW TABLES IN %s", schema)
      } else {
        sprintf("SHOW TABLES IN %s %s", schema, dbQuoteString(conn, pattern))
      }

      tables <- dbGetQuery(conn, show_stmt)

      tables$tab_name

    }
  )
  #' Check whether a table exists in an Athena schema
  #'
  #' Calls `dbListTables()` with `name` as the pattern and reports whether at
  #' least one table name came back.
  #'
  #' @param conn Athena connection
  #' @param name table name
  #' @param schema Athena schema name
  #' @param ... unused
  #' @return `TRUE` if the listing is non-empty, otherwise `FALSE`
  #' @export
  setMethod(
    "dbExistsTable",
    signature(conn="AthenaConnection", name="character"),
    def = function(conn, name, schema, ...) {
      matching_tables <- dbListTables(conn, schema=schema, pattern=name)
      length(matching_tables) > 0
    }
  )
  #' List the column names of an Athena table
  #'
  #' Selects a single row from `schema.name` and returns the column names of
  #' the result.
  #'
  #' @param conn Athena connection
  #' @param name table name
  #' @param schema Athena schema name
  #' @param ... unused
  #' @return the column names of the query result
  #' @export
  setMethod(
    "dbListFields",
    signature(conn="AthenaConnection", name="character"),
    def = function(conn, name, schema, ...) {
      one_row <- dbGetQuery(
        conn,
        sprintf("SELECT * FROM %s.%s LIMIT 1", schema, name)
      )
      colnames(one_row)
    }
  )
  #' Read an entire Athena table
  #'
  #' Runs `SELECT * FROM <schema>.<name>` and returns every row via
  #' `dbGetQuery()`.
  #'
  #' @param conn Athena connection
  #' @param name table name
  #' @param schema Athena schema name
  #' @param ... unused
  #' @return the result of `dbGetQuery()` for the whole table
  #' @export
  setMethod(
    "dbReadTable",
    signature(conn="AthenaConnection", name="character"),
    def = function(conn, name, schema, ...) {
      # Two fixes over the previous version:
      #  * no "LIMIT 1" -- reading a table must return all of its rows
      #  * the table name is interpolated as an identifier (as in
      #    dbListFields) instead of through dbQuoteString(), which produced a
      #    string literal (schema.'name') rather than a table reference
      query <- sprintf("SELECT * FROM %s.%s", schema, name)
      dbGetQuery(conn, query)
    }
  )