You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

115 lines
3.3 KiB

#' Process a PCAP with Zeek and create Parquet files
#'
#' @param pcap path to PCAP to process. ([path.expand()] will be called on this value)
#' @param out_dir path to Parquet files. ([path.expand()] will be called on this value)
#' If the directory does not exist it will be created. If no directory is specified
#' a temporary directory will be created and used. You should
#' call [unlink()] (with `recursive = TRUE`) on this path if you used a temporary directory.
#' @param zeek_opts extra options passed to the Zeek command line. NOTE:
#' `--no-checksums`, `LogAscii::use_json=T`, and `Log::default_scope_sep='_'`
#' are already handled by this function; no need to specify them.
#' @param ... extra named parameters passed on to [arrow::write_parquet()]
#' @return length 1 character vector of the expanded path of the `out_dir`
#' @note the `zeek` binary **must** be available on `PATH`. You can use the
#' environment variable `ZEEK_PATH` as a hint where [find_zeek()] will
#' look for the `zeek` binary.
#' @export
#' @examples
#' loc <- tryCatch(
#' pcap_to_zeek(system.file("pcap/ssh.pcap", package = "zeekr")),
#' error = function(e) message("No Zeek")
#' )
#'
#' if (!is.null(loc)) {
#' read_zeek_logs(loc)
#' unlink(loc, recursive = TRUE) # don't do this IRL until you're done working with or saving.
#' }
pcap_to_zeek <- function(pcap, out_dir = tempfile(pattern = "zeek"),
                         zeek_opts = c(), ...) {
  # Only the first element of `pcap` / `out_dir` is honoured.
  pcap <- path.expand(pcap[1])
  if (!file.exists(pcap)) {
    stop(sprintf("PCAP [%s] not found.", pcap), call. = FALSE)
  }
  # The symlink lives inside `out_dir`, so a relative target would be resolved
  # against `out_dir` and dangle; always link to the absolute location.
  pcap <- normalizePath(pcap, mustWork = TRUE)

  out_dir <- path.expand(out_dir[1])
  if (!dir.exists(out_dir) && !dir.create(out_dir, recursive = TRUE)) {
    stop(
      sprintf("Could not create output directory %s.", out_dir),
      call. = FALSE
    )
  }
  # Absolute form used internally: a relative `out_dir` stops being valid once
  # the working directory is switched below. The caller still receives
  # `out_dir` exactly as given (after tilde expansion).
  work_dir <- normalizePath(out_dir, mustWork = TRUE)

  pcap_link <- file.path(work_dir, basename(pcap))
  if (!file.symlink(pcap, pcap_link)) {
    stop(
      sprintf("Could not create symlink %s for %s.", pcap_link, pcap),
      call. = FALSE
    )
  }
  # Make sure the symlink never outlives this call, even when Zeek cannot be
  # located or exits with an error. Harmless if it was already removed.
  on.exit(unlink(pcap_link), add = TRUE)

  # Zeek writes its logs into the current directory, so run it from `work_dir`
  # and restore the caller's working directory afterwards.
  old_wd <- setwd(work_dir)
  on.exit(setwd(old_wd), add = TRUE)

  zeek_args <- c(
    "--no-checksums",
    "LogAscii::use_json=T",
    "Log::default_scope_sep='_'",
    zeek_opts,
    "-r",
    shQuote(pcap_link) # system2() does not quote arguments itself
  )
  status <- system2(
    command = find_zeek(),
    args = zeek_args,
    env = c("ZEEK_LOG_SUFFIX=json")
  )
  if (status != 0) {
    stop("Error converting PCAP.", call. = FALSE)
  }

  if (!file.remove(pcap_link)) {
    stop(sprintf("Could not remove symlink %s", pcap_link), call. = FALSE)
  }

  # Convert every JSON log Zeek produced into a sibling Parquet file and drop
  # the JSON original once it has been written.
  json_files <- list.files(work_dir, pattern = "\\.json$", full.names = TRUE)
  for (json_file in json_files) {
    arrow::write_parquet(
      x = arrow::read_json_arrow(file = json_file, as_data_frame = FALSE),
      sink = sub("\\.json$", ".parquet", json_file),
      ...
    )
    file.remove(json_file)
  }

  out_dir
}
#' Find the Zeek binary
#'
#' Use the environment variable `ZEEK_PATH` or specify the directory in
#' the call to this function.
#'
#' @param path hint to where to look for the Zeek binary
#' @export
#' @return length 1 character vector of the path to the zeek binary; an error is raised if the binary cannot be found
#' @examples
#' loc <- tryCatch(
#' find_zeek(),
#' error = function(e) message("No Zeek")
#' )
find_zeek <- function(path = Sys.getenv("ZEEK_PATH", "")) {
  # Treat NULL / NA / "" as "no hint" instead of failing inside `if`.
  hint <- if (length(path) > 0) path[1] else ""
  if (!is.na(hint) && nzchar(hint)) {
    # Put the hint at the front of PATH only for the duration of the lookup.
    # PATH is restored on exit so repeated calls neither grow it nor leak the
    # change into the rest of the session.
    old_path <- Sys.getenv("PATH")
    on.exit(Sys.setenv(PATH = old_path), add = TRUE)
    Sys.setenv(
      PATH = paste(
        normalizePath(hint, mustWork = FALSE),
        old_path,
        sep = .Platform$path.sep
      )
    )
  }
  res <- Sys.which("zeek")
  # Sys.which() yields "" when nothing is found. An explicit stop() is used
  # because stopifnot() takes its message from argument names, not from the
  # names of a vector passed to it.
  if (is.na(res) || !nzchar(res)) {
    stop("Cannot locate Zeek binary.", call. = FALSE)
  }
  # Sys.which() names its result after the program searched for; drop that.
  unname(res)
}
set_names <- function (object = nm, nm) {
  # Return `object` with `nm` attached as its names. When `object` is not
  # supplied it defaults to `nm`, giving a vector named by its own values.
  `names<-`(object, nm)
}