Compare commits

...

No commits in common. 'd1dc5a1779a334fe7ab04c6ba5c48ea2f96549e0' and '946c8082cf85273c84fb608083a1de5a6f02b074' have entirely different histories.

  1. 27
      .Rbuildignore
  2. 1
      .codecov.yml
  3. 1
      .github/FUNDING.yaml
  4. 26
      .github/workflows/R-CMD-check.yaml
  5. 9
      .gitignore
  6. 12
      .travis.yml
  7. 25
      CONDUCT.md
  8. 28
      DESCRIPTION
  9. 14
      NAMESPACE
  10. 2
      NEWS.md
  11. 10
      R/get-zeek.R
  12. 106
      R/pcap_to_zeek.R
  13. 32
      R/read-zeek-logs.R
  14. 86
      R/zeek-man.R
  15. 25
      R/zeek-redefs.R
  16. 34
      R/zeek.R
  17. 15
      R/zeekr-package.R
  18. 61
      README.Rmd
  19. 127
      README.md
  20. 52
      appveyor.yml
  21. BIN
      inst/pcap/ssh.pcap
  22. 4
      inst/tinytest/test_zeekr.R
  23. 21
      man/find_zeek.Rd
  24. 14
      man/get_zeek.Rd
  25. 38
      man/pcap_to_zeek.Rd
  26. 22
      man/read_zeek_logs.Rd
  27. 22
      man/zeek.Rd
  28. 95
      man/zeek_man.Rd
  29. 25
      man/zeek_redefs.Rd
  30. 24
      man/zeekr.Rd
  31. 5
      tests/tinytest.R
  32. 205
      tools-ref/zeek.md
  33. 21
      zeekr.Rproj

27
.Rbuildignore

@ -0,0 +1,27 @@
^.vscode$
^.*\.Rproj$
^\.Rproj\.user$
^\.travis\.yml$
^README\.*Rmd$
^README\.*html$
^NOTES\.*Rmd$
^NOTES\.*html$
^\.codecov\.yml$
^README_files$
^doc$
^docs$
^tmp$
^notes$
^CONDUCT.*$
^CODE.*$
^\.gitlab-ci\.yml$
^\.vscode$
^CRAN-RELEASE$
^appveyor\.yml$
^tools$
^LICENSE\.md$
^bld$
^node_modules$
^package-lock\.json$
^\.github$
^tools-ref$

1
.codecov.yml

@ -0,0 +1 @@
comment: false

1
.github/FUNDING.yaml

@ -0,0 +1 @@
github: hrbrmstr

26
.github/workflows/R-CMD-check.yaml

@ -0,0 +1,26 @@
# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
on:
push:
branches:
- batman
pull_request:
branches:
- batman
name: R-CMD-check
jobs:
R-CMD-check:
runs-on: macOS-latest
steps:
- uses: actions/checkout@v2
- uses: r-lib/actions/setup-r@master
- name: Install dependencies
run: |
install.packages(c("remotes", "rcmdcheck"))
remotes::install_deps(dependencies = TRUE)
shell: Rscript {0}
- name: Check
run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error")
shell: Rscript {0}

9
.gitignore

@ -0,0 +1,9 @@
.DS_Store
.Rproj.user
.Rhistory
.RData
.Rproj
README_cache
src/*.o
src/*.so
src/*.dll

12
.travis.yml

@ -0,0 +1,12 @@
language: R
sudo: false
cache: packages
before_install:
- echo "options(repos = c(CRAN = 'https://packagemanager.rstudio.com/all/__linux__/xenial/latest'))" >> ~/.Rprofile.site
- echo "options(HTTPUserAgent = paste0('R/', getRversion(), ' R (',
paste(getRversion(), R.version['platform'], R.version['arch'], R.version['os']),
')'))" >> ~/.Rprofile.site
after_success:
- Rscript -e 'covr::codecov()'

25
CONDUCT.md

@ -0,0 +1,25 @@
# Contributor Code of Conduct
As contributors and maintainers of this project, we pledge to respect all people who
contribute through reporting issues, posting feature requests, updating documentation,
submitting pull requests or patches, and other activities.
We are committed to making participation in this project a harassment-free experience for
everyone, regardless of level of experience, gender, gender identity and expression,
sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.
Examples of unacceptable behavior by participants include the use of sexual language or
imagery, derogatory comments or personal attacks, trolling, public or private harassment,
insults, or other unprofessional conduct.
Project maintainers have the right and responsibility to remove, edit, or reject comments,
commits, code, wiki edits, issues, and other contributions that are not aligned to this
Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed
from the project team.
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by
opening an issue or contacting one or more of the project maintainers.
This Code of Conduct is adapted from the Contributor Covenant
(http://contributor-covenant.org), version 1.0.0, available at
http://contributor-covenant.org/version/1/0/0/

28
DESCRIPTION

@ -0,0 +1,28 @@
Package: zeekr
Type: Package
Title: Tools to Make Analyses Using Zeek Easier
Version: 0.1.0
Date: 2021-07-25
Authors@R: c(
person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"),
comment = c(ORCID = "0000-0001-5670-2640"))
)
Maintainer: Bob Rudis <bob@rud.is>
Description: [Zeek](https://zeek.org/) is an open source network security monitoring
system. Tools are provided to make it a bit easier to work with Zeek to perform
analyses with R.
URL: https://git.rud.is/hrbrmstr/zeekr
BugReports: https://git.rud.is/hrbrmstr/zeekr/issues
Encoding: UTF-8
License: AGPL
Suggests:
covr, tinytest
Depends:
R (>= 3.6.0)
Imports:
utils,
arrow,
ndjson,
tools
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.1

14
NAMESPACE

@ -0,0 +1,14 @@
# Generated by roxygen2: do not edit by hand
export(find_zeek)
export(get_zeek)
export(man_zeek)
export(pcap_to_zeek)
export(read_zeek_logs)
export(zeek)
export(zeek_man)
import(arrow)
import(ndjson)
importFrom(tools,file_ext)
importFrom(tools,file_path_sans_ext)
importFrom(utils,browseURL)

2
NEWS.md

@ -0,0 +1,2 @@
0.1.0
* Initial release

10
R/get-zeek.R

@ -0,0 +1,10 @@
#' Get Zeek
#'
#' Opens the default browser to the place where you can get Zeek.
#'
#' Called for its side effect: the download page URL is handed to
#' [utils::browseURL()].
#'
#' @export
#' @examples
#' if (interactive()) get_zeek()
get_zeek <- function() {
  utils::browseURL("https://zeek.org/get-zeek/")
}

106
R/pcap_to_zeek.R

@ -0,0 +1,106 @@
#' Process a PCAP with Zeek and create Parquet files
#'
#' Runs Zeek over `pcap` from inside `out_dir` with JSON logging turned on,
#' then converts every resulting `.json` log into a `.parquet` file and
#' removes the JSON original.
#'
#' @param pcap path to PCAP to process. ([path.expand()] will be called on this value)
#' @param out_dir path to Parquet files. ([path.expand()] will be called on this value)
#'        If the directory does not exist it will be created. If no directory is specified
#'        a temporary directory will be created and used. You should
#'        call [unlink()] (with `recursive = TRUE`) on this path if you used a
#'        temporary directory.
#' @param zeek_opts extra options passed to the Zeek command line. NOTE:
#'        `--no-checksums`, `LogAscii::use_json=T`, and `Log::default_scope_sep='_'`
#'        are already handled by this function; no need to specify them.
#' @param ... extra named parameters passed on to [arrow::write_parquet()]
#' @return length 1 character vector of the expanded path of the `out_dir`
#' @note the `zeek` binary **must** be available on `PATH`. You can use the
#'       environment variable `ZEEK_PATH` as a hint where [find_zeek()] will
#'       look for the `zeek` binary.
#' @export
#' @examples
#' loc <- pcap_to_zeek(system.file("pcap/ssh.pcap", package = "zeekr"))
#' read_zeek_logs(loc)
#' unlink(loc, recursive = TRUE) # don't do this IRL until you're done working with or saving.
pcap_to_zeek <- function(pcap, out_dir = tempfile(pattern = "zeek"), zeek_opts = c(), ...) {

  pcap <- path.expand(pcap[1])
  if (!file.exists(pcap)) {
    stop(sprintf("PCAP [%s] not found.", pcap), call. = FALSE)
  }

  # A symlink target is resolved relative to the directory holding the link,
  # so a relative `pcap` would dangle once linked into `out_dir`.
  pcap_target <- normalizePath(pcap, mustWork = TRUE)

  out_dir <- path.expand(out_dir[1])
  if (!dir.exists(out_dir) && !dir.create(out_dir, recursive = TRUE)) {
    stop(sprintf("Could not create output directory %s.", out_dir), call. = FALSE)
  }

  # Work with an absolute path: we setwd() into the directory below, after
  # which a relative `out_dir` would no longer point at the right place.
  work_dir <- normalizePath(out_dir, mustWork = TRUE)

  pcap_link <- file.path(work_dir, basename(pcap))
  if (!file.symlink(pcap_target, pcap_link)) {
    stop(sprintf("Could not create symlink %s for %s.", pcap_link, pcap), call. = FALSE)
  }
  # Registered only after the link was created by us, so a pre-existing file
  # of the same name is never deleted. Cleans up when Zeek fails part-way.
  on.exit(unlink(pcap_link), add = TRUE)

  # system2() pastes arguments into a shell command line unquoted, so the
  # link path has to be quoted here in case it contains spaces.
  zeek_opts <- c(
    "--no-checksums",
    "LogAscii::use_json=T",
    "Log::default_scope_sep='_'",
    zeek_opts,
    "-r", shQuote(pcap_link)
  )

  # Zeek writes its logs to the current working directory.
  old_wd <- setwd(work_dir)
  on.exit(setwd(old_wd), add = TRUE)

  status <- system2(
    command = find_zeek(),
    args = zeek_opts,
    env = c("ZEEK_LOG_SUFFIX=json")
  )

  if (status != 0) {
    stop("Error converting PCAP.", call. = FALSE)
  }

  if (!file.remove(pcap_link)) {
    stop(sprintf("Could not remove symlink %s", pcap_link), call. = FALSE)
  }

  in_fils <- list.files(work_dir, pattern = "\\.json$", full.names = TRUE)
  out_fils <- sub("\\.json$", ".parquet", in_fils)

  for (idx in seq_along(in_fils)) {
    arrow::write_parquet(
      x = arrow::read_json_arrow(
        file = in_fils[idx],
        as_data_frame = FALSE
      ),
      sink = out_fils[idx],
      ...
    )
    file.remove(in_fils[idx])
  }

  out_dir

}
#' Find the Zeek binary
#'
#' Use the environment variable `ZEEK_PATH` or specify the directory in
#' the call to this function.
#'
#' When a non-empty `path` is given it is prepended to the `PATH` of the
#' running R process (once; it is not added again if already present) before
#' the lookup is performed with [Sys.which()].
#'
#' @param path hint to where to look for the Zeek binary (a directory)
#' @export
#' @return length 1 character vector of the path to the zeek binary. An error
#'         is raised if the binary cannot be located.
#' @examples
#' find_zeek()
find_zeek <- function(path = Sys.getenv("ZEEK_PATH", "")) {

  path <- path[1]

  if (!is.na(path) && nzchar(path)) {
    current <- Sys.getenv("PATH")
    entries <- strsplit(current, .Platform$path.sep, fixed = TRUE)[[1]]
    # paste() (not paste0()) so the separator really goes *between* the hint
    # directory and the existing PATH.
    if (!(path %in% entries)) {
      Sys.setenv(PATH = paste(path, current, sep = .Platform$path.sep))
    }
  }

  res <- unname(Sys.which("zeek"))

  if (!nzchar(res)) {
    stop("Cannot locate Zeek binary.", call. = FALSE)
  }

  res

}
# Internal helper: return `object` with its names set to `nm`.
# When `object` is not supplied it defaults to `nm`, i.e. the values name
# themselves.
set_names <- function(object = nm, nm) {
  named <- object
  names(named) <- nm
  named
}

32
R/read-zeek-logs.R

@ -0,0 +1,32 @@
#' Read zeek logs from a processed PCAP into a list
#'
#' Every file in `log_dir` is read: files with a `parquet` extension via
#' [arrow::read_parquet()], everything else via [ndjson::stream_in()].
#' Subdirectories are skipped.
#'
#' @note Logs must be in Parquet or JSON format.
#' @param log_dir directory of zeek logs ([path.expand()] will be called on
#'        this value)
#' @return a list with one element per log file, named after the file
#'         (extension removed; names made unique)
#' @export
#' @examples
#' loc <- pcap_to_zeek(system.file("pcap/ssh.pcap", package = "zeekr"))
#' read_zeek_logs(loc)
#' unlink(loc, recursive = TRUE) # don't do this IRL until you're done working with or saving.
read_zeek_logs <- function(log_dir) {

  log_dir <- path.expand(log_dir[1])

  # Explicit stop() rather than a named stopifnot() message, which is only
  # honoured from R 4.0.0 on.
  if (!dir.exists(log_dir)) {
    stop("Cannot find directory.", call. = FALSE)
  }

  in_fils <- list.files(log_dir, full.names = TRUE)
  # list.files() also returns subdirectories; those are not log files.
  in_fils <- in_fils[!dir.exists(in_fils)]

  fil_names <- make.unique(tools::file_path_sans_ext(basename(in_fils)))

  out <- lapply(in_fils, function(.x) {
    if (tools::file_ext(.x) == "parquet") {
      arrow::read_parquet(.x)
    } else {
      ndjson::stream_in(.x, cls = "tbl")
    }
  })

  set_names(out, fil_names)

}

86
R/zeek-man.R

@ -0,0 +1,86 @@
#' Zeek Manual Page Quick Reference
#'
#' `zeek` - passive network traffic analyzer
#'
#' ## SYNOPSIS
#'
#' **`zeek`** `[*options*] [*file* ...]`
#'
#' ## DESCRIPTION
#'
#' Zeek is primarily a security monitor that inspects all traffic on a link in depth for signs of suspicious activity. More generally, however, Zeek supports a wide range of traffic analysis tasks even outside of the security domain, including performance measurements and helping with trouble-shooting.
#'
#' Zeek comes with built-in functionality for a range of analysis and detection tasks, including detecting malware by interfacing to external registries, reporting vulnerable versions of software seen on the network, identifying popular web applications, detecting SSH brute-forcing, validating SSL certificate chains, among others.
#'
#' ## OPTIONS
#'
#' * **`<file>`**: policy file, or read stdin
#' * **`-a`**, `--parse-only`: exit immediately after parsing scripts
#' * **`-b`**, `--bare-mode`: don't load scripts from the base/ directory
#' * **`-d`**, `--debug-policy`: activate policy file debugging
#' * **`-e`**, `--exec <zeek code>`: augment loaded policies by given code
#' * **`-f`**, `--filter <filter>`: tcpdump filter
#' * **`-h`**, `--help|-?`: command line help
#' * **`-i`**, `--iface <interface>`: read from given interface
#' * **`-p`**, `--prefix <prefix>`: add given prefix to policy file resolution
#' * **`-r`**, `--readfile <readfile>`: read from given tcpdump file
#' * **`-s`**, `--rulefile <rulefile>`: read rules from given file
#' * **`-t`**, `--tracefile <tracefile>`: activate execution tracing
#' * **`-w`**, `--writefile <writefile>`: write to given tcpdump file
#' * **`-v`**, `--version`: print version and exit
#' * **`-x`**, `--print-state <file.bst>`: print contents of state file
#' * **`-C`**, `--no-checksums`: ignore checksums
#' * **`-F`**, `--force-dns`: force DNS
#' * **`-I`**, `--print-id <ID name>`: print out given ID
#' * **`-N`**, `--print-plugins`: print available plugins and exit (**-NN** for verbose)
#' * **`-P`**, `--prime-dns`: prime DNS
#' * **`-Q`**, `--time`: print execution time summary to stderr
#' * **`-R`**, `--replay <events.bst>`: replay events
#' * **`-S`**, `--debug-rules`: enable rule debugging
#' * **`-T`**, `--re-level <level>`: set 'RE_level' for rules
#' * **`-U`**, `--status-file <file>`: Record process status in file
#' * **`-W`**, `--watchdog`: activate watchdog timer
#' * **`-X`**, `--zeekygen <cfgfile>`: generate documentation based on config file
#' * **`--pseudo-realtime[=<speedup>]`**: enable pseudo-realtime for performance evaluation (default 1)
#' * **`--load-seeds`** `<file>`: load seeds from given file
#' * **`--save-seeds`** `<file>`: save seeds to given file
#'
#' The following option is available only when Zeek is built with the `--enable-debug` configure option:
#'
#' **`-B`**, `--debug <dbgstreams>`: Enable debugging output for selected streams ('-B help' for help)
#'
#' The following options are available only when Zeek is built with `gperftools` support (use the `--enable-perftools` and `--enable-perftools-debug` configure options):
#'
#' * **`-m`**, `--mem-leaks`: show leaks
#' * **`-M`**, `--mem-profile`: record heap
#'
#' ## ENVIRONMENT
#'
#' - **`ZEEKPATH`**: file search path
#' - **`ZEEK_PLUGIN_PATH`**: plugin search path
#' - **`ZEEK_PLUGIN_ACTIVATE`**: plugins to always activate
#' - **`ZEEK_PREFIXES`**: prefix list
#' - **`ZEEK_DNS_FAKE`**: disable DNS lookups
#' - **`ZEEK_SEED_FILE`**: file to load seeds from
#' - **`ZEEK_LOG_SUFFIX`**: ASCII log file extension
#' - **`ZEEK_PROFILER_FILE`**: Output file for script execution statistics
#' - **`ZEEK_DISABLE_ZEEKYGEN`**: Disable Zeekygen (Broxygen) documentation support
#'
#' ## AUTHOR
#'
#' **zeek** was written by The Zeek Project &lt;info@zeek.org>.
#'
#' @name zeek_man
#' @rdname zeek_man
#' @export
#' @examples
#' zeek_man()
#' man_zeek()
#' ?zeek_man
zeek_man <- function() {
  # Qualify both the function and the package to search: an unqualified
  # help("zeek_man") only finds the topic when zeekr is attached, so
  # zeekr::zeek_man() would otherwise come back empty-handed.
  utils::help("zeek_man", package = "zeekr")
}
#' @rdname zeek_man
#' @export
man_zeek <- zeek_man

25
R/zeek-redefs.R

@ -0,0 +1,25 @@
#' (WIP) Common `redef`initions for Zeek when processing PCAPs
#'
#' Zeek is great out-of-the-box, but you may need to tweak behavior
#' every now and then to perform analyses on the Zeek logs.
#'
#' - `redef Log::default_scope_sep = "_"` will turn dots ("`.`") in column
#' names to underscores ("`_`"). This will make many "big data" environments
#' much more pleasant to deal with.
#'
#' - `redef FileExtraction::path = "/some/where/else"` will reconfigure where
#' Zeek's output files go.
#'
#' - `redef FTP::default_capture_password = T` will turn off Zeek's default
#' masking of FTP passwords.
#'
#' - `redef HTTP::default_capture_password=T` will turn off Zeek's default
#' state of not capturing HTTP passwords.
#'
#' - `redef Intel::read_files += { "/opt/zeek_file_badlist.txt" }` will load in
#' custom IoCs (see the [Intelligence Framework](https://docs.zeek.org/en/master/frameworks/intel.html)
#' for more info).
#'
#' @name zeek_redefs
#' @rdname zeek_redefs
NULL

34
R/zeek.R

@ -0,0 +1,34 @@
#' Call the Zeek binary with optional custom environment variables and options
#'
#' This is just a convenience wrapper around [system2()]. See [find_zeek()] for
#' information on helping this package find the Zeek binary.
#'
#' Standard output and standard error are redirected to temporary files, read
#' back in, and the temporary files are removed when the function exits.
#'
#' @param zeek_bin specify a complete path or let [find_zeek()] do the dirty work.
#' @param args same as [system2()] `args`
#' @param env same as [system2()] `env`
#' @return `list` with `stderr`, `stdout`, `status` and `errmsg` (invisibly).
#'         `stderr` and `stdout` are character vectors of output lines,
#'         `status` is the integer exit status reported by [system2()] and
#'         `errmsg` is the `"errmsg"` attribute of the [system2()] result
#'         (`NULL` when absent).
#' @export
zeek <- function(zeek_bin = find_zeek(), args = c(), env = c()) {

  errf <- tempfile()
  outf <- tempfile()
  # One handler for both files: a second plain on.exit() call would replace
  # the first and leave the stderr file behind.
  on.exit(unlink(c(errf, outf)), add = TRUE)

  res <- system2(
    command = zeek_bin,
    args = args,
    env = env,
    stderr = errf,
    stdout = outf
  )

  # The capture file may not exist if the command could not be started.
  read_captured <- function(path) {
    if (file.exists(path)) readLines(path, warn = FALSE) else character()
  }

  invisible(list(
    stderr = read_captured(errf),
    stdout = read_captured(outf),
    # With output redirected to files, system2() returns the exit status as
    # its value; it is not attached as a "status" attribute.
    status = as.integer(res),
    errmsg = attr(res, "errmsg")
  ))

}

15
R/zeekr-package.R

@ -0,0 +1,15 @@
#' Tools to Make Analyses Using Zeek Easier
#'
#' [Zeek](https://zeek.org/) is an open source network security monitoring
#' system. Tools are provided to make it a bit easier to work with Zeek to perform
#' analyses with R.
#'
#' @md
#' @name zeekr
#' @keywords internal
#' @author Bob Rudis (bob@@rud.is)
#' @import arrow
#' @import ndjson
#' @importFrom utils browseURL
#' @importFrom tools file_path_sans_ext file_ext
"_PACKAGE"

61
README.Rmd

@ -0,0 +1,61 @@
---
output: rmarkdown::github_document
editor_options:
chunk_output_type: console
---
```{r pkg-knitr-opts, include=FALSE}
hrbrpkghelpr::global_opts()
```
```{r badges, results='asis', echo=FALSE, cache=FALSE}
hrbrpkghelpr::stinking_badges()
```
```{r description, results='asis', echo=FALSE, cache=FALSE}
hrbrpkghelpr::yank_title_and_description()
```
## What's Inside The Tin
The following functions are implemented:
```{r ingredients, results='asis', echo=FALSE, cache=FALSE}
hrbrpkghelpr::describe_ingredients()
```
## Installation
```{r install-ex, results='asis', echo=FALSE, cache=FALSE}
hrbrpkghelpr::install_block()
```
## Usage
```{r lib-ex}
library(zeekr)
# current version
packageVersion("zeekr")
```
```{r ex-01}
loc <- pcap_to_zeek(system.file("pcap/ssh.pcap", package = "zeekr"))
list.files(loc)
zeek <- read_zeek_logs(loc)
str(zeek, 3)
```
## zeekr Metrics
```{r cloc, echo=FALSE}
cloc::cloc_pkg_md()
```
## Code of Conduct
Please note that this project is released with a Contributor Code of Conduct.
By participating in this project you agree to abide by its terms.

127
README.md

@ -1,2 +1,129 @@
[![Project Status: Active – The project has reached a stable, usable
state and is being actively
developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
[![Signed
by](https://img.shields.io/badge/Keybase-Verified-brightgreen.svg)](https://keybase.io/hrbrmstr)
![Signed commit
%](https://img.shields.io/badge/Signed_Commits-100%25-lightgrey.svg)
[![R-CMD-check](https://github.com/hrbrmstr/zeekr/workflows/R-CMD-check/badge.svg)](https://github.com/hrbrmstr/zeekr/actions?query=workflow%3AR-CMD-check)
[![Linux build
Status](https://travis-ci.org/hrbrmstr/zeekr.svg?branch=master)](https://travis-ci.org/hrbrmstr/zeekr)
![Minimal R
Version](https://img.shields.io/badge/R%3E%3D-3.6.0-blue.svg)
![License](https://img.shields.io/badge/License-AGPL-blue.svg)
# zeekr
Tools to Make Analyses Using Zeek Easier
## Description
[Zeek](https://zeek.org/) is an open source network security monitoring
system. Tools are provided to make it a bit easier to work with Zeek to
perform analyses with R.
## What’s Inside The Tin
The following functions are implemented:
- `find_zeek`: Find the Zeek binary
- `get_zeek`: Get Zeek
- `pcap_to_zeek`: Process a PCAP with Zeek and create Parquet files
- `read_zeek_logs`: Read zeek logs from a processed PCAP into a list
- `zeek_man`: Zeek Manual Page Quick Reference
- `zeek_redefs`: (WIP) Common redefinitions for Zeek when processing
PCAPs
- `zeek`: Call the Zeek binary with optional custom environment
variables and options
## Installation
``` r
remotes::install_git("https://git.rud.is/hrbrmstr/zeekr.git")
```
NOTE: To use the ‘remotes’ install options you will need to have the
[{remotes} package](https://github.com/r-lib/remotes) installed.
## Usage
``` r
library(zeekr)
# current version
packageVersion("zeekr")
## [1] '0.1.0'
```
``` r
loc <- pcap_to_zeek(system.file("pcap/ssh.pcap", package = "zeekr"))
list.files(loc)
## [1] "conn.parquet" "packet_filter.parquet" "ssh.parquet"
zeek <- read_zeek_logs(loc)
str(zeek, 3)
## List of 3
## $ conn :Classes 'tbl_df', 'tbl' and 'data.frame': 49 obs. of 18 variables:
## ..$ ts : num [1:49] 1.32e+09 1.32e+09 1.32e+09 1.32e+09 1.32e+09 ...
## ..$ uid : chr [1:49] "CAmnTGoJHwg6hNFea" "CvHC1J1yB5yLVQuXR7" "CE6UIa40hIdUlMZCS2" "CK2bU24sjXqxWxiSpl" ...
## ..$ id_orig_h : chr [1:49] "172.16.238.1" "172.16.238.1" "172.16.238.1" "172.16.238.1" ...
## ..$ id_orig_p : int [1:49] 58389 58389 58389 58389 58397 58389 58389 58398 58389 58399 ...
## ..$ id_resp_h : chr [1:49] "172.16.238.135" "172.16.238.135" "172.16.238.135" "172.16.238.135" ...
## ..$ id_resp_p : int [1:49] 22 22 22 22 22 22 22 22 22 22 ...
## ..$ proto : chr [1:49] "tcp" "tcp" "tcp" "tcp" ...
## ..$ duration : num [1:49] 4.23 NA NA NA 17.78 ...
## ..$ orig_bytes : int [1:49] 0 NA NA NA 1733 NA NA 1733 NA 1589 ...
## ..$ resp_bytes : int [1:49] 0 NA NA NA 2007 NA NA 2007 NA 1943 ...
## ..$ conn_state : chr [1:49] "SH" "SH" "SH" "SH" ...
## ..$ missed_bytes : int [1:49] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ history : chr [1:49] "F" "F" "F" "F" ...
## ..$ orig_pkts : int [1:49] 6 1 1 1 21 1 1 21 1 19 ...
## ..$ orig_ip_bytes: int [1:49] 312 52 52 52 2837 52 52 2837 52 2589 ...
## ..$ resp_pkts : int [1:49] 0 0 0 0 17 0 0 18 0 16 ...
## ..$ resp_ip_bytes: int [1:49] 0 0 0 0 2899 0 0 2951 0 2783 ...
## ..$ service : chr [1:49] NA NA NA NA ...
## $ packet_filter:Classes 'tbl_df', 'tbl' and 'data.frame': 1 obs. of 5 variables:
## ..$ ts : num 1.63e+09
## ..$ node : chr "zeek"
## ..$ filter : chr "ip or not ip"
## ..$ init : logi TRUE
## ..$ success: logi TRUE
## $ ssh :Classes 'tbl_df', 'tbl' and 'data.frame': 40 obs. of 17 variables:
## ..$ ts : num [1:40] 1.32e+09 1.32e+09 1.32e+09 1.32e+09 1.32e+09 ...
## ..$ uid : chr [1:40] "CUegdV3IvAoKWGu681" "Ci9sXu2lSlpqg3Au47" "Csw04n3ZhbvBDjJTNd" "C3qrkm4EaQi2uvlpD6" ...
## ..$ id_orig_h : chr [1:40] "172.16.238.1" "172.16.238.1" "172.16.238.1" "172.16.238.1" ...
## ..$ id_orig_p : int [1:40] 58395 58396 58397 58398 58399 58402 58403 58404 58405 58406 ...
## ..$ id_resp_h : chr [1:40] "172.16.238.168" "172.16.238.129" "172.16.238.136" "172.16.238.136" ...
## ..$ id_resp_p : int [1:40] 22 22 22 22 22 22 22 22 22 22 ...
## ..$ version : int [1:40] 2 2 2 2 2 2 2 2 2 2 ...
## ..$ auth_success : logi [1:40] TRUE TRUE FALSE FALSE FALSE FALSE ...
## ..$ auth_attempts : int [1:40] 3 1 2 2 1 2 2 2 4 2 ...
## ..$ client : chr [1:40] "SSH-2.0-OpenSSH_5.6" "SSH-2.0-OpenSSH_5.6" "SSH-2.0-OpenSSH_5.6" "SSH-2.0-OpenSSH_5.6" ...
## ..$ server : chr [1:40] "SSH-2.0-OpenSSH_5.3" "SSH-2.0-OpenSSH_5.3" "SSH-2.0-OpenSSH_5.8p1 Debian-7ubuntu1" "SSH-2.0-OpenSSH_5.8p1 Debian-7ubuntu1" ...
## ..$ cipher_alg : chr [1:40] "aes128-ctr" "aes128-ctr" "aes128-ctr" "aes128-ctr" ...
## ..$ mac_alg : chr [1:40] "hmac-md5" "hmac-md5" "hmac-md5" "hmac-md5" ...
## ..$ compression_alg: chr [1:40] "none" "none" "none" "none" ...
## ..$ kex_alg : chr [1:40] "diffie-hellman-group-exchange-sha256" "diffie-hellman-group-exchange-sha256" "diffie-hellman-group-exchange-sha256" "diffie-hellman-group-exchange-sha256" ...
## ..$ host_key_alg : chr [1:40] "ssh-rsa" "ssh-rsa" "ssh-rsa" "ssh-rsa" ...
## ..$ host_key : chr [1:40] "a5:3c:40:6e:e8:bf:5d:09:79:c8:4b:2c:65:5f:eb:12" "a5:3c:40:6e:e8:bf:5d:09:79:c8:4b:2c:65:5f:eb:12" "87:11:46:da:89:c5:2b:d9:6b:ee:e0:44:7e:73:80:f8" "87:11:46:da:89:c5:2b:d9:6b:ee:e0:44:7e:73:80:f8" ...
```
## zeekr Metrics
| Lang | # Files | (%) | LoC | (%) | Blank lines | (%) | # Lines | (%) |
|:-----|--------:|-----:|----:|-----:|------------:|-----:|--------:|-----:|
| R | 8 | 0.36 | 100 | 0.37 | 38 | 0.32 | 175 | 0.42 |
| YAML | 2 | 0.09 | 23 | 0.09 | 2 | 0.02 | 2 | 0.00 |
| Rmd | 1 | 0.05 | 12 | 0.04 | 19 | 0.16 | 30 | 0.07 |
| SUM | 11 | 0.50 | 135 | 0.50 | 59 | 0.50 | 207 | 0.50 |
clock Package Metrics for zeekr
## Code of Conduct
Please note that this project is released with a Contributor Code of
Conduct. By participating in this project you agree to abide by its
terms.

52
appveyor.yml

@ -0,0 +1,52 @@
# DO NOT CHANGE the "init" and "install" sections below
# Download script file from GitHub
init:
ps: |
$ErrorActionPreference = "Stop"
Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1"
Import-Module '..\appveyor-tool.ps1'
install:
ps: Bootstrap
cache:
- C:\RLibrary
environment:
NOT_CRAN: true
# env vars that may need to be set, at least temporarily, from time to time
# see https://github.com/krlmlr/r-appveyor#readme for details
# USE_RTOOLS: true
# R_REMOTES_STANDALONE: true
# Adapt as necessary starting from here
build_script:
- travis-tool.sh install_deps
test_script:
- travis-tool.sh run_tests
on_failure:
- 7z a failure.zip *.Rcheck\*
- appveyor PushArtifact failure.zip
artifacts:
- path: '*.Rcheck\**\*.log'
name: Logs
- path: '*.Rcheck\**\*.out'
name: Logs
- path: '*.Rcheck\**\*.fail'
name: Logs
- path: '*.Rcheck\**\*.Rout'
name: Logs
- path: '\*_*.tar.gz'
name: Bits
- path: '\*_*.zip'
name: Bits

BIN
inst/pcap/ssh.pcap

Binary file not shown.

4
inst/tinytest/test_zeekr.R

@ -0,0 +1,4 @@
# Placeholder test: asserts a trivially true equality so that the suite
# contains at least one expectation. Replace with real tests of the package.
# NOTE(review): expect_equal() is presumably supplied by tinytest, since this
# file lives under inst/tinytest -- confirm.
expect_equal(1 + 1, 2)

21
man/find_zeek.Rd

@ -0,0 +1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pcap_to_zeek.R
\name{find_zeek}
\alias{find_zeek}
\title{Find the Zeek binary}
\usage{
find_zeek(path = Sys.getenv("ZEEK_PATH", ""))
}
\arguments{
\item{path}{hint to where to look for the Zeek binary}
}
\value{
length 1 character vector of the path to the zeek binary or \code{""}
}
\description{
Use the environment variable \code{ZEEK_PATH} or specify the directory in
the call to this function.
}
\examples{
find_zeek()
}

14
man/get_zeek.Rd

@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get-zeek.R
\name{get_zeek}
\alias{get_zeek}
\title{Get Zeek}
\usage{
get_zeek()
}
\description{
Opens the default browser to the place where you can get Zeek.
}
\examples{
if (interactive) get_zeek()
}

38
man/pcap_to_zeek.Rd

@ -0,0 +1,38 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pcap_to_zeek.R
\name{pcap_to_zeek}
\alias{pcap_to_zeek}
\title{Process a PCAP with Zeek and create Parquet files}
\usage{
pcap_to_zeek(pcap, out_dir = tempfile(pattern = "zeek"), zeek_opts = c(), ...)
}
\arguments{
\item{pcap}{path to PCAP to process. (\code{\link[=path.expand]{path.expand()}} will be called on this value)}
\item{out_dir}{path to Parquet files. (\code{\link[=path.expand]{path.expand()}} will be called on this value)
If the directory does not exist it will be created. If ho directory is specified
a temporary directory will be created and used. You should
call \code{\link[=unlink]{unlink()}} on this path if you used a temporary directory.}
\item{zeek_opts}{extra options passed to to Zeek command line. NOTE:
\code{--no-checksums}, \code{LogAscii::use_json=T}, and \code{Log::default_scope_sep='_'}
are already handled by this function; no need to specify them.}
\item{...}{extra named parameters passed on to \code{\link[arrow:write_parquet]{arrow::write_parquet()}}}
}
\value{
length 1 character vector of the expanded path of the \code{out_dir}
}
\description{
Process a PCAP with Zeek and create Parquet files
}
\note{
the \code{zeek} binary \strong{must} be available on \code{PATH}. You can use the
environment variable \code{ZEEK_PATH} as a hint where \code{\link[=find_zeek]{find_zeek()}} will
look for the \code{zeek} binary.
}
\examples{
loc <- pcap_to_zeek(system.file("pcap/ssh.pcap", package = "zeekr"))
read_zeek_logs(loc)
unlink(loc) # don't do this IRL until you're done working with or saving.
}

22
man/read_zeek_logs.Rd

@ -0,0 +1,22 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read-zeek-logs.R
\name{read_zeek_logs}
\alias{read_zeek_logs}
\title{Read zeek logs from a processed PCAP into a list}
\usage{
read_zeek_logs(log_dir)
}
\arguments{
\item{log_dir}{directory of zeek logs}
}
\description{
Read zeek logs from a processed PCAP into a list
}
\note{
Logs must be in Parquet or JSON format.
}
\examples{
loc <- pcap_to_zeek(system.file("pcap/ssh.pcap", package = "zeekr"))
read_zeek_logs(loc)
unlink(loc) # don't do this IRL until you're done working with or saving.
}

22
man/zeek.Rd

@ -0,0 +1,22 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/zeek.R
\name{zeek}
\alias{zeek}
\title{Call the Zeek binary with optional custom environment variables and options}
\usage{
zeek(zeek_bin = find_zeek(), args = c(), env = c())
}
\arguments{
\item{zeek_bin}{specify a complate path or let \code{\link[=find_zeek]{find_zeek()}} do the dirty work.}
\item{args}{same as \code{\link[=system2]{system2()}} \code{args}}
\item{env}{same as \code{\link[=system2]{system2()}} \code{env}}
}
\value{
\code{list} with \code{stderr}, \code{stdout}, \code{status} and \code{errmsg} (invisibly)
}
\description{
This is just a convenience wrapper around \code{\link[=system2]{system2()}}. See \code{\link[=find_zeek]{find_zeek()}} for
information on helping this package find the Zeek binary.
}

95
man/zeek_man.Rd

@ -0,0 +1,95 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/zeek-man.R
\name{zeek_man}
\alias{zeek_man}
\alias{man_zeek}
\title{Zeek Manual Page Quick Reference}
\usage{
zeek_man()
man_zeek()
}
\description{
\code{zeek} - passive network traffic analyzer
}
\details{
\subsection{SYNOPSIS}{
\strong{\code{zeek}} \verb{[*options*] [*file* ...]}
}
\subsection{DESCRIPTION}{
Zeek is primarily a security monitor that inspects all traffic on a link in depth for signs of suspicious activity. More generally, however, Zeek supports a wide range of traffic analysis tasks even #' outside of the security domain, including performance measurements and helping with trouble-shooting.
Zeek comes with built-in functionality for a range of analysis and detection tasks, including detecting malware by interfacing to external registries, reporting vulnerable versions of software seen on #' the network, identifying popular web applications, detecting SSH brute-forcing, validating SSL certificate chains, among others.
}
\subsection{OPTIONS}{
\itemize{
\item \strong{\verb{&lt;file>}}: policy file, or read stdin
\item \strong{\code{-a}}, \code{--parse-only}: exit immediately after parsing scripts
\item \strong{\code{-b}}, \code{--bare-mode}: don't load scripts from the base/ directory
\item \strong{\code{-d}}, \verb{--debug-policy:} activate policy file debugging
\item \strong{\code{-e}}, \verb{--exec &lt;zeek code>}: augment loaded policies by given code
\item \strong{\code{-f}}, \verb{--filter &lt;filter>}: tcpdump filter
\item \strong{\code{-h}}, \verb{--help|-?}: command line help
\item \strong{\code{-i}}, \verb{--iface &lt;interface>}: read from given interface
\item \strong{\code{-p}}, \verb{--prefix &lt;prefix>}: add given prefix to policy file resolution
\item \strong{\code{-r}}, \verb{--readfile &lt;readfile>}: read from given tcpdump file
\item \strong{\code{-s}}, \verb{--rulefile &lt;rulefile>}: read rules from given file
\item \strong{\code{-t}}, \verb{--tracefile &lt;tracefile>}: activate execution tracing
\item \strong{\code{-w}}, \verb{--writefile &lt;writefile>}: write to given tcpdump file
\item \strong{\code{-v}}, \code{--version}: print version and exit
\item \strong{\code{-x}}, \verb{--print-state &lt;file.bst>}: print contents of state file
\item \strong{\code{-C}}, \code{--no-checksums}: ignore checksums
\item \strong{\code{-F}}, \code{--force-dns}: force DNS
\item \strong{\code{-I}}, \verb{--print-id &lt;ID name>}: print out given ID
\item \strong{\code{-N}}, \code{--print-plugins}: print available plugins and exit (\strong{-NN} for verbose)
\item \strong{\code{-P}}, \code{--prime-dns}: prime DNS
\item \strong{\code{-Q}}, \code{--time}: print execution time summary to stderr
\item \strong{\code{-R}}, \verb{--replay &lt;events.bst>}: replay events
\item \strong{\code{-S}}, \code{--debug-rules}: enable rule debugging
\item \strong{\code{-T}}, \verb{--re-level &lt;level>}: set 'RE_level' for rules
\item \strong{\code{-U}}, \verb{--status-file &lt;file>}: Record process status in file
\item \strong{\code{-W}}, \code{--watchdog}: activate watchdog timer
\item \strong{\code{-X}}, \verb{--zeekygen &lt;cfgfile>}: generate documentation based on config file
\item **\verb{--pseudo-realtime[=**&lt;speedup>]}: enable pseudo-realtime for performance evaluation (default 1)
\item \strong{\code{--load-seeds}} \verb{&lt;file>}: load seeds from given file
\item \strong{\code{--save-seeds}} \verb{&lt;file>}: save seeds to given file
}
The following option is available only when Zeek is built with the \code{--enable-debug} configure option:
\strong{\code{-B}}, \verb{--debug &lt;dbgstreams>}: Enable debugging output for selected streams ('-B help' for help)
The following options are available only when Zeek is built with \code{gperftools} support (use the \code{--enable-perftools} and \code{--enable-perftools-debug} configure options):
\strong{\code{-m}}, \code{--mem-leaks}: show leaks
\strong{\code{-M}}, \code{--mem-profile}: record heap
}
\subsection{ENVIRONMENT}{
\itemize{
\item \strong{\code{ZEEKPATH}}: file search path
\item \strong{\code{ZEEK_PLUGIN_PATH}}: plugin search path
\item \strong{\code{ZEEK_PLUGIN_ACTIVATE}}: plugins to always activate
\item \strong{\code{ZEEK_PREFIXES}}: prefix list
\item \strong{\code{ZEEK_DNS_FAKE}}: disable DNS lookups
\item \strong{\code{ZEEK_SEED_FILE}}: file to load seeds from
\item \strong{\code{ZEEK_LOG_SUFFIX}}: ASCII log file extension
\item \strong{\code{ZEEK_PROFILER_FILE}}: Output file for script execution statistics
\item \strong{\code{ZEEK_DISABLE_ZEEKYGEN}}: Disable Zeekygen (Broxygen) documentation support
}
}
\subsection{AUTHOR}{
\strong{zeek} was written by The Zeek Project <info@zeek.org>.
}
}
\examples{
zeek_man()
man_zeek
?zeek_man
}

25
man/zeek_redefs.Rd

@ -0,0 +1,25 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/zeek-redefs.R
\name{zeek_redefs}
\alias{zeek_redefs}
\title{(WIP) Common \code{redef}initions for Zeek when processing PCAPs}
\description{
Zeek is great out-of-the-box, but you may need to tweak behavior
every now and then to perform analyses on the Zeek logs.
}
\details{
\itemize{
\item \verb{redef Log::default_scope_sep = "_"} will turn dots ("\code{.}") in column
names to underscores ("\verb{_}"). This will make many "big data" environments
much more pleasant to deal with.
\item \verb{redef FileExtraction::path = "/some/where/else"} will reconfigure where
Zeek's output files go.
\item \verb{redef FTP::default_capture_password = T} will turn off Zeek's default
masking of FTP passwords.
\item \verb{redef HTTP::default_capture_password = T} will turn off Zeek's default
state of not capturing HTTP passwords.
\item \verb{redef Intel::read_files += \{ "/opt/zeek_file_badlist.txt" \}} will load in
custom IoCs (see the \href{https://docs.zeek.org/en/master/frameworks/intel.html}{Intelligence Framework}
for more info).
}
}

24
man/zeekr.Rd

@ -0,0 +1,24 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/zeekr-package.R
\docType{package}
\name{zeekr}
\alias{zeekr}
\alias{zeekr-package}
\title{Tools to Make Analyses Using Zeek Easier}
\description{
\href{https://zeek.org/}{Zeek} is an open source network security monitoring
system. Tools are provided to make it a bit easier to work with Zeek to perform
analyses with R.
}
\seealso{
Useful links:
\itemize{
\item \url{https://git.rud.is/hrbrmstr/zeekr}
\item Report bugs at \url{https://git.rud.is/hrbrmstr/zeekr/issues}
}
}
\author{
Bob Rudis (bob@rud.is)
}
\keyword{internal}

5
tests/tinytest.R

@ -0,0 +1,5 @@
# Entry point used by R CMD check: run the installed zeekr package's tinytest
# suite. The run is skipped when tinytest itself is not installed.
if (requireNamespace("tinytest", quietly = TRUE)) {
  tinytest::test_package("zeekr")
}

205
tools-ref/zeek.md

@ -0,0 +1,205 @@
# NAME
zeek - passive network traffic analyzer
# SYNOPSIS
**zeek** \[*options*\] \[*file* \...\]
# DESCRIPTION
Zeek is primarily a security monitor that inspects all traffic on a link
in depth for signs of suspicious activity. More generally, however, Zeek
supports a wide range of traffic analysis tasks even outside of the
security domain, including performance measurements and helping with
trouble-shooting.
Zeek comes with built-in functionality for a range of analysis and
detection tasks, including detecting malware by interfacing to external
registries, reporting vulnerable versions of software seen on the
network, identifying popular web applications, detecting SSH
brute-forcing, validating SSL certificate chains, among others.
# OPTIONS
**\<file>**
: policy file, or read stdin
**-a**, \--parse-only
: exit immediately after parsing scripts
**-b**, \--bare-mode
: don\'t load scripts from the base/ directory
**-d**, \--debug-policy
: activate policy file debugging
**-e**, \--exec \<zeek code>
: augment loaded policies by given code
**-f**, \--filter \<filter>
: tcpdump filter
**-h**, \--help\|-?
: command line help
**-i**, \--iface \<interface>
: read from given interface
**-p**, \--prefix \<prefix>
: add given prefix to policy file resolution
**-r**, \--readfile \<readfile>
: read from given tcpdump file
**-s**, \--rulefile \<rulefile>
: read rules from given file
**-t**, \--tracefile \<tracefile>
: activate execution tracing
**-w**, \--writefile \<writefile>
: write to given tcpdump file
**-v**, \--version
: print version and exit
**-x**, \--print-state \<file.bst>
: print contents of state file
**-C**, \--no-checksums
: ignore checksums
**-F**, \--force-dns
: force DNS
**-I**, \--print-id \<ID name>
: print out given ID
**-N**, \--print-plugins
: print available plugins and exit (**-NN** for verbose)
**-P**, \--prime-dns
: prime DNS
**-Q**, \--time
: print execution time summary to stderr
**-R**, \--replay \<events.bst>
: replay events
**-S**, \--debug-rules
: enable rule debugging
**-T**, \--re-level \<level>
: set \'RE_level\' for rules
**-U**, \--status-file \<file>
: Record process status in file
**-W**, \--watchdog
: activate watchdog timer
**-X**, \--zeekygen \<cfgfile>
: generate documentation based on config file
**\--pseudo-realtime\[=**\<speedup>\]
: enable pseudo-realtime for performance evaluation (default 1)
**\--load-seeds** \<file>
: load seeds from given file
**\--save-seeds** \<file>
: save seeds to given file
The following option is available only when Zeek is built with the \--enable-debug configure option:
:
**-B**, \--debug \<dbgstreams>
: Enable debugging output for selected streams (\'-B help\' for help)
The following options are available only when Zeek is built with gperftools support (use the \--enable-perftools and \--enable-perftools-debug configure options):
:
**-m**, \--mem-leaks
: show leaks
**-M**, \--mem-profile
: record heap
# ENVIRONMENT
**ZEEKPATH**
: file search path
**ZEEK_PLUGIN_PATH**
: plugin search path
**ZEEK_PLUGIN_ACTIVATE**
: plugins to always activate
**ZEEK_PREFIXES**
: prefix list
**ZEEK_DNS_FAKE**
: disable DNS lookups
**ZEEK_SEED_FILE**
: file to load seeds from
**ZEEK_LOG_SUFFIX**
: ASCII log file extension
**ZEEK_PROFILER_FILE**
: Output file for script execution statistics
**ZEEK_DISABLE_ZEEKYGEN**
: Disable Zeekygen (Broxygen) documentation support
# AUTHOR
**zeek** was written by The Zeek Project \<info\@zeek.org>.

21
zeekr.Rproj

@ -0,0 +1,21 @@
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
StripTrailingWhitespace: Yes
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageBuildArgs: --resave-data
PackageRoxygenize: rd,collate,namespace
Loading…
Cancel
Save