Browse Source

tidy function

master
boB Rudis 3 years ago
parent
commit
008a5f4ab9
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 2
      DESCRIPTION
  2. 4
      NAMESPACE
  3. 108
      R/brim-main.R
  4. 3
      R/brimr-package.R
  5. 4
      R/utils.R
  6. 70
      README.Rmd
  7. 332
      README.md
  8. 7
      man/brim_search.Rd
  9. BIN
      man/figures/README-graph-1.png
  10. 14
      man/tidy_brim.Rd

2
DESCRIPTION

@ -22,6 +22,8 @@ Depends:
Imports:
httr,
jsonlite,
scales,
ipaddress,
stringi
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.1

4
NAMESPACE

@ -1,10 +1,14 @@
# Generated by roxygen2: do not edit by hand
S3method(print,brim_search_results)
export(brim_ast)
export(brim_host)
export(brim_search)
export(brim_search_raw)
export(brim_spaces)
export(tidy_brim)
import(httr)
import(ipaddress)
importFrom(jsonlite,fromJSON)
importFrom(scales,comma)
importFrom(stringi,stri_split_lines)

108
R/brim-main.R

@ -46,7 +46,7 @@ brim_spaces <- function(host = brim_host()) {
out <- httr::content(res, as = "text", encoding = "UTF-8")
out <- jsonlite::fromJSON(out)
class(out) <- c("tbl_df", "tbl", "data.frame")
out
}
@ -93,8 +93,108 @@ brim_search <- function(space_name, zql, host = brim_host()) {
res <- brim_search_raw(space_name = space_name, zql = zql, host = host)
res <- stringi::stri_split_lines(res, omit_empty = TRUE)
res <- unlist(res)
res <- lapply(res, jsonlite::fromJSON)
res <- lapply(res, jsonlite::fromJSON, simplifyVector=TRUE, simplifyDataFrame = FALSE, simplifyMatrix = FALSE)
class(res) <- c("brim_search_results", "list")
res
}
#' @rdname brim_search
#' @param x a `brim_search_results` object
#' @param ... unused
#' @return `x`, invisibly (the method is called for its printed summary)
#' @export
print.brim_search_results <- function(x, ...) {

  # Locate the "SearchStats" message(s). vapply() keeps the result a logical
  # vector even when `x` is empty or a message has no `type` field.
  is_stats <- vapply(
    x,
    function(.x) is.list(.x) && identical(.x[["type"]], "SearchStats"),
    logical(1)
  )

  if (!any(is_stats)) {
    cat("ZQL query returned no search statistics\n")
    return(invisible(x))
  }

  # `[[` with more than one index would index recursively, so pick exactly
  # one message; when several are present the last one is used.
  stats <- x[[max(which(is_stats))]]

  start_time <- stats[["start_time"]]
  update_time <- stats[["update_time"]]

  # Whole-second difference plus the sub-second remainder. The `ns` field is
  # treated as optional; without it the result has one-second resolution.
  ns_of <- function(ts) if (is.null(ts[["ns"]])) 0 else as.numeric(ts[["ns"]])
  delta <- (as.numeric(update_time[["sec"]]) - as.numeric(start_time[["sec"]])) +
    (ns_of(update_time) - ns_of(start_time)) / 1e9

  cat(
    "ZQL query took ", scales::comma(delta, accuracy = 0.0001), " seconds", "; ",
    scales::comma(stats$records_matched), " records matched", "; ",
    scales::comma(stats$records_read), " records read", "; ",
    scales::comma(stats$bytes_read), " bytes read", "\n", sep = ""
  )

  invisible(x)

}
# Convert one field of a search record into a named R value, choosing the
# conversion from the field's schema entry.
#
# aliases: named character vector; names are alias type names, values are the
#          type names they stand for
# schema:  list with `name`, `type` and, for `record` types, `of` (a list of
#          nested schema entries)
# value:   raw value for the field; for `record` types a list of values that
#          is walked in parallel with `schema[["of"]]`
#
# Returns the converted value named after the field. String types come back
# wrapped in a one-element named list. Types without a conversion yield
# `NULL` (invisibly), as `switch()` does when nothing matches.
#
# TODO: Handle array, set, enum, union
process_record <- function(aliases, schema, value) {

  nam <- schema[["name"]]
  typ <- schema[["type"]]

  # Swap an alias for the type it refers to. A plain `if` is used because
  # this is a scalar decision; unknown names index to NA and are kept as-is.
  resolved <- aliases[typ]
  if (length(resolved) == 1L && !is.na(resolved)) {
    typ <- as.character(resolved)
  }

  # R integers are 32-bit, so as.integer() turns larger magnitudes (possible
  # for uint32, uint64 and int64) into NA. Values that fit are returned as
  # integer exactly as before; anything larger is kept as a double, which is
  # exact up to 2^53.
  to_integer <- function(v) {
    num <- as.numeric(v)
    fits <- is.na(num) | abs(num) <= .Machine$integer.max
    if (all(fits)) as.integer(num) else num
  }

  switch(
    typ,
    # NOTE(review): mapply()'s default simplification is kept on purpose;
    # tidy_brim() consumes the shape it produces.
    record = mapply(function(sch, val) {
      process_record(aliases, sch, val)
    }, schema[["of"]], value),
    null = set_names(NA, nam),
    bstring = ,
    string = set_names(list(value), nam),
    uint8 = ,
    uint16 = ,
    uint32 = ,
    uint64 = ,
    int8 = ,
    int16 = ,
    int32 = ,
    int64 = set_names(to_integer(value), nam),
    time = set_names(anytime::anytime(value), nam),
    duration = ,
    float16 = ,
    float32 = ,
    float64 = ,
    decimal = set_names(as.numeric(value), nam),
    bool = set_names(as.logical(value), nam),
    ip = ,
    net = set_names(as.character(value), nam)#,
    # net = set_names(list(ipaddress::as_ip_network(value)), nam),
    # ip = set_names(list(ipaddress::as_ip_address(value)), nam)
  )

}
#' Turn Brim/zqd search results into a data frame
#'
#' @param x Brim/zqd search results
#' @return a data frame with one row per record that carries `values`; an
#'   empty data frame when `x` holds no records
#' @export
tidy_brim <- function(x) {

  # Gather the records of every "SearchRecords" message. `[[` with several
  # indices would index recursively, so the messages are selected with a
  # logical mask and their record lists concatenated instead.
  is_records <- vapply(
    x,
    function(.x) is.list(.x) && identical(.x[["type"]], "SearchRecords"),
    logical(1)
  )
  records <- unlist(
    lapply(unclass(x)[is_records], function(.x) .x[["records"]]),
    recursive = FALSE
  )

  if (length(records) == 0L) return(data.frame())

  # TRUE for each record that carries the named field; vapply() keeps this a
  # logical vector whatever the input looks like.
  has_field <- function(field) vapply(records, hasName, logical(1), field)

  # Type aliases announced by the records, as c(<alias name> = <type name>).
  alias_frames <- lapply(
    records[has_field("aliases")],
    function(.x) as.data.frame(.x[["aliases"]])
  )
  aliases <- rbind.data.frame(
    data.frame(name = character(0), type = character(0)),
    do.call(rbind.data.frame, alias_frames)
  )
  aliases <- set_names(aliases$type, aliases$name)

  # Schemas are keyed by record id so value-only records can look theirs up.
  schemas <- list()
  for (rec in records[has_field("schema")]) {
    schemas[[as.character(rec$id)]] <- rec$schema
  }

  # One single-row data frame per record; every field is wrapped as a list
  # so cbind.data.frame() treats it as a column.
  rows <- lapply(records[has_field("values")], function(.x) {
    fields <- process_record(
      aliases, schemas[[as.character(.x$id)]], .x[["values"]]
    )
    do.call(cbind.data.frame, lapply(fields, function(.y) {
      if (!is.list(.y)) as.list(.y) else .y
    }))
  })

  do.call(rbind.data.frame, rows)

}

3
R/brimr-package.R

@ -8,7 +8,8 @@
#' @name brimr
#' @keywords internal
#' @author Bob Rudis (bob@@rud.is)
#' @import httr
#' @import httr ipaddress
#' @importFrom scales comma
#' @importFrom stringi stri_split_lines
#' @importFrom jsonlite fromJSON
"_PACKAGE"

4
R/utils.R

@ -0,0 +1,4 @@
# Attach `nm` as the names of `object` and hand the named object back.
# When `object` is not supplied it defaults to `nm`, giving a vector whose
# values and names are the same.
set_names <- function(object = nm, nm) {
  named <- object
  names(named) <- nm
  named
}

70
README.Rmd

@ -40,18 +40,78 @@ packageVersion("brimr")
```
```{r ex-01}
### Available Brim "spaces"
```{r available-brim-spaces}
brim_spaces()
```
zql <- '_path=conn | count() by id.orig_h, id.resp_h, id.resp_p | sort id.orig_h, id.resp_h, id.resp_p'
### Sample ZQL query
cat(jsonlite::toJSON(jsonlite::fromJSON(brim_ast(zql)), pretty = TRUE))
```{r sample-zql-query}
# Z query to fetch Zeek connection data to create our network connection graph
zql1 <- '_path=conn | count() by id.orig_h, id.resp_h, id.resp_p | sort id.orig_h, id.resp_h, id.resp_p'
cat(
substr(jsonlite::toJSON(jsonlite::fromJSON(brim_ast(zql1)), pretty = TRUE), 1, 100), "..."
)
```
### Let's execute the query
```{r zeek-query-execution}
space <- "2021-02-17-Trickbot-gtag-rob13-infection-in-AD-environment.pcap"
r <- brim_search(space, zql)
r1 <- brim_search(space, zql1)
r1
(r1 <- as_tibble(tidy_brim(r1)))
```
### Let's try one that processes the Suricata alerts
```{r suricata-query-execution}
# Z query to fetch Suricata alerts including the count of alerts per source:destination
zql2 <- "event_type=alert | count() by src_ip, dest_ip, dest_port, alert.severity, alert.signature | sort src_ip, dest_ip, dest_port, alert.severity, alert.signature"
r2 <- brim_search(space, zql2)
r2
(r2 <- (as_tibble(tidy_brim(r2))))
```
str(r, 2)
```{r graph, fig.width = 9}
library(igraph)
library(ggraph)
library(tidyverse)
gdf <- count(r1, orig_h, resp_h, wt=count)
count(gdf, node = resp_h, wt=n, name = "in_degree") %>%
full_join(
count(gdf, node = orig_h, name = "out_degree")
) %>%
mutate_at(
vars(in_degree, out_degree),
replace_na, 1
) %>%
arrange(in_degree) -> vdf
g <- graph_from_data_frame(gdf, vertices = vdf)
ggraph(g, layout = "linear") +
geom_node_point(
aes(size = in_degree), shape = 21
) +
geom_edge_arc(
width = 0.125,
arrow = arrow(
length = unit(5, "pt"),
type = "closed"
)
)
```
## brimr Metrics

332
README.md

@ -35,6 +35,7 @@ The following functions are implemented:
retrieve processed results
- `brim_spaces`: Retrieve active Brim spaces from the specified Brim
instance
- `tidy_brim`: Turn Brim/zqd search results into a data frame
## Installation
@ -62,247 +63,130 @@ packageVersion("brimr")
## [1] '0.1.0'
```
### Available Brim “spaces”
``` r
brim_spaces()
## # A tibble: 1 x 4
## id name data_path storage_kind
## * <chr> <chr> <chr> <chr>
## 1 sp_1p6pwLgtsESYBT… 2021-02-17-Trickbot-gtag-rob13-i… file:///Users/hrbrmstr/Library/Application%20Suppor… filestore
## id name
## 1 sp_1p6pwLgtsESYBTHU9PL9fcl2iBn 2021-02-17-Trickbot-gtag-rob13-infection-in-AD-environment.pcap
## data_path storage_kind
## 1 file:///Users/hrbrmstr/Library/Application%20Support/Brim/data/spaces/sp_1p6pwLgtsESYBTHU9PL9fcl2iBn filestore
```
zql <- '_path=conn | count() by id.orig_h, id.resp_h, id.resp_p | sort id.orig_h, id.resp_h, id.resp_p'
### Sample ZQL query
cat(jsonlite::toJSON(jsonlite::fromJSON(brim_ast(zql)), pretty = TRUE))
``` r
# Z query to fetch Zeek connection data to create our network connection graph
zql1 <- '_path=conn | count() by id.orig_h, id.resp_h, id.resp_p | sort id.orig_h, id.resp_h, id.resp_p'
cat(
substr(jsonlite::toJSON(jsonlite::fromJSON(brim_ast(zql1)), pretty = TRUE), 1, 100), "..."
)
## {
## "op": ["SequentialProc"],
## "procs": [
## {
## "op": "FilterProc",
## "filter": {
## "op": "CompareField",
## "comparator": "=",
## "field": {
## "op": "BinaryExpr",
## "operator": ".",
## "lhs": {
## "op": "RootRecord"
## },
## "rhs": {
## "op": "Identifier",
## "name": "_path"
## }
## },
## "value": {
## "op": "Literal",
## "type": "string",
## "value": "conn"
## }
## },
## "keys": {},
## "reducers": {},
## "fields": {}
## },
## {
## "op": "GroupByProc",
## "filter": {
## "field": {
## "lhs": {},
## "rhs": {}
## },
## "value": {}
## },
## "limit": 0,
## "keys": [
## {
## "op": "Assignment",
## "rhs": {
## "op": "BinaryExpr",
## "operator": ".",
## "lhs": {
## "op": "BinaryExpr",
## "operator": ".",
## "lhs": {
## "op": "RootRecord"
## },
## "rhs": {
## "op": "Identifier",
## "name": "id"
## }
## },
## "rhs": {
## "op": "Identifier",
## "name": "orig_h"
## }
## }
## },
## {
## "op": "Assignment",
## "rhs": {
## "op": "BinaryExpr",
## "operator": ".",
## "lhs": {
## "op": "BinaryExpr",
## "operator": ".",
## "lhs": {
## "op": "RootRecord"
## },
## "rhs": {
## "op": "Identifier",
## "name": "id"
## }
## },
## "rhs": {
## "op": "Identifier",
## "name": "resp_h"
## }
## }
## },
## {
## "op": "Assignment",
## "rhs": {
## "op": "BinaryExpr",
## "operator": ".",
## "lhs": {
## "op": "BinaryExpr",
## "operator": ".",
## "lhs": {
## "op": "RootRecord"
## },
## "rhs": {
## "op": "Identifier",
## "name": "id"
## }
## },
## "rhs": {
## "op": "Identifier",
## "name": "resp_p"
## }
## }
## }
## ],
## "reducers": [
## {
## "op": "Assignment",
## "rhs": {
## "op": "Reducer",
## "operator": "count"
## }
## }
## ],
## "fields": {}
## },
## {
## "op": "SortProc",
## "filter": {
## "field": {
## "lhs": {},
## "rhs": {}
## },
## "value": {}
## },
## "keys": {},
## "reducers": {},
## "fields": [
## {
## "op": "BinaryExpr",
## "operator": ".",
## "lhs": {
## "op": "BinaryExpr",
## "operator": ".",
## "lhs": {
## "op": "RootRecord"
## },
## "rhs": {
## "op": "Identifier",
## "name": "id"
## }
## },
## "rhs": {
## "op": "Identifier",
## "name": "orig_h"
## }
## },
## {
## "op": "BinaryExpr",
## "operator": ".",
## "lhs": {
## "op": "BinaryExpr",
## "operator": ".",
## "lhs": {
## "op": "RootRecord"
## },
## "rhs": {
## "op": "Identifier",
## "name": "id"
## }
## },
## "rhs": {
## "op": "Identifier",
## "name": "resp_h"
## }
## },
## {
## "op": "BinaryExpr",
## "operator": ".",
## "lhs": {
## "op": "BinaryExpr",
## "operator": ".",
## "lhs": {
## "op": "RootRecord"
## },
## "rhs": {
## "op": "Identifier",
## "name": "id"
## }
## },
## "rhs": {
## "op": "Identifier",
## "name": "resp_p"
## }
## }
## ],
## "sortdir": 1,
## "nullsfirst": false
## }
## ]
## }
## ...
```
### Let’s execute the query
``` r
space <- "2021-02-17-Trickbot-gtag-rob13-infection-in-AD-environment.pcap"
r <- brim_search(space, zql)
str(r, 2)
## List of 5
## $ :List of 2
## ..$ type : chr "TaskStart"
## ..$ task_id: int 0
## $ :List of 3
## ..$ type : chr "SearchRecords"
## ..$ channel_id: int 0
## ..$ records :'data.frame': 74 obs. of 4 variables:
## $ :List of 3
## ..$ type : chr "SearchEnd"
## ..$ channel_id: int 0
## ..$ reason : chr "eof"
## $ :List of 7
## ..$ type : chr "SearchStats"
## ..$ start_time :List of 2
## ..$ update_time :List of 2
## ..$ bytes_read : int 238052
## ..$ bytes_matched : int 54486
## ..$ records_read : int 1082
## ..$ records_matched: int 384
## $ :List of 2
## ..$ type : chr "TaskEnd"
## ..$ task_id: int 0
r1 <- brim_search(space, zql1)
r1
## ZQL query took 0.0000 seconds; 384 records matched; 1,082 records read; 238,052 bytes read
(r1 <- as_tibble(tidy_brim(r1)))
## # A tibble: 74 x 4
## orig_h resp_h resp_p count
## <chr> <chr> <chr> <int>
## 1 10.2.17.2 10.2.17.101 49787 1
## 2 10.2.17.101 3.222.126.94 80 1
## 3 10.2.17.101 10.2.17.1 445 1
## 4 10.2.17.101 10.2.17.2 53 97
## 5 10.2.17.101 10.2.17.2 88 27
## 6 10.2.17.101 10.2.17.2 123 5
## 7 10.2.17.101 10.2.17.2 135 8
## 8 10.2.17.101 10.2.17.2 137 2
## 9 10.2.17.101 10.2.17.2 138 2
## 10 10.2.17.101 10.2.17.2 389 37
## # … with 64 more rows
```
### Let’s try one that processes the Suricata alerts
``` r
# Z query to fetch Suricata alerts including the count of alerts per source:destination
zql2 <- "event_type=alert | count() by src_ip, dest_ip, dest_port, alert.severity, alert.signature | sort src_ip, dest_ip, dest_port, alert.severity, alert.signature"
r2 <- brim_search(space, zql2)
r2
## ZQL query took 0.0000 seconds; 47 records matched; 870 records read; 238,660 bytes read
(r2 <- (as_tibble(tidy_brim(r2))))
## # A tibble: 35 x 6
## src_ip dest_ip dest_port severity signature count
## <chr> <chr> <int> <int> <chr> <int>
## 1 10.2.17.2 10.2.17.1… 49674 3 SURICATA Applayer Detect protocol only one direction 1
## 2 10.2.17.2 10.2.17.1… 49680 3 SURICATA Applayer Detect protocol only one direction 1
## 3 10.2.17.2 10.2.17.1… 49687 3 SURICATA Applayer Detect protocol only one direction 1
## 4 10.2.17.2 10.2.17.1… 49704 3 SURICATA Applayer Detect protocol only one direction 1
## 5 10.2.17.2 10.2.17.1… 49709 3 SURICATA Applayer Detect protocol only one direction 1
## 6 10.2.17.2 10.2.17.1… 49721 3 SURICATA Applayer Detect protocol only one direction 1
## 7 10.2.17.2 10.2.17.1… 50126 3 SURICATA Applayer Detect protocol only one direction 1
## 8 10.2.17.1… 3.222.126… 80 2 ET POLICY curl User-Agent Outbound 1
## 9 10.2.17.1… 36.95.27.… 443 1 ET HUNTING Suspicious POST with Common Windows Process Names - Possib… 1
## 10 10.2.17.1… 36.95.27.… 443 1 ET MALWARE Win32/Trickbot Data Exfiltration 1
## # … with 25 more rows
```
``` r
library(igraph)
library(ggraph)
library(tidyverse)
gdf <- count(r1, orig_h, resp_h, wt=count)
count(gdf, node = resp_h, wt=n, name = "in_degree") %>%
full_join(
count(gdf, node = orig_h, name = "out_degree")
) %>%
mutate_at(
vars(in_degree, out_degree),
replace_na, 1
) %>%
arrange(in_degree) -> vdf
g <- graph_from_data_frame(gdf, vertices = vdf)
ggraph(g, layout = "linear") +
geom_node_point(
aes(size = in_degree), shape = 21
) +
geom_edge_arc(
width = 0.125,
arrow = arrow(
length = unit(5, "pt"),
type = "closed"
)
)
```
<img src="man/figures/README-graph-1.png" width="864" />
## brimr Metrics
| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) |
|:-----|---------:|-----:|----:|-----:|------------:|-----:|---------:|-----:|
| R | 3 | 0.38 | 53 | 0.39 | 25 | 0.27 | 41 | 0.29 |
| Rmd | 1 | 0.12 | 15 | 0.11 | 21 | 0.23 | 30 | 0.21 |
| SUM | 4 | 0.50 | 68 | 0.50 | 46 | 0.50 | 71 | 0.50 |
| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) |
|:-----|---------:|----:|----:|-----:|------------:|-----:|---------:|-----:|
| R | 4 | 0.4 | 123 | 0.36 | 48 | 0.29 | 53 | 0.27 |
| Rmd | 1 | 0.1 | 47 | 0.14 | 35 | 0.21 | 44 | 0.23 |
| SUM | 5 | 0.5 | 170 | 0.50 | 83 | 0.50 | 97 | 0.50 |
clock Package Metrics for brimr

7
man/brim_search.Rd

@ -2,9 +2,12 @@
% Please edit documentation in R/brim-main.R
\name{brim_search}
\alias{brim_search}
\alias{print.brim_search_results}
\title{Post a ZQL query to the given Brim instance and retrieve processed results}
\usage{
brim_search(space_name, zql, host = brim_host())
\method{print}{brim_search_results}(x, ...)
}
\arguments{
\item{space_name}{name of the Brim space to use as the search data source}
@ -12,6 +15,10 @@ brim_search(space_name, zql, host = brim_host())
\item{zql}{the ZQL query}
\item{host}{see \code{\link[=brim_host]{brim_host()}}}
\item{x}{a \code{brim_search_results} object}
\item{...}{unused}
}
\description{
Post a ZQL query to the given Brim instance and retrieve processed results

BIN
man/figures/README-graph-1.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 119 KiB

14
man/tidy_brim.Rd

@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/brim-main.R
\name{tidy_brim}
\alias{tidy_brim}
\title{Turn Brim/zqd search results into a data frame}
\usage{
tidy_brim(x)
}
\arguments{
\item{x}{Brim/zqd search results}
}
\description{
Turn Brim/zqd search results into a data frame
}
Loading…
Cancel
Save