Browse Source

hash_con()

ffuzzy
boB Rudis 4 years ago
parent
commit
428991aae1
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 6
      DESCRIPTION
  2. 1
      NAMESPACE
  3. 3
      NEWS.md
  4. 33
      R/hash-con.R
  5. 18
      README.Rmd
  6. 41
      README.md
  7. 2
      cleanup
  8. 75
      configure
  9. BIN
      inst/knuth/local.gz
  10. 1948
      inst/knuth/local.html
  11. 20
      man/hash_con.Rd
  12. 10
      src/Makevars
  13. 7
      src/Makevars.in

6
DESCRIPTION

@ -1,8 +1,8 @@
Package: ssdeepr
Type: Package
Title: Context Triggered Piecewise Hash Computation Using 'ssdeep'
Version: 0.1.0
Date: 2020-03-02
Version: 0.2.0
Date: 2020-03-03
Authors@R: c(
person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"),
comment = c(ORCID = "0000-0001-5670-2640"))
@ -13,7 +13,7 @@ Description: The ssdeep project provides an open source library <https://github.
hashes from character/byte streams.
URL: https://git.rud.is/hrbrmstr/ssdeepr
BugReports: https://git.rud.is/hrbrmstr/ssdeepr/issues
SystemRequirements: C++11; libfuzzy <https://ssdeep-project.github.io/ssdeep/index.html#platforms>
SystemRequirements: libfuzzy <https://ssdeep-project.github.io/ssdeep/index.html#platforms>
Encoding: UTF-8
License: AGPL
Suggests:

1
NAMESPACE

@ -2,6 +2,7 @@
export("%>%")
export(hash_compare)
export(hash_con)
export(hash_file)
export(hash_raw)
importFrom(Rcpp,sourceCpp)

3
NEWS.md

@ -1,2 +1,5 @@
0.2.0
* anticonf configure script added
0.1.0
* Initial release

33
R/hash-con.R

@ -0,0 +1,33 @@
#' Return CTP hash of data collected from a connection
#'
#' Reads the connection to exhaustion in binary mode and passes the collected
#' bytes to [hash_raw()]. A connection that is not yet open is opened in
#' `"rb"` mode and closed again when the function exits; a connection that is
#' already open is read from its current position and left open for the
#' caller to manage.
#'
#' @param con a [connection]
#' @return the CTP hash of the bytes read from `con`, as computed by
#'   [hash_raw()]
#' @export
#' @examples
#' # All 3 should be the same
#' hash_con(url("https://en.wikipedia.org/wiki/Donald_Knuth"))
#' hash_con(file(system.file("knuth", "local.html", package = "ssdeepr")))
#' hash_con(gzfile(system.file("knuth", "local.gz", package = "ssdeepr")))
hash_con <- function(con) {

  stopifnot(inherits(con, "connection"))

  if (!isOpen(con)) {
    # Register the cleanup before open() so the connection is also destroyed
    # if open() fails; add = TRUE keeps any other exit handlers intact.
    on.exit(close(con), add = TRUE)
    open(con, "rb")
  }

  # Collect the chunks in a list and concatenate once at the end. Growing a
  # raw vector with c() on every read re-copies everything read so far.
  chunks <- list()
  repeat {
    tmp <- readBin(con, "raw", n = 32768L)
    if (length(tmp) == 0L) {
      break
    }
    chunks[[length(chunks) + 1L]] <- tmp
  }

  # unlist() of an empty list is NULL, so fall back to an empty raw vector.
  out <- if (length(chunks) > 0L) unlist(chunks, use.names = FALSE) else raw(0)

  hash_raw(out)

}

18
README.Rmd

@ -75,6 +75,24 @@ hash_compare(hashes$hash[1], hashes$hash[3])
hash_compare(hashes$hash[1], hashes$hash[4])
```
Works with Connections, too. All three should be the same if the Wikipedia page hasn't changed since making local copies in the package.
NOTE: retrieving the URL contents with a different user-agent string and/or with JavaScript enabled will likely produce different content and, thus, a different hash.
```{r u-02}
(k1 <- hash_con(url("https://en.wikipedia.org/wiki/Donald_Knuth")))
(k2 <- hash_con(file(system.file("knuth", "local.html", package = "ssdeepr"))))
(k3 <- hash_con(gzfile(system.file("knuth", "local.gz", package = "ssdeepr"))))
hash_compare(k1, k2)
hash_compare(k1, k3)
hash_compare(k2, k3)
```
## ssdeepr Metrics
```{r cloc, echo=FALSE}

41
README.md

@ -28,6 +28,7 @@ character/byte streams.
The following functions are implemented:
- `hash_compare`: Compare two hashes
- `hash_con`: Return CTP hash of data collected from a connection
- `hash_file`: Return CTP hash of one or more files
- `hash_raw`: Return CTP hash of a raw vector
@ -76,7 +77,7 @@ library(ssdeepr)
# current version
packageVersion("ssdeepr")
## [1] '0.1.0'
## [1] '0.2.0'
```
- `index.html` is a static copy of a blog main page with a bunch of
@ -116,13 +117,41 @@ hash_compare(hashes$hash[1], hashes$hash[4])
## [1] 0
```
Works with Connections, too. All three should be the same if the
Wikipedia page hasn’t changed since making local copies in the package.
NOTE: retrieving the URL contents with a different user-agent string
and/or with JavaScript enabled will likely produce different content
and, thus, a different hash.
``` r
(k1 <- hash_con(url("https://en.wikipedia.org/wiki/Donald_Knuth")))
## [1] "3072:u2dfqECHC6NPsWzqFg2qDKgNYsVeJb19pEDTlfrd5czRsZNqqelzPFKsuXs6X9pU:PQli6NPsWzcg2/EYsVUY6sI"
(k2 <- hash_con(file(system.file("knuth", "local.html", package = "ssdeepr"))))
## [1] "3072:u2dfqECHC6NPsWzqFg2qDKgNYsVeJb19pEDTlfrd5czRsZNqqelzPFKsuXs6X9pU:PQli6NPsWzcg2/EYsVUY6sI"
(k3 <- hash_con(gzfile(system.file("knuth", "local.gz", package = "ssdeepr"))))
## [1] "3072:u2dfqECHC6NPsWzqFg2qDKgNYsVeJb19pEDTlfrd5czRsZNqqelzPFKsuXs6X9pU:PQli6NPsWzcg2/EYsVUY6sI"
hash_compare(k1, k2)
## [1] 100
hash_compare(k1, k3)
## [1] 100
hash_compare(k2, k3)
## [1] 100
```
## ssdeepr Metrics
| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) |
| :--- | -------: | --: | --: | ---: | ----------: | ---: | -------: | ---: |
| C++ | 2 | 0.2 | 67 | 0.52 | 21 | 0.30 | 8 | 0.07 |
| R | 7 | 0.7 | 45 | 0.35 | 22 | 0.32 | 61 | 0.54 |
| Rmd | 1 | 0.1 | 16 | 0.12 | 26 | 0.38 | 45 | 0.39 |
| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) |
| :----------- | -------: | ---: | --: | ---: | ----------: | ---: | -------: | ---: |
| C++ | 2 | 0.15 | 67 | 0.33 | 21 | 0.23 | 8 | 0.06 |
| R | 8 | 0.62 | 62 | 0.30 | 28 | 0.30 | 71 | 0.50 |
| Bourne Shell | 2 | 0.15 | 54 | 0.26 | 9 | 0.10 | 14 | 0.10 |
| Rmd | 1 | 0.08 | 22 | 0.11 | 34 | 0.37 | 49 | 0.35 |
## Code of Conduct

2
cleanup

@ -0,0 +1,2 @@
#!/bin/sh
# Remove files produced by ./configure: the rendered src/Makevars and the
# "autobrew" helper script that configure may download into the package root.
rm -f src/Makevars autobrew

75
configure

@ -0,0 +1,75 @@
#!/bin/sh
# Anticonf (tm) script by Jeroen Ooms (2018)
# This script will query 'pkg-config' for the required cflags and ldflags.
# If pkg-config is unavailable or does not find the library, try setting
# INCLUDE_DIR and LIB_DIR manually via e.g:
# R CMD INSTALL --configure-vars='INCLUDE_DIR=/.../include LIB_DIR=/.../lib'
#
# The script is run by /bin/sh, so it sticks to POSIX shell constructs
# (no '[[ ]]', no 'source', no $OSTYPE).

# Library settings
PKG_CONFIG_NAME="libfuzzy"
PKG_DEB_NAME="libfuzzy-dev"
PKG_RPM_NAME="libfuzzy-devel"
PKG_BREW_NAME="ssdeep"
PKG_TEST_HEADER="<fuzzy.h>"
PKG_LIBS_STATIC="-lfuzzy"
PKG_LIBS="-lfuzzy"
PKG_CFLAGS=""

# Use pkg-config if available
if command -v pkg-config >/dev/null 2>&1; then
  PKGCONFIG_CFLAGS=$(pkg-config --cflags --silence-errors "${PKG_CONFIG_NAME}")
  PKGCONFIG_LIBS=$(pkg-config --libs "${PKG_CONFIG_NAME}")
fi

# Note that cflags may be empty in case of success
if [ -n "$INCLUDE_DIR" ] || [ -n "$LIB_DIR" ]; then
  echo "Found INCLUDE_DIR and/or LIB_DIR!"
  # Only add -I / -L for the variable that is actually set: a bare "-I" or
  # "-L" would swallow the next argument on the compiler command line.
  if [ -n "$INCLUDE_DIR" ]; then
    PKG_CFLAGS="-I$INCLUDE_DIR $PKG_CFLAGS"
  fi
  if [ -n "$LIB_DIR" ]; then
    PKG_LIBS="-L$LIB_DIR $PKG_LIBS"
  fi
elif [ -n "$PKGCONFIG_CFLAGS" ] || [ -n "$PKGCONFIG_LIBS" ]; then
  echo "Found pkg-config cflags and libs!"
  PKG_CFLAGS=${PKGCONFIG_CFLAGS}
  PKG_LIBS=${PKGCONFIG_LIBS}
elif [ "$(uname)" = "Darwin" ]; then
  if command -v brew >/dev/null 2>&1; then
    BREWDIR=$(brew --prefix)
  else
    # NOTE(review): this downloads a script and executes it at install time;
    # presumably it defines BREWDIR for the lines below -- TODO confirm.
    curl -sfL "https://jeroen.github.io/autobrew/$PKG_BREW_NAME" > autobrew
    # POSIX '.' searches PATH for a bare name, so give an explicit path.
    . ./autobrew
  fi
  PKG_CFLAGS="-I$BREWDIR/opt/$PKG_BREW_NAME/include"
  PKG_LIBS="-L$BREWDIR/opt/$PKG_BREW_NAME/lib $PKG_LIBS"
fi

# For debugging
echo "Using PKG_CFLAGS=$PKG_CFLAGS"
echo "Using PKG_LIBS=$PKG_LIBS"

# Find compiler
CC=$("${R_HOME}/bin/R" CMD config CC)
CFLAGS=$("${R_HOME}/bin/R" CMD config CFLAGS)
CPPFLAGS=$("${R_HOME}/bin/R" CMD config CPPFLAGS)

# Test configuration: preprocess a one-line file that includes the header.
# CC and the flag variables are left unquoted on purpose so that multi-word
# values split into separate arguments.
R_CONFIG_ERROR=""
echo "#include $PKG_TEST_HEADER" | ${CC} ${CPPFLAGS} ${PKG_CFLAGS} ${CFLAGS} -E -xc - >/dev/null 2>&1 || R_CONFIG_ERROR=1

# Customize the error
if [ -n "$R_CONFIG_ERROR" ]; then
  echo "------------------------- ANTICONF ERROR ---------------------------"
  echo "Configuration failed because $PKG_CONFIG_NAME was not found. Try installing:"
  echo " * deb: $PKG_DEB_NAME (Debian, Ubuntu, etc)"
  echo " * rpm: $PKG_RPM_NAME (Fedora, EPEL)"
  echo " * brew: $PKG_BREW_NAME (OSX)"
  echo "If $PKG_CONFIG_NAME is already installed, check that 'pkg-config' is in your"
  echo "PATH and PKG_CONFIG_PATH contains a $PKG_CONFIG_NAME.pc file. If pkg-config"
  echo "is unavailable you can set INCLUDE_DIR and LIB_DIR manually via:"
  echo "R CMD INSTALL --configure-vars='INCLUDE_DIR=... LIB_DIR=...'"
  echo "--------------------------------------------------------------------"
  exit 1
fi

# Write to Makevars
sed -e "s|@cflags@|$PKG_CFLAGS|" -e "s|@libs@|$PKG_LIBS|" src/Makevars.in > src/Makevars

# Success
exit 0

BIN
inst/knuth/local.gz

Binary file not shown.

1948
inst/knuth/local.html

File diff suppressed because one or more lines are too long

20
man/hash_con.Rd

@ -0,0 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/hash-con.R
\name{hash_con}
\alias{hash_con}
\title{Return CTP hash of one data collected from a connection}
\usage{
hash_con(con)
}
\arguments{
\item{con}{a \link{connection}}
}
\value{
data frame
}
\description{
Return CTP hash of one data collected from a connection
}
\examples{
hash_con(url("https://en.wikipedia.org/wiki/Donald_Knuth"))
}

10
src/Makevars

@ -1,3 +1,7 @@
# NOTE(review): the configure script writes src/Makevars from src/Makevars.in
# and the cleanup script deletes it, so this looks like generated output with
# machine-specific /usr/local/opt/ssdeep paths -- confirm it should be tracked.
CXX_STD = CXX11
PKG_CXXFLAGS =
PKG_LIBS = -lfuzzy
PKG_CPPFLAGS=-I/usr/local/opt/ssdeep/include
# PKG_LIBS is assigned a second time here; the -L flag appears twice.
PKG_LIBS=-L/usr/local/opt/ssdeep/lib -L/usr/local/opt/ssdeep/lib -lfuzzy
# "all" depends on "clean", so earlier build products are removed first.
all: clean
clean:
rm -Rf $(SHLIB) $(OBJECTS)

7
src/Makevars.in

@ -0,0 +1,7 @@
# Template for src/Makevars: the configure script passes this file through sed,
# replacing the cflags and libs placeholders below with the detected flags.
PKG_CPPFLAGS=@cflags@
PKG_LIBS=@libs@
# "all" depends on "clean", so the previous $(SHLIB) and $(OBJECTS) are
# removed whenever "all" is built -- presumably to force a full rebuild.
all: clean
clean:
rm -Rf $(SHLIB) $(OBJECTS)
Loading…
Cancel
Save