Browse Source

initial commit

master
boB Rudis 7 years ago
parent
commit
3f875829e2
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 1
      .Rbuildignore
  2. 25
      CONDUCT.md
  3. 5
      NAMESPACE
  4. 4
      R/aaa.r
  5. 14
      R/bandwidth.r
  6. 19
      R/bestest.r
  7. 7
      R/closest.r
  8. 14
      R/config.r
  9. 80
      R/download.r
  10. 5
      R/servers.r
  11. 11
      R/speedtest-package.R
  12. 61
      R/upload.r
  13. 33
      R/util.r
  14. 116
      README.Rmd
  15. 295
      README.gfm-ascii_identifiers
  16. BIN
      README.gfm-ascii_identifiers_files/figure-gfm/unnamed-chunk-11-1.png
  17. BIN
      README.gfm-ascii_identifiers_files/figure-gfm/unnamed-chunk-5-1.png
  18. BIN
      README.gfm-ascii_identifiers_files/figure-gfm/unnamed-chunk-7-1.png
  19. BIN
      README.gfm-ascii_identifiers_files/figure-gfm/unnamed-chunk-9-1.png
  20. 15
      README_cache/gfm/__packages
  21. BIN
      README_cache/gfm/unnamed-chunk-10_b7574424094e6afbf2484aad688918ae.RData
  22. BIN
      README_cache/gfm/unnamed-chunk-10_b7574424094e6afbf2484aad688918ae.rdb
  23. BIN
      README_cache/gfm/unnamed-chunk-10_b7574424094e6afbf2484aad688918ae.rdx
  24. BIN
      README_cache/gfm/unnamed-chunk-4_c1e8e4162dad3e4d9b4a994ae1c5d558.RData
  25. BIN
      README_cache/gfm/unnamed-chunk-4_c1e8e4162dad3e4d9b4a994ae1c5d558.rdb
  26. BIN
      README_cache/gfm/unnamed-chunk-4_c1e8e4162dad3e4d9b4a994ae1c5d558.rdx
  27. BIN
      README_cache/gfm/unnamed-chunk-8_63201fbce0e2c3afdeb25c59a8991d7e.RData
  28. BIN
      README_cache/gfm/unnamed-chunk-8_63201fbce0e2c3afdeb25c59a8991d7e.rdb
  29. BIN
      README_cache/gfm/unnamed-chunk-8_63201fbce0e2c3afdeb25c59a8991d7e.rdx
  30. 14
      man/spd_best_servers.Rd
  31. 9
      man/spd_closest_servers.Rd
  32. 21
      man/spd_compute_bandwidth.Rd
  33. 5
      man/spd_config.Rd
  34. 44
      man/spd_download_test.Rd
  35. 8
      man/spd_servers.Rd
  36. 37
      man/spd_upload_test.Rd
  37. 8
      man/speedtest.Rd

1
.Rbuildignore

@ -8,3 +8,4 @@
^\.codecov\.yml$
^README_files$
^doc$
^CONDUCT\.md$

25
CONDUCT.md

@ -0,0 +1,25 @@
# Contributor Code of Conduct
As contributors and maintainers of this project, we pledge to respect all people who
contribute through reporting issues, posting feature requests, updating documentation,
submitting pull requests or patches, and other activities.
We are committed to making participation in this project a harassment-free experience for
everyone, regardless of level of experience, gender, gender identity and expression,
sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.
Examples of unacceptable behavior by participants include the use of sexual language or
imagery, derogatory comments or personal attacks, trolling, public or private harassment,
insults, or other unprofessional conduct.
Project maintainers have the right and responsibility to remove, edit, or reject comments,
commits, code, wiki edits, issues, and other contributions that are not aligned to this
Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed
from the project team.
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by
opening an issue or contacting one or more of the project maintainers.
This Code of Conduct is adapted from the Contributor Covenant
(http://contributor-covenant.org), version 1.0.0, available at
http://contributor-covenant.org/version/1/0/0/

5
NAMESPACE

@ -2,9 +2,11 @@
export(spd_best_servers)
export(spd_closest_servers)
export(spd_compute_bandwidth)
export(spd_config)
export(spd_download_test)
export(spd_servers)
export(spd_upload_test)
import(httr)
import(purrr)
import(xml2)
@ -14,6 +16,9 @@ importFrom(dplyr,arrange)
importFrom(dplyr,data_frame)
importFrom(dplyr,filter)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,select)
importFrom(dplyr,summarise)
importFrom(jsonlite,fromJSON)
importFrom(pingr,ping)
importFrom(urltools,domain)

4
R/aaa.r

@ -1,3 +1,7 @@
# Digits plus upper-case letters as raw bytes; sampled (with replacement) to
# build the random payloads sent by the upload test.
.base_raw <- charToRaw('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ')

# User-Agent header value sent with the speed test HTTP requests.
.speedtest_ua <- "Mozilla/5.0 (compatible; r-speedtest/1.0; https://github.com/hrbrmstr/speedtest)"

# Names referenced as bare column names inside dplyr verbs; registering them
# here keeps `R CMD check` from flagging them as undefined global variables.
utils::globalVariables(
  c("total", "latency_url", "test_result", "ping_time", "total_time", "retrieval_time",
    "bw", "size", "secs"))

14
R/bandwidth.r

@ -0,0 +1,14 @@
#' Convert a transfer size and duration into a bandwidth figure
#'
#' Divides the number of bytes moved by the number of seconds the transfer
#' took. By default that bytes-per-second rate is then converted to bits
#' (multiplied by 8) and scaled down by 1024 and then by 1000.
#'
#' @md
#' @param size_bytes number of bytes in the transferred payload
#' @param xfer_secs number of seconds the transfer took
#' @param mbits if `TRUE` (the default) the result is expressed in megabits (Mb)
#'        per second; if `FALSE` the plain bytes-per-second rate is returned
#' @return numeric bandwidth value(s)
#' @export
#' @examples
#' spd_compute_bandwidth(19200000, 1) # 150 Mb/sec
spd_compute_bandwidth <- function(size_bytes, xfer_secs, mbits=TRUE) {

  rate <- size_bytes / xfer_secs

  if (mbits) {
    # bytes -> bits, then divide by 1024 and by 1000 (order kept as-is so the
    # floating point result is unchanged)
    rate * 8 / 1024 / 1000
  } else {
    rate
  }

}

19
R/bestest.r

@ -1,21 +1,31 @@
#' Find "best" servers (latency-wise) from master server list
#'
#' The input `servers` data frame will be truncated to the first `max` and
#' HTTP and ICMP probe tests will be performed to determine initial retrieval
#' speed and latency. Not all servers respond to ICMP requests due to the way
#' their routers, switches or firewalls are configured.
#'
#' @md
#' @param servers if not `NULL`, then the data frame from [spd_servers()]. If
#' `NULL`, then [spd_servers()] will be called to retrieve the server list.
#' @param config client configuration retrieved via [spd_config()]. If `NULL` it
#' will be retrieved
#' @param max the maximum number of "best" servers to return. This is hard-capped
#' at 25 since Ookla is a free/sponsored service and if you plan on abusing
#' it you'll have to write your own code to do so. Default is `10`.
#' @return server list in order of latency closeness (retrieval speed column included)
#' @note the list of target servers will be truncated to the first 10
#' @note the list of target servers will be truncated to the first `max`. Fewer than
#' `max` servers may be returned if there were errors connecting to servers.
#' @export
spd_best_servers <- function(servers=NULL, config=NULL) {
spd_best_servers <- function(servers=NULL, config=NULL, max=10) {
if (max > 25) max <- 25
if (is.null(config)) config <- spd_config()
if (is.null(servers)) servers <- spd_closest_servers(config=config)
targets <- servers
if (nrow(targets) > 10) targets <- servers[1:10,]
if (nrow(targets) > max) targets <- servers[1:max,]
.lat_dat <- list()
@ -41,9 +51,6 @@ spd_best_servers <- function(servers=NULL, config=NULL) {
}) %>%
dplyr::filter(!grepl("test=test", retrieval_time)) -> target_df
# order() is kinda not necessary since the first ones to finish are going to be
# in the list first, but it's best to be safe
dplyr::left_join(target_df, targets, "latency_url") %>%
dplyr::arrange(retrieval_time) %>%
dplyr::select(-latency_url, -test_result)

7
R/closest.r

@ -1,6 +1,4 @@
#' #' Find "closest" servers (geography-wise) from master server list
#'
#' Uses [ipinfo.io](https://ipinfo.io) to geolocate your external IP address.
#' Find "closest" servers (geography-wise) from master server list
#'
#' @md
#' @param servers if not `NULL`, then the data frame from [spd_servers()]. If
@ -9,6 +7,9 @@
#' will be retrieved
#' @return server list in order of geographic closeness
#' @export
#' @examples \dontrun{
#' spd_closest_servers()
#' }
spd_closest_servers <- function(servers=NULL, config=NULL) {
if (is.null(config)) config <- spd_config()

14
R/config.r

@ -1,6 +1,10 @@
#' Retrieve client configuration information for the speedtest
#'
#' @md
#' @export
#' @examples \dontrun{
#' spd_config()
#' }
spd_config <- function() {
res <- httr::GET("http://www.speedtest.net/speedtest-config.php")
@ -13,6 +17,16 @@ spd_config <- function() {
config <- purrr::map(config, function(.x) { c(.x, attributes(.x)) })
config$`server-config`$ignoreids <- strsplit(config$`server-config`$ignoreids, ",")[[1]]
sz <- as.numeric(gsub("[^[:digit:]]", "", config$upload$mintestsize))
if (grepl("[^[:digit:]]", config$upload$mintestsize)) {
up_units <- gsub("[[:digit:]]", "", config$upload$mintestsize)
sz <- as.numeric(gsub("[^[:digit:]]", "", config$upload$mintestsize))
sz <- sz * switch(up_units, K=1024, M=1024000)
}
config$upload$mintestsize <- sz
config
}

80
R/download.r

@ -1,37 +1,73 @@
#' Download test
#' Perform a download speed/bandwidth test
#'
#' Currently, ten tests are performed in increasing order of size.
#'
#' This uses the legacy HTTP method of determining your bandwidth/speed and,
#' as such, has many issues. Rather than hack-compensate for error-prone
#' results with smaller files used on high-bandwidth connections, raw size +
#' transfer speed data is returned enabling you to perform your own adjustments
#' or choose which values to "believe".
#'
#' @md
#' @param server a data frame row from one of the functions that retrieves or
#' filters a server list. You can pass in a full servers list but
#' only the first entry will be processed.
#' @param config client configuration retrieved via [spd_config()]. If `NULL` it
#' will be retrieved
#' @param summarise the raw results from each test --- including file sizes ---
#' will be returned if the value is `FALSE`. If `TRUE` only summary
#' statistics will be returned.
#' @param timeout max time (seconds) to wait for a connection or download to finish.
#' Default is `60` seconds
#' @note speed/bandwidth values are in Mbits/s; these tests consume bandwidth so
#' if you're on a metered connection, you may incur charges.
#' @export
spd_download_test <- function(server, config=NULL) {
#' @examples \dontrun{
#' config <- spd_config()
#'
#' servers <- spd_servers(config=config)
#' closest_servers <- spd_closest_servers(servers, config=config)
#' only_the_best_severs <- spd_best_servers(closest_servers, config)
#'
#' spd_download_test(closest_servers, config=config)
#' spd_download_test(only_the_best_severs, config=config)
#' }
spd_download_test <- function(server, config=NULL, summarise=TRUE, timeout=60) {
if (nrow(server) > 1) server <- server[1,]
server <- unclass(server)
if (is.null(config)) config <- spd_config()
down_sizes <- c(350, 500, 750, 1000, 1500, 2000, 2500, 3000, 3500, 4000)
dl_urls <- sprintf("%s/random%sx%s.jpg", dirname(server$url), down_sizes, down_sizes)
dl_urls <- sprintf("%s/random%sx%s.jpg", dirname(server$url[1]), down_sizes, down_sizes)
pb <- dplyr::progress_estimated(length(dl_urls))
purrr::map(dl_urls, ~{
pb$tick()$print()
httr::GET(
url = .x,
httr::add_headers(
`Referer` = "http://c.speedtest.net/flash/speedtest.swf",
`Cache-Control` = "no-cache"
),
httr::user_agent(
splashr::ua_macos_chrome
),
query=list(ts=as.numeric(Sys.time()))
)
}) -> dl_resp
purrr::discard(dl_resp, ~.x$status_code != 200) %>%
purrr::map_df(~{
list(secs = .x$times[6] - .x$times[5], size = (length(.x$content) + length(.x$header)))
res <- .download_one(url=.x, timeout=timeout)
res$result
}) %>%
dplyr::mutate(bw = ((size/secs)*8) / 1024 / 1024) %>%
dplyr::summarise(min=min(bw), mean=mean(bw), median=median(bw), max=max(bw), sd=sd(bw), var=var(bw))
purrr::discard(is.null) %>%
purrr::discard(~.x$status_code != 200) %>%
purrr::map_df(~{
list(
test = "download",
secs = .x$times[6] - .x$times[5],
size = sum(purrr::map_dbl(names(unlist(.x$all_headers)), nchar)) +
sum(purrr::map_dbl(unlist(.x$all_headers), nchar)) + length(.x$content)
)
}) %>%
dplyr::mutate(bw = spd_compute_bandwidth(size, secs)) -> out
if (summarise) {
out <- dplyr::summarise(out, min=min(bw, na.rm=TRUE), mean=mean(bw, na.rm=TRUE),
median=median(bw, na.rm=TRUE), max=max(bw, na.rm=TRUE),
sd=sd(bw, na.rm=TRUE))
}
out$id <- server$id
dplyr::left_join(server, out, "id")
}

5
R/servers.r

@ -1,9 +1,14 @@
#' Retrieve a list of SpeedTest servers
#'
#' @md
#' @param config client configuration retrieved via [spd_config()]. If `NULL` it
#' will be retrieved
#' @return data frame
#' @export
#' @examples \dontrun{
#' config <- spd_config()
#' spd_servers(config)
#' }
spd_servers <- function(config=NULL) {
res <- httr::GET("https://www.speedtest.net/speedtest-servers-static.php")

11
R/speedtest-package.R

@ -1,11 +1,18 @@
#' ...
#' Tools to Test and Compare Internet Bandwidth Speeds
#'
#' The 'Ookla' 'Speedtest' site <http://beta.speedtest.net/about> provides
#' interactive and programmatic services to test and compare bandwidth speeds
#' from a source node on the Internet to thousands of test servers. Tools are
#' provided to obtain test server lists, identify target servers for testing and
#' performing speed/bandwidth tests.
#'
#' @md
#' @name speedtest
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import purrr xml2 httr
#' @importFrom utils globalVariables
#' @importFrom dplyr left_join arrange filter data_frame
#' @importFrom dplyr left_join arrange filter data_frame select summarise mutate
#' @importFrom jsonlite fromJSON
#' @importFrom curl curl_fetch_multi multi_run
#' @importFrom pingr ping

61
R/upload.r

@ -0,0 +1,61 @@
#' Perform an upload speed/bandwidth test
#'
#' Currently, six tests are performed in increasing order of size.
#'
#' This uses the legacy HTTP method of determining your bandwidth/speed and,
#' as such, has many issues. Rather than hack-compensate for error-prone
#' results with smaller files used on high-bandwidth connections, raw size +
#' transfer speed data is returned enabling you to perform your own adjustments
#' or choose which values to "believe".
#'
#' @md
#' @param server a data frame row from one of the functions that retrieves or
#'        filters a server list. You can pass in a full servers list but
#'        only the first entry will be processed.
#' @param config client configuration retrieved via [spd_config()]. If `NULL` it
#'        will be retrieved
#' @param summarise the raw results from each test --- including file sizes ---
#'        will be returned if the value is `FALSE`. If `TRUE` only summary
#'        statistics will be returned.
#' @param timeout max time (seconds) to wait for a connection or upload to finish.
#'        Default is `60` seconds
#' @return the (first) `server` row joined by `id` with either the summary
#'         statistics (`min`, `mean`, `median`, `max`, `sd`) or, when
#'         `summarise` is `FALSE`, one row per successful test (`test`, `secs`,
#'         `size`, `bw`). If no upload succeeds a warning is issued and the
#'         result columns are `NA`.
#' @note speed/bandwidth values are in Mbits/s; these tests consume bandwidth so
#'       if you're on a metered connection, you may incur charges.
#' @export
#' @examples \dontrun{
#' config <- spd_config()
#'
#' servers <- spd_servers(config=config)
#' closest_servers <- spd_closest_servers(servers, config=config)
#'
#' spd_upload_test(closest_servers, config=config)
#' }
spd_upload_test <- function(server, config=NULL, summarise=TRUE, timeout=60) {

  # only ever test against a single server
  if (nrow(server) > 1) server <- server[1,]

  # NOTE(review): `config` is not referenced again below; the retrieval is kept
  # so the function's observable behaviour is unchanged.
  if (is.null(config)) config <- spd_config()

  # payload sizes (bytes), smallest first
  up_sizes <- c(131072, 262144, 524288, 1048576, 4194304, 8388608)

  pb <- dplyr::progress_estimated(length(up_sizes))

  purrr::map(up_sizes, ~{
    pb$tick()$print()
    # random payload of `.x` bytes drawn from the package alphabet
    .dat <- sample(.base_raw, .x, replace=TRUE)
    res <- .upload_one(server$url[1], .dat, timeout)
    list(sz=.x, res=res$result)
  }) %>%
    purrr::discard(~is.null(.x$res)) %>%               # request errored
    purrr::discard(~.x$res$status_code != 200) -> ok_resp # non-OK response

  if (length(ok_resp) == 0) {

    # Without this guard map_df() yields a column-less frame and the
    # subsequent mutate() fails on the missing `size`/`secs` columns.
    warning("No upload test requests succeeded for the selected server", call.=FALSE)

    out <- data.frame(
      test = character(0), secs = numeric(0), size = numeric(0), bw = numeric(0),
      stringsAsFactors = FALSE
    )

  } else {

    purrr::map_df(ok_resp, ~{
      list(
        test = "upload",
        # difference between the 6th and 5th entries of the response timing
        # vector. NOTE(review): presumably total minus start-transfer time;
        # confirm against the httr/curl timing documentation.
        secs = .x$res$times[6] - .x$res$times[5],
        size = .x$sz
      )
    }) %>%
      dplyr::mutate(bw = spd_compute_bandwidth(size, secs)) -> out

  }

  if (summarise) {
    if (nrow(out) == 0) {
      # nothing to summarise: report missing values instead of Inf/NaN
      out <- data.frame(min=NA_real_, mean=NA_real_, median=NA_real_,
                        max=NA_real_, sd=NA_real_)
    } else {
      out <- dplyr::summarise(out, min=min(bw, na.rm=TRUE), mean=mean(bw, na.rm=TRUE),
                              median=median(bw, na.rm=TRUE), max=max(bw, na.rm=TRUE),
                              sd=sd(bw, na.rm=TRUE))
    }
  }

  # tag every result row with the server id so it can be joined back on;
  # rep() also copes with a zero-row result
  out$id <- rep(server$id, length.out=nrow(out))

  dplyr::left_join(server, out, "id")

}

33
R/util.r

@ -1 +1,32 @@
sGET <- purrr::safely(httr::GET)
sGET <- purrr::safely(httr::GET)
sPOST <- purrr::safely(httr::POST)
# Fetch a single download-test URL through `sGET` (the `purrr::safely()`
# wrapper around `httr::GET`), so the caller receives the wrapper's return
# value rather than a raised error.
#
# url     : URL of the test file to retrieve
# timeout : seconds handed to `httr::timeout()`
.download_one <- function(url, timeout) {

  # `no-cache` header and the timestamp query parameter both try to bust
  # transparent proxy caches
  req_headers <- httr::add_headers(
    `Referer` = "http://c.speedtest.net/flash/speedtest.swf",
    `Cache-Control` = "no-cache"
  )
  cache_buster <- list(ts=as.numeric(Sys.time()))

  sGET(
    url = url,
    req_headers,
    httr::user_agent(.speedtest_ua),
    httr::timeout(timeout),
    query = cache_buster
  )

}
# POST one upload-test payload through `sPOST` (the `purrr::safely()` wrapper
# around `httr::POST`), so the caller receives the wrapper's return value
# rather than a raised error.
#
# url     : upload endpoint of the target server
# dat     : payload passed as the request body
# timeout : seconds handed to `httr::timeout()`
.upload_one <- function(url, dat, timeout) {

  # `no-cache` header and the timestamp query parameter both try to bust
  # transparent proxy caches
  req_headers <- httr::add_headers(
    `Referer` = "http://c.speedtest.net/flash/speedtest.swf",
    `Connection` = "Keep-Alive",
    `Cache-Control` = "no-cache"
  )
  cache_buster <- list(ts=as.numeric(Sys.time()))

  sPOST(
    url = url,
    req_headers,
    httr::user_agent(.speedtest_ua),
    httr::timeout(timeout),
    body = dat,
    encode = "form",
    query = cache_buster
  )

}

116
README.Rmd

@ -19,10 +19,12 @@ speed/bandwidth tests.
The following functions are implemented:
- `spd_best_servers`: Find "best" servers (latency-wise) from master server list
- `spd_closest_servers`: #' Find "closest" servers (geography-wise) from master server list
- `spd_closest_servers`: Find "closest" servers (geography-wise) from master server list
- `spd_compute_bandwidth`: Compute bandwidth from bytes transferred and time taken
- `spd_config`: Retrieve client configuration information for the speedtest
- `spd_download_test`: Download test
- `spd_download_test`: Perform a download speed/bandwidth test
- `spd_servers`: Retrieve a list of SpeedTest servers
- `spd_upload_test`: Perform an upload speed/bandwidth test
## TODO
@ -32,7 +34,6 @@ Folks interested in contributing can take a look at the TODOs and pick as many a
- [ ] Figure out how to use beta sockets hidden API vs the old Flash API?
- [ ] Ensure the efficacy of relying on the cURL timings for speed measures for the Flash API
- [ ] Figure out best way to capture the results for post-processing
- [ ] Upload speed measures!
- [ ] Upload results to speedtest (tis only fair)!
- [ ] Incorporate more network or host measures for better statistical determination of the best target!
- [ ] `autoplot` support!
@ -54,9 +55,118 @@ options(width=120)
```{r message=FALSE, warning=FALSE, error=FALSE}
library(speedtest)
library(stringi)
library(hrbrthemes)
library(ggbeeswarm)
library(tidyverse)
# current version
packageVersion("speedtest")
```
### Download Speed
```{r message=FALSE, warning=FALSE, error=FALSE, cache=FALSE}
config <- spd_config()
servers <- spd_servers(config=config)
closest_servers <- spd_closest_servers(servers, config=config)
only_the_best_severs <- spd_best_servers(closest_servers, config)
```
### Individual download tests
```{r message=FALSE, warning=FALSE, error=FALSE}
glimpse(spd_download_test(closest_servers[1,], config=config))
glimpse(spd_download_test(only_the_best_severs[1,], config=config))
```
### Individual upload tests
```{r message=FALSE, warning=FALSE, error=FALSE}
glimpse(spd_upload_test(only_the_best_severs[1,], config=config))
glimpse(spd_upload_test(closest_servers[1,], config=config))
```
### Moar download tests
Choose closest, "best" and randomly (there can be, and are, some dups as a result for best/closest), run the test and chart the results. This will show just how disparate the results are from these core/crude tests. Most of the test servers compensate when they present the results. Newer, "socket"-based tests are more accurate but there are no free/hidden exposed APIs yet for most of them.
```{r message=FALSE, warning=FALSE, error=FALSE, cache=FALSE}
set.seed(8675309)
bind_rows(
closest_servers[1:3,] %>%
mutate(type="closest"),
only_the_best_severs[1:3,] %>%
mutate(type="best"),
filter(servers, !(id %in% c(closest_servers[1:3,]$id, only_the_best_severs[1:3,]$id))) %>%
sample_n(3) %>%
mutate(type="random")
) %>%
group_by(type) %>%
ungroup() -> to_compare
select(to_compare, sponsor, name, country, host, type)
```
```{r message=FALSE, warning=FALSE, error=FALSE, cache=TRUE}
map_df(1:nrow(to_compare), ~{
spd_download_test(to_compare[.x,], config=config, summarise=FALSE, timeout=30)
}) -> dl_results_full
```
```{r message=FALSE, warning=FALSE, error=FALSE}
mutate(dl_results_full, type=stri_trans_totitle(type)) %>%
ggplot(aes(type, bw, fill=type)) +
geom_quasirandom(aes(size=size, color=type), width=0.15, shape=21, stroke=0.25) +
scale_y_continuous(expand=c(0,5), labels=c(sprintf("%s", seq(0,150,50)), "200 Mb/s"), limits=c(0,200)) +
scale_size(range=c(2,6)) +
scale_color_manual(values=c(Random="#b2b2b2", Best="#2b2b2b", Closest="#2b2b2b")) +
scale_fill_ipsum() +
labs(x=NULL, y=NULL, title="Download bandwidth test by selected server type",
subtitle="Circle size scaled by size of file used in that speed test") +
theme_ipsum_rc(grid="Y") +
theme(legend.position="none")
```
### Moar upload tests
Choose closest and "best" and filter duplicates out since we're really trying to measure here vs show the disparity:
```{r message=FALSE, warning=FALSE, error=FALSE, cache=TRUE}
bind_rows(
closest_servers[1:3,] %>% mutate(type="closest"),
only_the_best_severs[1:3,] %>% mutate(type="best")
) %>%
distinct(.keep_all=TRUE) -> to_compare
select(to_compare, sponsor, name, country, host, type)
map_df(1:nrow(to_compare), ~{
spd_upload_test(to_compare[.x,], config=config, summarise=FALSE, timeout=30)
}) -> ul_results_full
```
```{r message=FALSE, warning=FALSE, error=FALSE}
ggplot(ul_results_full, aes(x="Upload Test", y=bw)) +
geom_quasirandom(aes(size=size, fill="col"), width=0.1, shape=21, stroke=0.25, color="#2b2b2b") +
scale_y_continuous(expand=c(0,0.5), breaks=seq(0,16,4),
labels=c(sprintf("%s", seq(0,12,4)), "16 Mb/s"), limits=c(0,16)) +
scale_size(range=c(2,6)) +
scale_fill_ipsum() +
labs(x=NULL, y=NULL, title="Upload bandwidth test by selected server type",
subtitle="Circle size scaled by size of file used in that speed test") +
theme_ipsum_rc(grid="Y") +
theme(legend.position="none")
```
## Code of Conduct
Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms.

295
README.gfm-ascii_identifiers

@ -0,0 +1,295 @@
# speedtest
Tools to Test and Compare Internet Bandwidth Speeds
## Description
The ‘Ookla’ ‘Speedtest’ site <http://beta.speedtest.net/about> provides
interactive and programmatic services to test and compare bandwidth
speeds from a source node on the Internet to thousands of test servers.
Tools are provided to obtain test server lists, identify target servers
for testing and performing speed/bandwidth tests.
## What’s Inside The Tin
The following functions are implemented:
- `spd_best_servers`: Find “best” servers (latency-wise) from master
server list
- `spd_closest_servers`: Find “closest” servers (geography-wise) from
master server list
- `spd_compute_bandwidth`: Compute bandwidth from bytes transferred
and time taken
- `spd_config`: Retrieve client configuration information for the
speedtest
- `spd_download_test`: Perform a download speed/bandwidth test
- `spd_servers`: Retrieve a list of SpeedTest servers
- `spd_upload_test`: Perform an upload speed/bandwidth test
## TODO
Folks interested in contributing can take a look at the TODOs and pick
as many as you like\! Ones with question marks are truly a “I dunno if
we shld” kinda thing. Ones with exclamation marks are essentials.
- \[ \] Cache config in memory at startup vs pass around to functions?
- \[ \] Figure out how to use beta sockets hidden API vs the old Flash
API?
- \[ \] Ensure the efficacy of relying on the cURL timings for speed
measures for the Flash API
- \[ \] Figure out best way to capture the results for post-processing
- \[ \] Upload results to speedtest (tis only fair)\!
- \[ \] Incorporate more network or host measures for better
statistical determination of the best target\!
- \[ \] `autoplot` support\!
- \[ \] RStudio Add-in
- \[ \] CLI wrapper
- \[ \] Shiny app?
## Installation
``` r
devtools::install_github("hrbrmstr/speedtest")
```
## Usage
``` r
library(speedtest)
library(stringi)
library(hrbrthemes)
library(ggbeeswarm)
library(tidyverse)
# current version
packageVersion("speedtest")
```
## [1] '0.1.0'
### Download Speed
``` r
config <- spd_config()
servers <- spd_servers(config=config)
closest_servers <- spd_closest_servers(servers, config=config)
only_the_best_severs <- spd_best_servers(closest_servers, config)
```
### Individual download tests
``` r
glimpse(spd_download_test(closest_servers[1,], config=config))
```
## Observations: 1
## Variables: 15
## $ url <chr> "http://speed0.xcelx.net/speedtest/upload.php"
## $ lat <dbl> 42.3875
## $ lng <dbl> -71.1
## $ name <chr> "Somerville, MA"
## $ country <chr> "United States"
## $ cc <chr> "US"
## $ sponsor <chr> "Axcelx Technologies LLC"
## $ id <chr> "5960"
## $ host <chr> "speed0.xcelx.net:8080"
## $ url2 <chr> "http://speed1.xcelx.net/speedtest/upload.php"
## $ min <dbl> 21.66333
## $ mean <dbl> 74.9058
## $ median <dbl> 67.30838
## $ max <dbl> 136.195
## $ sd <dbl> 41.85532
``` r
glimpse(spd_download_test(only_the_best_severs[1,], config=config))
```
## Observations: 1
## Variables: 18
## $ ping_time <dbl> 0.038836
## $ total_time <dbl> 0.114157
## $ retrieval_time <dbl> 1.7e-05
## $ url <chr> "http://speedtest.norwoodlight.com/speedtest/upload.php"
## $ lat <dbl> 42.1944
## $ lng <dbl> -71.2
## $ name <chr> "Norwood, MA"
## $ country <chr> "United States"
## $ cc <chr> "US"
## $ sponsor <chr> "Norwood Light Broadband"
## $ id <chr> "4920"
## $ host <chr> "speedtest.norwoodlight.com:8080"
## $ url2 <chr> "http://netgauge.norwoodlight.com/speedtest/upload.php"
## $ min <dbl> 10.52263
## $ mean <dbl> 50.86126
## $ median <dbl> 30.98824
## $ max <dbl> 111.8747
## $ sd <dbl> 35.40526
### Individual upload tests
``` r
glimpse(spd_upload_test(only_the_best_severs[1,], config=config))
```
## Observations: 1
## Variables: 18
## $ ping_time <dbl> 0.038836
## $ total_time <dbl> 0.114157
## $ retrieval_time <dbl> 1.7e-05
## $ url <chr> "http://speedtest.norwoodlight.com/speedtest/upload.php"
## $ lat <dbl> 42.1944
## $ lng <dbl> -71.2
## $ name <chr> "Norwood, MA"
## $ country <chr> "United States"
## $ cc <chr> "US"
## $ sponsor <chr> "Norwood Light Broadband"
## $ id <chr> "4920"
## $ host <chr> "speedtest.norwoodlight.com:8080"
## $ url2 <chr> "http://netgauge.norwoodlight.com/speedtest/upload.php"
## $ min <dbl> 7.307031
## $ mean <dbl> 8.812541
## $ median <dbl> 8.814865
## $ max <dbl> 10.77554
## $ sd <dbl> 1.260674
``` r
glimpse(spd_upload_test(closest_servers[1,], config=config))
```
## Observations: 1
## Variables: 15
## $ url <chr> "http://speed0.xcelx.net/speedtest/upload.php"
## $ lat <dbl> 42.3875
## $ lng <dbl> -71.1
## $ name <chr> "Somerville, MA"
## $ country <chr> "United States"
## $ cc <chr> "US"
## $ sponsor <chr> "Axcelx Technologies LLC"
## $ id <chr> "5960"
## $ host <chr> "speed0.xcelx.net:8080"
## $ url2 <chr> "http://speed1.xcelx.net/speedtest/upload.php"
## $ min <dbl> 5.314042
## $ mean <dbl> 10.20264
## $ median <dbl> 10.64703
## $ max <dbl> 13.97671
## $ sd <dbl> 3.113636
### Moar download tests
Choose closest, “best” and randomly (there can be, and are, some dups as
a result for best/closest), run the test and chart the results. This
will show just how disparate the results are from these core/crude
tests. Most of the test servers compensate when they present the
results. Newer, “socket”-based tests are more accurate but there are no
free/hidden exposed APIs yet for most of them.
``` r
set.seed(8675309)
bind_rows(
closest_servers[1:3,] %>%
mutate(type="closest"),
only_the_best_severs[1:3,] %>%
mutate(type="best"),
filter(servers, !(id %in% c(closest_servers[1:3,]$id, only_the_best_severs[1:3,]$id))) %>%
sample_n(3) %>%
mutate(type="random")
) %>%
group_by(type) %>%
ungroup() -> to_compare
select(to_compare, sponsor, name, country, host, type)
```
## # A tibble: 9 x 5
## sponsor name country host type
## <chr> <chr> <chr> <chr> <chr>
## 1 Axcelx Technologies LLC Somerville, MA United States speed0.xcelx.net:8080 closest
## 2 Comcast Boston, MA United States stosat-ndhm-01.sys.comcast.net:8080 closest
## 3 Starry, Inc. Boston, MA United States speedtest-server.starry.com:8080 closest
## 4 Norwood Light Broadband Norwood, MA United States speedtest.norwoodlight.com:8080 best
## 5 Comcast Boston, MA United States stosat-ndhm-01.sys.comcast.net:8080 best
## 6 Oxford Networks Lewiston, ME United States sp1.oxfordnetworks.com:8080 best
## 7 PirxNet Gliwice Poland st.pirx.pl:8080 random
## 8 QuadraNet, Inc Los Angeles, CA United States 69.12.66.27:80 random
## 9 PT. Telekomunikasi Indonesia Semarang Indonesia semarang.speedtest.telkom.net.id:8080 random
``` r
map_df(1:nrow(to_compare), ~{
spd_download_test(to_compare[.x,], config=config, summarise=FALSE, timeout=30)
}) -> dl_results_full
```
``` r
mutate(dl_results_full, type=stri_trans_totitle(type)) %>%
ggplot(aes(type, bw, fill=type)) +
geom_quasirandom(aes(size=size, color=type), width=0.15, shape=21, stroke=0.25) +
scale_y_continuous(expand=c(0,5), labels=c(sprintf("%s", seq(0,150,50)), "200 Mb/s"), limits=c(0,200)) +
scale_size(range=c(2,6)) +
scale_color_manual(values=c(Random="#b2b2b2", Best="#2b2b2b", Closest="#2b2b2b")) +
scale_fill_ipsum() +
labs(x=NULL, y=NULL, title="Download bandwidth test by selected server type",
subtitle="Circle size scaled by size of file used in that speed test") +
theme_ipsum_rc(grid="Y") +
theme(legend.position="none")
```
![](README.gfm-ascii_identifiers_files/figure-gfm/unnamed-chunk-9-1.png)<!-- -->
### Moar upload tests
Choose closest and “best” and filter duplicates out since we’re really
trying to measure here vs show the disparity:
``` r
bind_rows(
closest_servers[1:3,] %>% mutate(type="closest"),
only_the_best_severs[1:3,] %>% mutate(type="best")
) %>%
distinct(.keep_all=TRUE) -> to_compare
select(to_compare, sponsor, name, country, host, type)
```
## # A tibble: 6 x 5
## sponsor name country host type
## <chr> <chr> <chr> <chr> <chr>
## 1 Axcelx Technologies LLC Somerville, MA United States speed0.xcelx.net:8080 closest
## 2 Comcast Boston, MA United States stosat-ndhm-01.sys.comcast.net:8080 closest
## 3 Starry, Inc. Boston, MA United States speedtest-server.starry.com:8080 closest
## 4 Norwood Light Broadband Norwood, MA United States speedtest.norwoodlight.com:8080 best
## 5 Comcast Boston, MA United States stosat-ndhm-01.sys.comcast.net:8080 best
## 6 Oxford Networks Lewiston, ME United States sp1.oxfordnetworks.com:8080 best
``` r
map_df(1:nrow(to_compare), ~{
spd_upload_test(to_compare[.x,], config=config, summarise=FALSE, timeout=30)
}) -> ul_results_full
```
``` r
ggplot(ul_results_full, aes(x="Upload Test", y=bw)) +
geom_quasirandom(aes(size=size, fill="col"), width=0.1, shape=21, stroke=0.25, color="#2b2b2b") +
scale_y_continuous(expand=c(0,0.5), breaks=seq(0,16,4),
labels=c(sprintf("%s", seq(0,12,4)), "16 Mb/s"), limits=c(0,16)) +
scale_size(range=c(2,6)) +
scale_fill_ipsum() +
labs(x=NULL, y=NULL, title="Upload bandwidth test by selected server type",
subtitle="Circle size scaled by size of file used in that speed test") +
theme_ipsum_rc(grid="Y") +
theme(legend.position="none")
```
![](README.gfm-ascii_identifiers_files/figure-gfm/unnamed-chunk-11-1.png)<!-- -->
## Code of Conduct
Please note that this project is released with a [Contributor Code of
Conduct](CONDUCT.md). By participating in this project you agree to
abide by its terms.

BIN
README.gfm-ascii_identifiers_files/figure-gfm/unnamed-chunk-11-1.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

BIN
README.gfm-ascii_identifiers_files/figure-gfm/unnamed-chunk-5-1.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

BIN
README.gfm-ascii_identifiers_files/figure-gfm/unnamed-chunk-7-1.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

BIN
README.gfm-ascii_identifiers_files/figure-gfm/unnamed-chunk-9-1.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

15
README_cache/gfm/__packages

@ -0,0 +1,15 @@
base
speedtest
stringi
hrbrthemes
ggplot2
ggbeeswarm
tidyverse
tibble
tidyr
readr
purrr
dplyr
stringr
forcats
bindrcpp

BIN
README_cache/gfm/unnamed-chunk-10_b7574424094e6afbf2484aad688918ae.RData

Binary file not shown.

BIN
README_cache/gfm/unnamed-chunk-10_b7574424094e6afbf2484aad688918ae.rdb

Binary file not shown.

BIN
README_cache/gfm/unnamed-chunk-10_b7574424094e6afbf2484aad688918ae.rdx

Binary file not shown.

BIN
README_cache/gfm/unnamed-chunk-4_c1e8e4162dad3e4d9b4a994ae1c5d558.RData

Binary file not shown.

BIN
README_cache/gfm/unnamed-chunk-4_c1e8e4162dad3e4d9b4a994ae1c5d558.rdb

Binary file not shown.

BIN
README_cache/gfm/unnamed-chunk-4_c1e8e4162dad3e4d9b4a994ae1c5d558.rdx

Binary file not shown.

BIN
README_cache/gfm/unnamed-chunk-8_63201fbce0e2c3afdeb25c59a8991d7e.RData

Binary file not shown.

BIN
README_cache/gfm/unnamed-chunk-8_63201fbce0e2c3afdeb25c59a8991d7e.rdb

Binary file not shown.

BIN
README_cache/gfm/unnamed-chunk-8_63201fbce0e2c3afdeb25c59a8991d7e.rdx

Binary file not shown.

14
man/spd_best_servers.Rd

@ -4,7 +4,7 @@
\alias{spd_best_servers}
\title{Find "best" servers (latency-wise) from master server list}
\usage{
spd_best_servers(servers = NULL, config = NULL)
spd_best_servers(servers = NULL, config = NULL, max = 10)
}
\arguments{
\item{servers}{if not \code{NULL}, then the data frame from \code{\link[=spd_servers]{spd_servers()}}. If
@ -12,13 +12,21 @@ spd_best_servers(servers = NULL, config = NULL)
\item{config}{client configuration retrieved via \code{\link[=spd_config]{spd_config()}}. If \code{NULL} it
will be retrieved}
\item{max}{the maximum number of "best" servers to return. This is hard-capped
at 25 since Ookla is a free/sponsored service and if you plan on abusing
it you'll have to write your own code to do so. Default is \code{10}.}
}
\value{
server list in order of latency closeness (retrieval speed column included)
}
\description{
Find "best" servers (latency-wise) from master server list
The input \code{servers} data frame will be truncated to the first \code{max} and
HTTP and ICMP probe tests will be performed to determine initial retrieval
speed and latency. Not all servers respond to ICMP requests due to the way
their routers, switches or firewalls are configured.
}
\note{
the list of target servers will be truncated to the first 10
the list of target servers will be truncated to the first \code{max}. Fewer than
\code{max} servers may be returned if there were errors connecting to servers.
}

9
man/spd_closest_servers.Rd

@ -2,7 +2,7 @@
% Please edit documentation in R/closest.r
\name{spd_closest_servers}
\alias{spd_closest_servers}
\title{#' Find "closest" servers (geography-wise) from master server list}
\title{Find "closest" servers (geography-wise) from master server list}
\usage{
spd_closest_servers(servers = NULL, config = NULL)
}
@ -17,5 +17,10 @@ will be retrieved}
server list in order of geographic closeness
}
\description{
Uses \href{https://ipinfo.io}{ipinfo.io} to geolocate your external IP address.
Find "closest" servers (geography-wise) from master server list
}
\examples{
\dontrun{
spd_closest_servers()
}
}

21
man/spd_compute_bandwidth.Rd

@ -0,0 +1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bandwidth.r
\name{spd_compute_bandwidth}
\alias{spd_compute_bandwidth}
\title{Compute bandwidth from bytes transferred and time taken}
\usage{
spd_compute_bandwidth(size_bytes, xfer_secs, mbits = TRUE)
}
\arguments{
\item{size_bytes}{size (in bytes) of the payload transferred}
\item{xfer_secs}{time taken for the transfer}
\item{mbits}{produce output in megabits (Mb)? Default: \code{TRUE}}
}
\description{
Compute bandwidth from bytes transferred and time taken
}
\examples{
spd_compute_bandwidth(19200000, 1) # 150 Mb/sec
}

5
man/spd_config.Rd

@ -9,3 +9,8 @@ spd_config()
\description{
Retrieve client configuration information for the speedtest
}
\examples{
\dontrun{
spd_config()
}
}

44
man/spd_download_test.Rd

@ -2,10 +2,48 @@
% Please edit documentation in R/download.r
\name{spd_download_test}
\alias{spd_download_test}
\title{Download test}
\title{Perform a download speed/bandwidth test}
\usage{
spd_download_test(server, config = NULL)
spd_download_test(server, config = NULL, summarise = TRUE, timeout = 60)
}
\arguments{
\item{server}{a data frame row from one of the functions that retrieves or
filters a server list. You can pass in a full servers list but
only the first entry will be processed.}
\item{config}{client configuration retrieved via \code{\link[=spd_config]{spd_config()}}. If \code{NULL} it
will be retrieved}
\item{summarise}{the raw results from each test --- including file sizes ---
will be returned if the value is \code{FALSE}. If \code{TRUE} only summary
statistics will be returned.}
\item{timeout}{max time (seconds) to wait for a connection or download to finish.
Default is \code{60} seconds}
}
\description{
Download test
Currently, ten tests are performed in increasing order of size.
}
\details{
This uses the legacy HTTP method of determining your bandwidth/speed and,
as such, has many issues. Rather than hack-compensate for error-prone
results with smaller files used on high-bandwidth connections, raw size +
transfer speed data is returned enabling you to perform your own adjustments
or choose which values to "believe".
}
\note{
speed/bandwidth values are in Mbits/s; these tests consume bandwidth so
if you're on a metered connection, you may incur charges.
}
\examples{
\dontrun{
config <- spd_config()
servers <- spd_servers(config=config)
closest_servers <- spd_closest_servers(servers, config=config)
best_servers <- spd_best_servers(closest_servers, config)
spd_download_test(closest_servers, config=config)
spd_download_test(best_servers, config=config)
}
}

8
man/spd_servers.Rd

@ -7,7 +7,7 @@
spd_servers(config = NULL)
}
\arguments{
\item{config}{client configuration retrieved via [spd_config()]. If `NULL` it
\item{config}{client configuration retrieved via \code{\link[=spd_config]{spd_config()}}. If \code{NULL} it
will be retrieved}
}
\value{
@ -16,3 +16,9 @@ data frame
\description{
Retrieve a list of SpeedTest servers
}
\examples{
\dontrun{
config <- spd_config()
spd_servers(config)
}
}

37
man/spd_upload_test.Rd

@ -0,0 +1,37 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/upload.r
\name{spd_upload_test}
\alias{spd_upload_test}
\title{Perform an upload speed/bandwidth test}
\usage{
spd_upload_test(server, config = NULL, summarise = TRUE, timeout = 60)
}
\arguments{
\item{server}{a data frame row from one of the functions that retrieves or
filters a server list. You can pass in a full servers list but
only the first entry will be processed.}
\item{config}{client configuration retrieved via \code{\link[=spd_config]{spd_config()}}. If \code{NULL} it
will be retrieved}
\item{summarise}{the raw results from each test --- including file sizes ---
will be returned if the value is \code{FALSE}. If \code{TRUE} only summary
statistics will be returned.}
\item{timeout}{max time (seconds) to wait for a connection or upload to finish.
Default is \code{60} seconds}
}
\description{
Currently, six tests are performed in increasing order of size.
}
\details{
This uses the legacy HTTP method of determining your bandwidth/speed and,
as such, has many issues. Rather than hack-compensate for error-prone
results with smaller files used on high-bandwidth connections, raw size +
transfer speed data is returned enabling you to perform your own adjustments
or choose which values to "believe".
}
\note{
speed/bandwidth values are in Mbits/s; these tests consume bandwidth so
if you're on a metered connection, you may incur charges.
}

8
man/speedtest.Rd

@ -4,9 +4,13 @@
\name{speedtest}
\alias{speedtest}
\alias{speedtest-package}
\title{...}
\title{Tools to Test and Compare Internet Bandwidth Speeds}
\description{
...
The 'Ookla' 'Speedtest' site \url{http://beta.speedtest.net/about} provides
interactive and programmatic services to test and compare bandwidth speeds
from a source node on the Internet to thousands of test servers. Tools are
provided to obtain test server lists, identify target servers for testing and
perform speed/bandwidth tests.
}
\author{
Bob Rudis (bob@rud.is)

Loading…
Cancel
Save