boB Rudis
5 years ago
11 changed files with 367 additions and 3 deletions
@ -1,4 +1,10 @@ |
|||
# Generated by roxygen2: do not edit by hand |
|||
|
|||
S3method(print,mediacloud_stats) |
|||
export(mc_media_list) |
|||
export(mc_stats) |
|||
export(mediacloud_api_key) |
|||
import(httr) |
|||
import(stringi) |
|||
importFrom(jsonlite,fromJSON) |
|||
importFrom(scales,comma) |
|||
|
@ -0,0 +1,7 @@ |
|||
# User-agent object passed to the httr::GET() calls made by this package's
# API wrappers. The string embeds the installed package version and the URL
# field read from the package DESCRIPTION.
.MEDIACLOUD_UA <- httr::user_agent(
  sprintf(
    "mediacloud package v%s: (<%s>)",
    utils::packageVersion("mediacloud"),
    utils::packageDescription("mediacloud")$URL
  )
)
@ -0,0 +1,46 @@ |
|||
#' Get or set MEDIACLOUD_API_KEY value
#'
#' The API wrapper functions in this package all rely on a Media Cloud API
#' key residing in the environment variable \code{MEDIACLOUD_API_KEY}. The
#' easiest way to accomplish this is to set it in the \code{.Renviron} file in your
#' home directory.
#'
#' Every API call must include a key parameter which will authenticate you to
#' the API service. To get a key, register for a user via
#' <https://topics.mediacloud.org/#/user/signup>. Once you have an account go
#' here to see your key <https://topics.mediacloud.org/#/user/profile>.
#'
#' If the environment variable is already set (and `force` is `FALSE`) its
#' value is returned unchanged. Otherwise, in an interactive session the user
#' is prompted for a key, which is stored in the environment variable for the
#' current session; in a non-interactive session an error is raised.
#'
#' @param force force setting a new Media Cloud API key for the current environment?
#' @return atomic character vector containing the Media Cloud API key
#' @references <https://github.com/berkmancenter/mediacloud/blob/master/doc/api_2_0_spec/api_2_0_spec.md>
#' @export
mediacloud_api_key <- function(force = FALSE) {

  # Fast path: a key is already present and the caller did not ask to replace it.
  key <- Sys.getenv("MEDIACLOUD_API_KEY")
  if (!identical(key, "") && !force) {
    return(key)
  }

  # Prompting is impossible without a user at the console.
  if (!interactive()) {
    stop("Please set env var MEDIACLOUD_API_KEY to your Media Cloud API key",
         call. = FALSE)
  }

  message("Couldn't find env var MEDIACLOUD_API_KEY. See ?mediacloud_api_key for more details.")
  message("Please enter your API key and press enter:")
  key <- readline(": ")

  if (identical(key, "")) {
    stop("Media Cloud API key entry failed", call. = FALSE)
  }

  # Persist the entered key for the remainder of this R session.
  message("Updating MEDIACLOUD_API_KEY env var to PAT")
  Sys.setenv(MEDIACLOUD_API_KEY = key)

  key

}
@ -0,0 +1,91 @@ |
|||
#' Return multiple media sources
#'
#' Issues a GET request to the Media Cloud `media/list` endpoint and returns
#' the parsed JSON response.
#'
#' @param name Name of media source for which to search; If this parameter is
#'        specified, the call returns only media sources that match a case
#'        insensitive search specified value. If the specified value is less
#'        than 3 characters long, the call returns an empty list. Default: none
#' @param query Return media with at least one sentence that matches this Solr
#'        query; For a description of the Solr query format, see
#'        <https://github.com/berkmancenter/mediacloud/blob/master/doc/api_2_0_spec/api_2_0_spec.md#apiv2stories_publiclist>.
#'        Default: null
#' @param tags_id Return media associated with any of the given tags; If this is not
#'        a length 1 character vector each vector element will be passed in as
#'        an additional "tag id" list. These lists are AND'd together on the
#'        server. Default: null
#' @param last_media_id Return media sources with a media_id greater than this value; Default: 0
#' @param rows Number of media sources to return. Cannot be larger than 100; Default: 0
#' @param tag_name Name of tag for which to return belonging media; Default: none
#' @param timespans_id Return media within the given timespan; If this parameter
#'        is specified, return media within the given time slice, sorted by
#'        descending inlink_count within the timespan. If topic_mode is set to
#'        'live', return media from the live topic stories rather than from
#'        the frozen snapshot. Default: null
#' @param topic_mode If set to 'live', return media from live topics; Default: null
#' @param include_dups Include duplicate media among the results; Default: 0
#' @param unhealthy Only return media that are currently marked as unhealthy (see mediahealth/list);
#'        logical values are converted to integers before being sent. Default: none
#' @param similar_media_id Return media with the most tags in common; Default: none
#' @param sort sort order of media: id, or num_stories; By default, media are
#'        sorted by media_id. If this parameter is set to 'num_stories',
#'        the media will be sorted by decreasing number of stories in the past
#'        90 days
#' @param ... passed on to `httr` verb calls
#' @param api_key See [mediacloud_api_key()]
#' @return the parsed JSON response with its class set to
#'         `c("tbl_df", "tbl", "data.frame")`
#' @note By default, calls that specify a name parameter will only return media
#'       that are not duplicates of some other media source. Media Cloud has
#'       many media sources that are either subsets of other media sources or
#'       are just holders for spidered media from a given media source, both of
#'       which are marked as duplicate media and are not included in the
#'       default results. If the 'include_dups' parameter is set to 1, those
#'       duplicate sources will be included in the results.
#' @export
mc_media_list <- function(name = NULL, query = NULL, tags_id = NULL,
                          last_media_id = 0, rows = 0, tag_name = NULL,
                          timespans_id = NULL, topic_mode = NULL,
                          include_dups = 0, unhealthy = NULL,
                          similar_media_id = NULL, sort = c("id", "num_stories"),
                          ..., api_key = mediacloud_api_key()) {

  # NOTE(review): `rows` defaults to 0 here, while earlier documentation of
  # this function stated a default of 20 -- confirm which one is intended.

  # Only the first element of `sort` is considered.
  sort <- match.arg(sort[1], choices = c("id", "num_stories"), several.ok = FALSE)

  # Logical flags are sent as integers.
  unhealthy <- numerify(unhealthy)
  similar_media_id <- numerify(similar_media_id)

  params <- list(
    last_media_id = last_media_id,
    rows = rows,
    name = name,
    tag_name = tag_name,
    timespans_id = timespans_id,
    topic_mode = topic_mode,
    q = query,
    include_dups = include_dups,
    unhealthy = unhealthy,
    similar_media_id = similar_media_id,
    sort = sort,
    key = api_key
  )

  n_tags <- length(tags_id)
  if (n_tags >= 2) {
    # Several tags: the first is sent as `tags_id`, the rest as
    # `tags_id_1`, `tags_id_2`, ...
    named_tags <- tags_id
    names(named_tags) <- c("tags_id", sprintf("tags_id_%s", seq_len(n_tags - 1L)))
    params <- append(params, as.list(named_tags))
  } else {
    # Zero or one tag: assigning NULL leaves `params` unchanged.
    params[["tags_id"]] <- tags_id
  }

  response <- httr::GET(
    url = "https://api.mediacloud.org/api/v2/media/list",
    query = params,
    .MEDIACLOUD_UA,
    ...
  )

  # Turn HTTP error statuses into R errors before parsing.
  httr::stop_for_status(response)

  parsed <- jsonlite::fromJSON(
    httr::content(response, as = "text", encoding = "UTF-8")
  )

  class(parsed) <- c("tbl_df", "tbl", "data.frame")

  parsed

}
@ -1,12 +1,14 @@ |
|||
#' ... |
|||
#' |
|||
#' |
|||
#' - URL: <https://gitlab.com/hrbrmstr/mediacloud> |
|||
#' - BugReports: <https://gitlab.com/hrbrmstr/mediacloud/issues> |
|||
#' |
|||
#' |
|||
#' @md |
|||
#' @name mediacloud |
|||
#' @keywords internal |
|||
#' @author Bob Rudis (bob@@rud.is) |
|||
#' @import httr |
|||
#' @import stringi |
|||
#' @importFrom scales comma |
|||
#' @importFrom jsonlite fromJSON |
|||
"_PACKAGE" |
|||
|
@ -0,0 +1,83 @@ |
|||
#' Return basic summary stats about total sources, stories, feeds, etc
#' processed by Media Cloud
#'
#' Issues a GET request to the Media Cloud `stats/list` endpoint and returns
#' the parsed JSON response tagged with class `mediacloud_stats`.
#'
#' @param ... passed on to `httr` verb calls
#' @param api_key See [mediacloud_api_key()]
#' @return the parsed JSON response with class `mediacloud_stats`
#' @export
mc_stats <- function(..., api_key = mediacloud_api_key()) {

  response <- httr::GET(
    url = "https://api.mediacloud.org/api/v2/stats/list",
    query = list(key = api_key),
    .MEDIACLOUD_UA,
    ...
  )

  # Turn HTTP error statuses into R errors before parsing.
  httr::stop_for_status(response)

  stats <- jsonlite::fromJSON(
    httr::content(response, as = "text", encoding = "UTF-8")
  )

  class(stats) <- "mediacloud_stats"

  stats

}
|||
|
|||
#' Print method for `mediacloud_stats` objects
#'
#' Writes a header line containing the `stats_date` element followed by one
#' "Name: value" line per remaining element (the `stats_date` and
#' `mediacloud_stats_id` elements are not listed). Names are title-cased with
#' underscores replaced by spaces and left-padded to a common width; values
#' are formatted with [scales::comma()].
#'
#' @param x,... see [print()]
#' @return `x`, invisibly
#' @noRd
#' @export
print.mediacloud_stats <- function(x, ...) {

  pretty_names <- names(x)
  pretty_names <- pretty_names[!(pretty_names %in% c("stats_date", "mediacloud_stats_id"))]

  # Pull the values before the names are prettified, while they still match.
  pretty_vals <- x[pretty_names]

  pretty_names <- stri_replace_all_fixed(pretty_names, "_", " ")
  pretty_names <- stri_trans_totitle(pretty_names)
  pretty_names <- stri_pad_left(pretty_names, max(nchar(pretty_names)))

  pretty_vals <- scales::comma(unlist(pretty_vals))

  cat(
    "Media Cloud summary stats as of ", x[["stats_date"]], "\n\n",
    paste0(sprintf("%s: %s", pretty_names, pretty_vals), collapse = "\n"), "\n",
    sep = ""
  )

  # Print methods return their argument invisibly.
  invisible(x)

}
|||
|
|||
## $active_crawled_feeds |
|||
## [1] 176064 |
|||
## |
|||
## $active_crawled_media |
|||
## [1] 59067 |
|||
## |
|||
## $daily_downloads |
|||
## [1] 1475208 |
|||
## |
|||
## $daily_stories |
|||
## [1] 696983 |
|||
## |
|||
## $mediacloud_stats_id |
|||
## [1] 544 |
|||
## |
|||
## $stats_date |
|||
## [1] "2019-07-21" |
|||
## |
|||
## $total_downloads |
|||
## [1] 0 |
|||
## |
|||
## $total_sentences |
|||
## [1] 0 |
|||
## |
|||
## $total_stories |
|||
## [1] 1272053504 |
|||
## |
|||
# list(active_crawled_feeds = 176064L, active_crawled_media = 59067L, |
|||
# daily_downloads = 1475208L, daily_stories = 696983L, mediacloud_stats_id = 544L, |
|||
# stats_date = "2019-07-21", total_downloads = 0L, total_sentences = 0L, |
|||
# total_stories = 1272053504L) -> x |
@ -0,0 +1,13 @@ |
|||
# Convert a non-empty logical vector to integer (TRUE -> 1L, FALSE -> 0L);
# any other input, including NULL and zero-length vectors, is returned as is.
numerify <- function(x) {
  is_nonempty_logical <- length(x) && inherits(x, "logical")
  if (!is_nonempty_logical) {
    return(x)
  }
  as.integer(x)
}
|||
|
|||
# Return `object` with its names replaced by `nm`. When `object` is not
# supplied it defaults to `nm`, giving a vector named by its own values.
set_names <- function (object = nm, nm) {
  renamed <- object
  names(renamed) <- nm
  renamed
}
@ -0,0 +1,69 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/media-list.R |
|||
\name{mc_media_list} |
|||
\alias{mc_media_list} |
|||
\title{Return multiple media sources} |
|||
\usage{ |
|||
mc_media_list(name = NULL, query = NULL, tags_id = NULL, |
|||
last_media_id = 0, rows = 0, tag_name = NULL, |
|||
timespans_id = NULL, topic_mode = NULL, include_dups = 0, |
|||
unhealthy = NULL, similar_media_id = NULL, sort = c("id", |
|||
"num_stories"), ..., api_key = mediacloud_api_key()) |
|||
} |
|||
\arguments{ |
|||
\item{name}{Name of media source for which to search; If this parameter is |
|||
specified, the call returns only media sources that match a case |
|||
insensitive search specified value. If the specified value is less |
|||
than 3 characters long, the call returns an empty list. Default: none} |
|||
|
|||
\item{query}{Return media with at least one sentence that matches this Solr |
|||
query; For a description of the Solr query format, see |
|||
\url{https://github.com/berkmancenter/mediacloud/blob/master/doc/api_2_0_spec/api_2_0_spec.md#apiv2stories_publiclist}. |
|||
Default: null} |
|||
|
|||
\item{tags_id}{Return media associated with any of the given tags; If this is not |
|||
a length 1 character vector each vector element will be passed in as |
|||
an additional "tag id" list. These lists are AND'd together on the |
|||
server. Default: null} |
|||
|
|||
\item{last_media_id}{Return media sources with a media_id greater than this value; Default: 0} |
|||
|
|||
\item{rows}{Number of media sources to return. Cannot be larger than 100; Default: 20} |
|||
|
|||
\item{tag_name}{Name of tag for which to return belonging media; Default: none} |
|||
|
|||
\item{timespans_id}{Return media within the given timespan; If this parameter |
|||
is specified, return media within the given time slice, sorted by |
|||
descending inlink_count within the timespan. If topic_mode is set to |
|||
'live', return media from the live topic stories rather than from |
|||
the frozen snapshot. Default: null} |
|||
|
|||
\item{topic_mode}{If set to 'live', return media from live topics; Default: null} |
|||
|
|||
\item{include_dups}{Include duplicate media among the results; Default: 0} |
|||
|
|||
\item{unhealthy}{Only return media that are currently marked as unhealthy (see mediahealth/list); Default: none} |
|||
|
|||
\item{similar_media_id}{Return media with the most tags in common; Default: none} |
|||
|
|||
\item{sort}{sort order of media: id, or num_stories; By default, media are |
|||
sorted by media_id. If this parameter is set to 'num_stories', |
|||
the media will be sorted by decreasing number of stories in the past |
|||
90 days} |
|||
|
|||
\item{...}{passed on to \code{httr} verb calls} |
|||
|
|||
\item{api_key}{See \code{\link[=mediacloud_api_key]{mediacloud_api_key()}}} |
|||
} |
|||
\description{ |
|||
Return multiple media sources |
|||
} |
|||
\note{ |
|||
By default, calls that specify a name parameter will only return media |
|||
that are not duplicates of some other media source. Media Cloud has |
|||
many media sources that are either subsets of other media sources or |
|||
are just holders for spidered media from a given media source, both of |
|||
which are marked as duplicate media and are not included in the |
|||
default results. If the 'include_dups' parameter is set to 1, those |
|||
duplicate sources will be included in the results. |
|||
} |
@ -0,0 +1,18 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/stats.R |
|||
\name{mc_stats} |
|||
\alias{mc_stats} |
|||
\title{Return basic summary stats about total sources, stories, feeds, etc |
|||
processed by Media Cloud} |
|||
\usage{ |
|||
mc_stats(..., api_key = mediacloud_api_key()) |
|||
} |
|||
\arguments{ |
|||
\item{...}{passed on to \code{httr} verb calls} |
|||
|
|||
\item{api_key}{See \code{\link[=mediacloud_api_key]{mediacloud_api_key()}}} |
|||
} |
|||
\description{ |
|||
Return basic summary stats about total sources, stories, feeds, etc |
|||
processed by Media Cloud |
|||
} |
@ -0,0 +1,29 @@ |
|||
% Generated by roxygen2: do not edit by hand |
|||
% Please edit documentation in R/api-key.r |
|||
\name{mediacloud_api_key} |
|||
\alias{mediacloud_api_key} |
|||
\title{Get or set MEDIACLOUD_API_KEY value} |
|||
\usage{ |
|||
mediacloud_api_key(force = FALSE) |
|||
} |
|||
\arguments{ |
|||
\item{force}{force setting a new Media Cloud API key for the current environment?} |
|||
} |
|||
\value{ |
|||
atomic character vector containing the Media Cloud API key |
|||
} |
|||
\description{ |
|||
The API wrapper functions in this package all rely on a Media Cloud API |
|||
key residing in the environment variable \code{MEDIACLOUD_API_KEY}. The |
|||
easiest way to accomplish this is to set it in the \code{.Renviron} file in your |
|||
home directory. |
|||
} |
|||
\details{ |
|||
Every API call must include a key parameter which will authenticate you to |
|||
the API service. To get a key, register for a user via |
|||
\url{https://topics.mediacloud.org/#/user/signup}. Once you have an account go |
|||
here to see your key \url{https://topics.mediacloud.org/#/user/profile}. |
|||
} |
|||
\references{ |
|||
\url{https://github.com/berkmancenter/mediacloud/blob/master/doc/api_2_0_spec/api_2_0_spec.md} |
|||
} |
Loading…
Reference in new issue