Skip to content

Commit eb5d52a

Browse files
authored
Data tweaks (#94)
* Fix typo in gutenberg_languages parser
* Add better checks for data.
1 parent d8d792c commit eb5d52a

File tree

7 files changed

+93
-5
lines changed

7 files changed

+93
-5
lines changed

‎NEWS.md‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535

3636
* Make compatible with tidyr v1.0.0
3737
* data_frame is deprecated, use tibble (thanks @evanodell for #21)
38-
* ROpenSci updates to README (thanks @maelle for #23)
38+
* rOpenSci updates to README (thanks @maelle for #23)
3939

4040
# gutenbergr 0.1.4
4141

@@ -60,7 +60,7 @@
6060
* Added `all_languages` and `only_languages` arguments to `gutenberg_works`, allowing fine-grained control of languages. (For example, "either English or French" or "both English and French")
6161
* Changed get_gutenberg_mirror to use xml2 directly, in order to handle AppVeyor
6262
* Removed use of data() in `gutenberg_works`, since it slows down `gutenberg_works` about 2X
63-
* Various documentation, vignette, and README adjustments in response to ROpenSci feedback.
63+
* Various documentation, vignette, and README adjustments in response to rOpenSci feedback.
6464
* Added AppVeyor for Windows continuous integration
6565
* Added code coverage information through codecov.io and covr, along with tests to improve coverage
6666

‎R/data.R‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
#' \item{title}{Title}
1414
#' \item{author}{Author, if a single one given. Given as last name
1515
#' first (e.g. "Doyle, Arthur Conan")}
16-
#' \item{author_id}{Project Gutenberg author ID}
16+
#' \item{gutenberg_author_id}{Project Gutenberg author ID}
1717
#' \item{language}{Language ISO 639 code, separated by / if multiple. Two
1818
#' letter code if one exists, otherwise three letter. See
1919
#' \url{https://en.wikipedia.org/wiki/List_of_ISO_639-2_codes}}

‎data-raw/parse_rdfs.R‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ new_gutenberg_languages <- purrr::map(all_metadata, ~ .x$languages) |>
3232
purrr::list_rbind() |>
3333
dplyr::distinct() |>
3434
dplyr::arrange(gutenberg_id, language) |>
35-
dplyr::mutate(lanuage = as.factor(language))
35+
dplyr::mutate(language = as.factor(language))
3636

3737
new_gutenberg_metadata <- purrr::map(all_metadata, ~ .x$metadata) |>
3838
purrr::list_rbind() |>

‎data/gutenberg_languages.rda‎

-16.1 KB
Binary file not shown.

‎inst/WORDLIST‎

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
AppVeyor
2+
CMD
3+
Codecov
4+
ORCID
5+
README
6+
WikipediR
7+
Wuthering
8+
audiobooks
9+
codecov
10+
covr
11+
df
12+
dplyr
13+
gutenberg
14+
http
15+
https
16+
humaniformat
17+
io
18+
lcc
19+
lcsh
20+
magrittr
21+
pageview
22+
pkgdown
23+
pre
24+
rOpenSci
25+
ropensci
26+
ropenscilabs
27+
roxygen
28+
rvest
29+
tbl
30+
tibble
31+
tidyr
32+
tidytext
33+
tm
34+
tokenization
35+
wikipediatrend

‎man/gutenberg_metadata.Rd‎

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎tests/testthat/test-data.R‎

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,56 @@ test_that("All four datasets have a date-updated", {
99
expect_s3_class(d3, "Date")
1010
expect_s3_class(d4, "Date")
1111
})
12+
13+
test_that("gutenberg_metadata has the expected shape", {
14+
expect_s3_class(gutenberg_metadata, c("tbl_df", "tbl", "data.frame"))
15+
expect_named(
16+
gutenberg_metadata,
17+
c(
18+
"gutenberg_id",
19+
"title",
20+
"author",
21+
"gutenberg_author_id",
22+
"language",
23+
"gutenberg_bookshelf",
24+
"rights",
25+
"has_text"
26+
)
27+
)
28+
expect_gte(nrow(gutenberg_metadata), 79491)
29+
})
30+
31+
test_that("gutenberg_subjects has the expected shape", {
32+
expect_s3_class(gutenberg_subjects, c("tbl_df", "tbl", "data.frame"))
33+
expect_named(
34+
gutenberg_subjects,
35+
c("gutenberg_id", "subject_type", "subject")
36+
)
37+
expect_gte(nrow(gutenberg_subjects), 255000)
38+
})
39+
40+
test_that("gutenberg_authors has the expected shape", {
41+
expect_s3_class(gutenberg_authors, c("tbl_df", "tbl", "data.frame"))
42+
expect_named(
43+
gutenberg_authors,
44+
c(
45+
"gutenberg_author_id",
46+
"author",
47+
"alias",
48+
"birthdate",
49+
"deathdate",
50+
"wikipedia",
51+
"aliases"
52+
)
53+
)
54+
expect_gte(nrow(gutenberg_authors), 26000)
55+
})
56+
57+
test_that("gutenberg_languages has the expected shape", {
58+
expect_s3_class(gutenberg_languages, c("tbl_df", "tbl", "data.frame"))
59+
expect_named(
60+
gutenberg_languages,
61+
c("gutenberg_id", "language", "total_languages")
62+
)
63+
expect_gte(nrow(gutenberg_languages), 76000)
64+
})

0 commit comments

Comments
 (0)