Data from Web of Science.

Query: TOPIC: (privacy AND student)
Timespan: All years.
Indexes: SCI-EXPANDED, SSCI, A&HCI, CPCI-S, CPCI-SSH, BKCI-S, BKCI-SSH, ESCI, CCR-EXPANDED, IC.

Read in the data

# Folder holding the Web of Science exports, resolved from the project root.
# Paths are built explicitly so the working directory is left untouched.
wos_dir <- here::here("data", "WoS")

# `pattern` is a regular expression: escape the dot and anchor the match so
# that only file names ending in ".txt" are picked up.
files <- dir(wos_dir, full.names = TRUE, pattern = "\\.txt$")

# Presumably defines `col_specs` (the column types handed to read_csv below);
# the file itself is not part of this section -- TODO confirm.
source(file.path(wos_dir, "col_specs.R"))

# Read every export, dropping the first 3 lines of each file before the header
# is parsed, and stack the results row-wise into a single data frame.
dat <- map_dfr(files, read_csv, skip = 3, col_types = col_specs)

Explore dataset

# Print a per-column summary of `dat` (missing values, completeness and
# basic distribution statistics), split by column type.
skimr::skim(dat)
Data summary
Name dat
Number of rows 2185
Number of columns 78
_______________________
Column type frequency:
character 18
numeric 60
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Title 0 1.00 4 238 0 2183 0
Authors 8 1.00 6 360 0 2143 0
Corporate Authors 2181 0.00 9 55 0 4 0
Editors 2183 0.00 28 63 0 2 0
Book Editors 1749 0.20 5 184 0 290 0
Source Title 1 1.00 4 249 0 1390 0
Publication Date 44 0.98 4 12 0 553 0
Publication Year 4 1.00 1 10 0 39 0
Volume 582 0.73 1 5 0 215 0
Issue 881 0.60 1 5 0 56 0
Supplement 2174 0.01 1 2 0 6 0
Special Issue 2061 0.06 1 3 0 9 0
Beginning Page 312 0.86 1 6 0 931 0
Ending Page 315 0.86 1 6 0 940 0
Article Number 1952 0.11 1 29 0 214 0
DOI 610 0.72 13 87 0 1575 0
Conference Title 1582 0.28 11 247 0 472 0
Conference Date 1579 0.28 1 19 0 463 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Part Number 2181 0 10.50 17.67 1 1.75 2.00 10.75 37.00 ▇▁▁▁▂
Total Citations 1 1 11.22 44.02 0 0.00 2.00 9.00 1370.00 ▇▁▁▁▁
Average per Year 0 1 1.28 3.29 0 0.00 0.33 1.32 51.00 ▇▁▁▁▁
1965 0 1 0.00 0.02 0 0.00 0.00 0.00 0.82 ▇▁▁▁▁
1966 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1967 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1968 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1969 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1970 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1971 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1972 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1973 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1974 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1975 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1976 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1977 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1978 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1979 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1980 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1981 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1982 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1983 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1984 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1985 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1986 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1987 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1988 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1989 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1990 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1991 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1992 0 1 0.00 0.00 0 0.00 0.00 0.00 0.00 ▁▁▇▁▁
1993 0 1 0.00 0.05 0 0.00 0.00 0.00 2.00 ▇▁▁▁▁
1994 0 1 0.00 0.04 0 0.00 0.00 0.00 1.00 ▇▁▁▁▁
1995 0 1 0.01 0.13 0 0.00 0.00 0.00 4.00 ▇▁▁▁▁
1996 0 1 0.01 0.30 0 0.00 0.00 0.00 10.00 ▇▁▁▁▁
1997 0 1 0.02 0.58 0 0.00 0.00 0.00 24.00 ▇▁▁▁▁
1998 0 1 0.03 0.66 0 0.00 0.00 0.00 24.00 ▇▁▁▁▁
1999 0 1 0.04 0.77 0 0.00 0.00 0.00 25.00 ▇▁▁▁▁
2000 0 1 0.05 0.91 0 0.00 0.00 0.00 33.00 ▇▁▁▁▁
2001 0 1 0.05 0.95 0 0.00 0.00 0.00 38.00 ▇▁▁▁▁
2002 0 1 0.06 1.18 0 0.00 0.00 0.00 46.00 ▇▁▁▁▁
2003 0 1 0.07 1.20 0 0.00 0.00 0.00 49.00 ▇▁▁▁▁
2004 0 1 0.07 1.15 0 0.00 0.00 0.00 43.00 ▇▁▁▁▁
2005 0 1 0.10 1.42 0 0.00 0.00 0.00 59.00 ▇▁▁▁▁
2006 0 1 0.11 1.25 0 0.00 0.00 0.00 45.00 ▇▁▁▁▁
2007 0 1 0.14 1.75 0 0.00 0.00 0.00 70.00 ▇▁▁▁▁
2008 0 1 0.15 1.51 0 0.00 0.00 0.00 60.00 ▇▁▁▁▁
2009 0 1 0.18 1.50 0 0.00 0.00 0.00 51.00 ▇▁▁▁▁
2010 0 1 0.22 1.64 0 0.00 0.00 0.00 58.00 ▇▁▁▁▁
2011 0 1 0.29 2.19 0 0.00 0.00 0.00 75.00 ▇▁▁▁▁
2012 0 1 0.37 2.24 0 0.00 0.00 0.00 62.00 ▇▁▁▁▁
2013 0 1 0.48 2.90 0 0.00 0.00 0.00 83.00 ▇▁▁▁▁
2014 0 1 0.61 3.15 0 0.00 0.00 0.00 74.00 ▇▁▁▁▁
2015 0 1 0.79 3.75 0 0.00 0.00 0.00 77.00 ▇▁▁▁▁
2016 0 1 0.97 4.06 0 0.00 0.00 0.00 71.00 ▇▁▁▁▁
2017 0 1 1.11 4.49 0 0.00 0.00 1.00 86.00 ▇▁▁▁▁
2018 0 1 1.25 4.46 0 0.00 0.00 1.00 75.00 ▇▁▁▁▁
2019 0 1 1.69 5.19 0 0.00 0.00 1.00 103.00 ▇▁▁▁▁
2020 0 1 1.95 5.24 0 0.00 0.00 2.00 81.00 ▇▁▁▁▁
2021 7 1 0.42 1.27 0 0.00 0.00 0.00 20.00 ▇▁▁▁▁

Wrangle & Clean data

# Normalise column names with janitor, but only for columns whose name
# contains no digit; names containing a digit (the per-year columns such as
# "1965") are left exactly as they are.
dat <- dat %>%
  rename_with(janitor::make_clean_names, .cols = !matches("\\d"))

# Turn the publication year into a Date. `truncated = 2L` lets lubridate
# accept values in which the month and day components are missing.
dat <- mutate(
  dat,
  publication_year = lubridate::ymd(publication_year, truncated = 2L)
)

Plot Time series

# Number of records per publication year. Only years strictly before today's
# date are kept, which filters out flawed year entries.
published <- dat %>%
  count(publication_year) %>%
  filter(publication_year < Sys.Date())

# Area chart of the number of new publications per year.
# `title`, `caption` and `theme_lk` are not defined in this section; they are
# assumed to be created earlier in the document.
ggplot(published, aes(x = publication_year, y = n)) +
  geom_area(fill = "#689f38", color = "#387002", alpha = 0.6) +
  scale_x_date(
    # `limits` is spelled out in full: the previous `limit` only worked through
    # partial argument matching.
    limits = c(as.Date("2001-01-01"), as.Date("2020-01-01")),
    # One tick per year, labelled with the two-digit year.
    date_breaks = "1 year", date_labels = "%y"
  ) +
  labs(
    title = title,
    subtitle = "New Publications per Year",
    caption = caption,
    x = "Publication Date", y = ""
  ) +
  theme_lk

# Total number of citations received in each year, summed over all records.
# The per-year columns are the ones whose name is exactly four digits, so the
# selection keeps working if an export contains further years.
cited <- dat %>%
  select(matches("^\\d{4}$")) %>%
  # Use a lambda instead of passing `na.rm` through `across(...)`, which is
  # deprecated as of dplyr 1.1.0. Missing counts are ignored in the sums.
  summarise(across(everything(), ~ sum(.x, na.rm = TRUE))) %>%
  # One row per year: column names go to `year`, the sums to `cited_n`.
  pivot_longer(everything(), names_to = "year", values_to = "cited_n") %>%
  # `truncated = 2L` lets lubridate parse a bare year into a Date.
  mutate(year = lubridate::ymd(year, truncated = 2L))

# Area chart of the total number of citations counted in each year.
# `title`, `caption` and `theme_lk` are not defined in this section; they are
# assumed to be created earlier in the document.
ggplot(cited, aes(x = year, y = cited_n)) +
  geom_area(fill = "#039be5", color = "#006db3", alpha = 0.6) +
  scale_x_date(
    # `limits` is spelled out in full: the previous `limit` only worked through
    # partial argument matching.
    limits = c(as.Date("2001-01-01"), as.Date("2020-01-01")),
    # One tick per year, labelled with the two-digit year.
    date_breaks = "1 year", date_labels = "%y"
  ) +
  labs(
    title = title,
    subtitle = "Sum of Times Cited per Year",
    caption = caption,
    # The x axis holds the year in which citations were counted, not the
    # publication date of the cited records.
    x = "Citation Year", y = ""
  ) +
  theme_lk