Bisherige Variablen aus dem rtweet-Paket

rtweet_cols <-
  c(
    user_id = "id_str",
    name = "name",
    screen_name = "screen_name",
    location = "location",
    description = "description",
    url = "url",
    protected = "protected",
    followers_count = "followers_count",
    friends_count = "friends_count",
    listed_count = "listed_count",
    statuses_count = "statuses_count",
    favourites_count = "favourites_count",
    account_created_at = "created_at",
    verified = "verified",
    profile_url = "profile_url",
    profile_expanded_url = "profile_expanded_url",
    account_lang = "lang",
    profile_banner_url = "profile_banner_url",
    profile_background_url = "profile_background_image_url",
    profile_image_url = "profile_image_url",

    status_id = "id_str",
    created_at = "created_at",
    user_id = "user_id",
    screen_name = "screen_name",
    text = "text",
    source = "source",
    display_text_width = "display_text_width",
    reply_to_status_id = "in_reply_to_status_id_str",
    reply_to_user_id = "in_reply_to_user_id_str",
    reply_to_screen_name = "in_reply_to_screen_name",
    is_quote = "is_quote",
    is_retweet = "is_retweet",
    favorite_count = "favorite_count",
    retweet_count = "retweet_count",
    quote_count = "quote_count",
    reply_count = "reply_count",
    hashtags = "hashtags",
    symbols = "symbols",
    urls_url = "urls_url",
    urls_t.co = "urls_t.co",
    urls_expanded_url = "urls_expanded_url",
    media_url = "media_url",
    media_t.co = "media_t.co",
    media_expanded_url = "media_expanded_url",
    media_type = "media_type",
    ext_media_url = "ext_media_url",
    ext_media_t.co = "ext_media_t.co",
    ext_media_expanded_url = "ext_media_expanded_url",
    ext_media_type = "ext_media_expanded_type",
    mentions_user_id = "mentions_user_id",
    mentions_screen_name = "mentions_screen_name",
    lang = "lang",
    quoted_status_id = "quoted_status_id",
    quoted_text = "quoted_text",
    quoted_created_at = "quoted_created_at",
    quoted_source = "quoted_source",
    quoted_favorite_count = "quoted_favorite_count",
    quoted_retweet_count = "quoted_retweet_count",
    quoted_user_id = "quoted_user_id",
    quoted_screen_name = "quoted_screen_name",
    quoted_name = "quoted_name",
    quoted_followers_count = "quoted_followers_count",
    quoted_friends_count = "quoted_friends_count",
    quoted_statuses_count = "quoted_statuses_count",
    quoted_location = "quoted_location",
    quoted_description = "quoted_description",
    quoted_verified = "quoted_verified",
    retweet_status_id = "retweet_status_id",
    retweet_text = "retweet_text",
    retweet_created_at = "retweet_created_at",
    retweet_source = "retweet_source",
    retweet_favorite_count = "retweet_favorite_count",
    retweet_retweet_count = "retweet_retweet_count",
    retweet_user_id = "retweet_user_id",
    retweet_screen_name = "retweet_screen_name",
    retweet_name = "retweet_name",
    retweet_followers_count = "retweet_followers_count",
    retweet_friends_count = "retweet_friends_count",
    retweet_statuses_count = "retweet_statuses_count",
    retweet_location = "retweet_location",
    retweet_description = "retweet_description",
    retweet_verified = "retweet_verified",
    place_url = "place_url",
    place_name = "place_name",
    place_full_name = "place_full_name",
    place_type = "place_type",
    country = "country",
    country_code = "country_code",
    geo_coords = "geo_coords",
    coords_coords = "coordinates_coords",
    bbox_coords = "bbox_coords"
  )

Import des JSON-Datensatzes von twurl

json_data <- fromJSON(txt = "tweets_dl.json")
json_data$requestParameters
## $maxResults
## [1] 500
## 
## $fromDate
## [1] "200603220000"
## 
## $toDate
## [1] "202009252359"
dat <- json_data$results %>% as.data.frame()

Nested DataFrame

ncol(dat)
## [1] 37
glimpse(dat)
## Rows: 500
## Columns: 37
## $ created_at                <chr> "Thu Sep 24 18:57:37 +0000 2020", "Tue Sep …
## $ id                        <dbl> 1.309205e+18, 1.308458e+18, 1.307026e+18, 1…
## $ id_str                    <chr> "1309205362776498176", "1308457817259405313…
## $ text                      <chr> "Wondering how to process and store the Twe…
## $ source                    <chr> "<a href=\"https://mobile.twitter.com\" rel…
## $ truncated                 <lgl> TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRU…
## $ in_reply_to_status_id     <dbl> NA, NA, 1.304103e+18, NA, 1.306292e+18, NA,…
## $ in_reply_to_status_id_str <chr> NA, NA, "1304102743196356610", NA, "1306292…
## $ in_reply_to_user_id       <dbl> NA, NA, 2244994945, NA, 2244994945, NA, NA,…
## $ in_reply_to_user_id_str   <chr> NA, NA, "2244994945", NA, "2244994945", NA,…
## $ in_reply_to_screen_name   <chr> NA, NA, "TwitterDev", NA, "TwitterDev", NA,…
## $ user                      <df[,39]> <data.frame[26 x 39]>
## $ geo                       <df[,2]> <data.frame[26 x 2]>
## $ coordinates               <df[,2]> <data.frame[26 x 2]>
## $ place                     <df[,9]> <data.frame[26 x 9]>
## $ contributors              <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ is_quote_status           <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F…
## $ extended_tweet            <df[,4]> <data.frame[26 x 4]>
## $ quote_count               <int> 2, 0, 1, 0, 0, 4, 4, 1, 0, 5, 0, 4, 0, 2, 0…
## $ reply_count               <int> 6, 0, 2, 0, 3, 2, 11, 6, 0, 16, 0, 3, 0, 4,…
## $ retweet_count             <int> 20, 0, 10, 0, 2, 14, 30, 11, 0, 30, 0, 9, 0…
## $ favorite_count            <int> 69, 0, 68, 0, 22, 62, 101, 57, 0, 103, 0, 5…
## $ entities                  <df[,5]> <data.frame[26 x 5]>
## $ favorited                 <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F…
## $ retweeted                 <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F…
## $ possibly_sensitive        <lgl> FALSE, NA, FALSE, NA, FALSE, FALSE, FALSE, …
## $ filter_level              <chr> "low", "low", "low", "low", "low", "low", "…
## $ lang                      <chr> "en", "en", "en", "en", "en", "en", "en", "…
## $ matching_rules            <list> [<data.frame[1 x 1]>, <data.frame[1 x 1]>,…
## $ retweeted_status          <df[,34]> <data.frame[26 x 34]>
## $ display_text_range        <list> [NULL, NULL, NULL, NULL, NULL, NULL, <0, 1…
## $ quoted_status_id          <dbl> NA, NA, NA, NA, NA, NA, NA, 1.303722e+18, N…
## $ quoted_status_id_str      <chr> NA, NA, NA, NA, NA, NA, NA, "13037223575892…
## $ quoted_status             <df[,32]> <data.frame[26 x 32]>
## $ quoted_status_permalink   <df[,3]> <data.frame[26 x 3]>
## $ extended_entities         <df[,1]> <data.frame[26 x 1]>
## $ scopes                    <df[,1]> <data.frame[26 x 1]>

Unnested DataFrame

# Set Parsing Mode to flatten = TRUE
json_data2 <- fromJSON(txt = "tweets_dl.json", flatten = TRUE)
dat2 <- json_data2$results %>% as.data.frame()

ncol(dat2)
## [1] 354
# > 354 (WTF?!)
glimpse(dat2)
## Rows: 500
## Columns: 354
## $ created_at                                                             <chr> …
## $ id                                                                     <dbl> …
## $ id_str                                                                 <chr> …
## $ text                                                                   <chr> …
## $ source                                                                 <chr> …
## $ truncated                                                              <lgl> …
## $ in_reply_to_status_id                                                  <dbl> …
## $ in_reply_to_status_id_str                                              <chr> …
## $ in_reply_to_user_id                                                    <dbl> …
## $ in_reply_to_user_id_str                                                <chr> …
## $ in_reply_to_screen_name                                                <chr> …
## $ contributors                                                           <lgl> …
## $ is_quote_status                                                        <lgl> …
## $ quote_count                                                            <int> …
## $ reply_count                                                            <int> …
## $ retweet_count                                                          <int> …
## $ favorite_count                                                         <int> …
## $ favorited                                                              <lgl> …
## $ retweeted                                                              <lgl> …
## $ possibly_sensitive                                                     <lgl> …
## $ filter_level                                                           <chr> …
## $ lang                                                                   <chr> …
## $ matching_rules                                                         <list> …
## $ display_text_range                                                     <list> …
## $ quoted_status_id                                                       <dbl> …
## $ quoted_status_id_str                                                   <chr> …
## $ user.id                                                                <dbl> …
## $ user.id_str                                                            <chr> …
## $ user.name                                                              <chr> …
## $ user.screen_name                                                       <chr> …
## $ user.location                                                          <chr> …
## $ user.url                                                               <chr> …
## $ user.description                                                       <chr> …
## $ user.translator_type                                                   <chr> …
## $ user.protected                                                         <lgl> …
## $ user.verified                                                          <lgl> …
## $ user.followers_count                                                   <int> …
## $ user.friends_count                                                     <int> …
## $ user.listed_count                                                      <int> …
## $ user.favourites_count                                                  <int> …
## $ user.statuses_count                                                    <int> …
## $ user.created_at                                                        <chr> …
## $ user.utc_offset                                                        <lgl> …
## $ user.time_zone                                                         <lgl> …
## $ user.geo_enabled                                                       <lgl> …
## $ user.lang                                                              <lgl> …
## $ user.contributors_enabled                                              <lgl> …
## $ user.is_translator                                                     <lgl> …
## $ user.profile_background_color                                          <chr> …
## $ user.profile_background_image_url                                      <chr> …
## $ user.profile_background_image_url_https                                <chr> …
## $ user.profile_background_tile                                           <lgl> …
## $ user.profile_link_color                                                <chr> …
## $ user.profile_sidebar_border_color                                      <chr> …
## $ user.profile_sidebar_fill_color                                        <chr> …
## $ user.profile_text_color                                                <chr> …
## $ user.profile_use_background_image                                      <lgl> …
## $ user.profile_image_url                                                 <chr> …
## $ user.profile_image_url_https                                           <chr> …
## $ user.profile_banner_url                                                <chr> …
## $ user.default_profile                                                   <lgl> …
## $ user.default_profile_image                                             <lgl> …
## $ user.following                                                         <lgl> …
## $ user.follow_request_sent                                               <lgl> …
## $ user.notifications                                                     <lgl> …
## $ geo.type                                                               <chr> …
## $ geo.coordinates                                                        <list> …
## $ coordinates.type                                                       <chr> …
## $ coordinates.coordinates                                                <list> …
## $ place.id                                                               <chr> …
## $ place.url                                                              <chr> …
## $ place.place_type                                                       <chr> …
## $ place.name                                                             <chr> …
## $ place.full_name                                                        <chr> …
## $ place.country_code                                                     <chr> …
## $ place.country                                                          <chr> …
## $ place.bounding_box.type                                                <chr> …
## $ place.bounding_box.coordinates                                         <list> …
## $ extended_tweet.full_text                                               <chr> …
## $ extended_tweet.display_text_range                                      <list> …
## $ extended_tweet.entities.hashtags                                       <list> …
## $ extended_tweet.entities.urls                                           <list> …
## $ extended_tweet.entities.user_mentions                                  <list> …
## $ extended_tweet.entities.symbols                                        <list> …
## $ extended_tweet.entities.media                                          <list> …
## $ extended_tweet.extended_entities.media                                 <list> …
## $ entities.hashtags                                                      <list> …
## $ entities.urls                                                          <list> …
## $ entities.user_mentions                                                 <list> …
## $ entities.symbols                                                       <list> …
## $ entities.media                                                         <list> …
## $ retweeted_status.created_at                                            <chr> …
## $ retweeted_status.id                                                    <dbl> …
## $ retweeted_status.id_str                                                <chr> …
## $ retweeted_status.text                                                  <chr> …
## $ retweeted_status.source                                                <chr> …
## $ retweeted_status.truncated                                             <lgl> …
## $ retweeted_status.in_reply_to_status_id                                 <dbl> …
## $ retweeted_status.in_reply_to_status_id_str                             <chr> …
## $ retweeted_status.in_reply_to_user_id                                   <dbl> …
## $ retweeted_status.in_reply_to_user_id_str                               <chr> …
## $ retweeted_status.in_reply_to_screen_name                               <chr> …
## $ retweeted_status.geo                                                   <lgl> …
## $ retweeted_status.coordinates                                           <lgl> …
## $ retweeted_status.contributors                                          <lgl> …
## $ retweeted_status.is_quote_status                                       <lgl> …
## $ retweeted_status.quote_count                                           <int> …
## $ retweeted_status.reply_count                                           <int> …
## $ retweeted_status.retweet_count                                         <int> …
## $ retweeted_status.favorite_count                                        <int> …
## $ retweeted_status.favorited                                             <lgl> …
## $ retweeted_status.retweeted                                             <lgl> …
## $ retweeted_status.possibly_sensitive                                    <lgl> …
## $ retweeted_status.filter_level                                          <chr> …
## $ retweeted_status.lang                                                  <chr> …
## $ retweeted_status.display_text_range                                    <list> …
## $ retweeted_status.quoted_status_id                                      <dbl> …
## $ retweeted_status.quoted_status_id_str                                  <chr> …
## $ retweeted_status.user.id                                               <dbl> …
## $ retweeted_status.user.id_str                                           <chr> …
## $ retweeted_status.user.name                                             <chr> …
## $ retweeted_status.user.screen_name                                      <chr> …
## $ retweeted_status.user.location                                         <chr> …
## $ retweeted_status.user.url                                              <chr> …
## $ retweeted_status.user.description                                      <chr> …
## $ retweeted_status.user.translator_type                                  <chr> …
## $ retweeted_status.user.protected                                        <lgl> …
## $ retweeted_status.user.verified                                         <lgl> …
## $ retweeted_status.user.followers_count                                  <int> …
## $ retweeted_status.user.friends_count                                    <int> …
## $ retweeted_status.user.listed_count                                     <int> …
## $ retweeted_status.user.favourites_count                                 <int> …
## $ retweeted_status.user.statuses_count                                   <int> …
## $ retweeted_status.user.created_at                                       <chr> …
## $ retweeted_status.user.utc_offset                                       <lgl> …
## $ retweeted_status.user.time_zone                                        <lgl> …
## $ retweeted_status.user.geo_enabled                                      <lgl> …
## $ retweeted_status.user.lang                                             <lgl> …
## $ retweeted_status.user.contributors_enabled                             <lgl> …
## $ retweeted_status.user.is_translator                                    <lgl> …
## $ retweeted_status.user.profile_background_color                         <chr> …
## $ retweeted_status.user.profile_background_image_url                     <chr> …
## $ retweeted_status.user.profile_background_image_url_https               <chr> …
## $ retweeted_status.user.profile_background_tile                          <lgl> …
## $ retweeted_status.user.profile_link_color                               <chr> …
## $ retweeted_status.user.profile_sidebar_border_color                     <chr> …
## $ retweeted_status.user.profile_sidebar_fill_color                       <chr> …
## $ retweeted_status.user.profile_text_color                               <chr> …
## $ retweeted_status.user.profile_use_background_image                     <lgl> …
## $ retweeted_status.user.profile_image_url                                <chr> …
## $ retweeted_status.user.profile_image_url_https                          <chr> …
## $ retweeted_status.user.default_profile                                  <lgl> …
## $ retweeted_status.user.default_profile_image                            <lgl> …
## $ retweeted_status.user.following                                        <lgl> …
## $ retweeted_status.user.follow_request_sent                              <lgl> …
## $ retweeted_status.user.notifications                                    <lgl> …
## $ retweeted_status.user.profile_banner_url                               <chr> …
## $ retweeted_status.place.id                                              <chr> …
## $ retweeted_status.place.url                                             <chr> …
## $ retweeted_status.place.place_type                                      <chr> …
## $ retweeted_status.place.name                                            <chr> …
## $ retweeted_status.place.full_name                                       <chr> …
## $ retweeted_status.place.country_code                                    <chr> …
## $ retweeted_status.place.country                                         <chr> …
## $ retweeted_status.place.bounding_box.type                               <chr> …
## $ retweeted_status.place.bounding_box.coordinates                        <list> …
## $ retweeted_status.entities.hashtags                                     <list> …
## $ retweeted_status.entities.urls                                         <list> …
## $ retweeted_status.entities.user_mentions                                <list> …
## $ retweeted_status.entities.symbols                                      <list> …
## $ retweeted_status.entities.media                                        <list> …
## $ retweeted_status.extended_tweet.full_text                              <chr> …
## $ retweeted_status.extended_tweet.display_text_range                     <list> …
## $ retweeted_status.extended_tweet.entities.hashtags                      <list> …
## $ retweeted_status.extended_tweet.entities.urls                          <list> …
## $ retweeted_status.extended_tweet.entities.user_mentions                 <list> …
## $ retweeted_status.extended_tweet.entities.symbols                       <list> …
## $ retweeted_status.extended_tweet.entities.media                         <list> …
## $ retweeted_status.extended_tweet.extended_entities.media                <list> …
## $ retweeted_status.quoted_status.created_at                              <chr> …
## $ retweeted_status.quoted_status.id                                      <dbl> …
## $ retweeted_status.quoted_status.id_str                                  <chr> …
## $ retweeted_status.quoted_status.text                                    <chr> …
## $ retweeted_status.quoted_status.display_text_range                      <list> …
## $ retweeted_status.quoted_status.source                                  <chr> …
## $ retweeted_status.quoted_status.truncated                               <lgl> …
## $ retweeted_status.quoted_status.in_reply_to_status_id                   <lgl> …
## $ retweeted_status.quoted_status.in_reply_to_status_id_str               <lgl> …
## $ retweeted_status.quoted_status.in_reply_to_user_id                     <lgl> …
## $ retweeted_status.quoted_status.in_reply_to_user_id_str                 <lgl> …
## $ retweeted_status.quoted_status.in_reply_to_screen_name                 <lgl> …
## $ retweeted_status.quoted_status.geo                                     <lgl> …
## $ retweeted_status.quoted_status.coordinates                             <lgl> …
## $ retweeted_status.quoted_status.place                                   <lgl> …
## $ retweeted_status.quoted_status.contributors                            <lgl> …
## $ retweeted_status.quoted_status.is_quote_status                         <lgl> …
## $ retweeted_status.quoted_status.quote_count                             <int> …
## $ retweeted_status.quoted_status.reply_count                             <int> …
## $ retweeted_status.quoted_status.retweet_count                           <int> …
## $ retweeted_status.quoted_status.favorite_count                          <int> …
## $ retweeted_status.quoted_status.favorited                               <lgl> …
## $ retweeted_status.quoted_status.retweeted                               <lgl> …
## $ retweeted_status.quoted_status.possibly_sensitive                      <lgl> …
## $ retweeted_status.quoted_status.filter_level                            <chr> …
## $ retweeted_status.quoted_status.lang                                    <chr> …
## $ retweeted_status.quoted_status.user.id                                 <dbl> …
## $ retweeted_status.quoted_status.user.id_str                             <chr> …
## $ retweeted_status.quoted_status.user.name                               <chr> …
## $ retweeted_status.quoted_status.user.screen_name                        <chr> …
## $ retweeted_status.quoted_status.user.location                           <chr> …
## $ retweeted_status.quoted_status.user.url                                <chr> …
## $ retweeted_status.quoted_status.user.description                        <chr> …
## $ retweeted_status.quoted_status.user.translator_type                    <chr> …
## $ retweeted_status.quoted_status.user.protected                          <lgl> …
## $ retweeted_status.quoted_status.user.verified                           <lgl> …
## $ retweeted_status.quoted_status.user.followers_count                    <int> …
## $ retweeted_status.quoted_status.user.friends_count                      <int> …
## $ retweeted_status.quoted_status.user.listed_count                       <int> …
## $ retweeted_status.quoted_status.user.favourites_count                   <int> …
## $ retweeted_status.quoted_status.user.statuses_count                     <int> …
## $ retweeted_status.quoted_status.user.created_at                         <chr> …
## $ retweeted_status.quoted_status.user.utc_offset                         <lgl> …
## $ retweeted_status.quoted_status.user.time_zone                          <lgl> …
## $ retweeted_status.quoted_status.user.geo_enabled                        <lgl> …
## $ retweeted_status.quoted_status.user.lang                               <lgl> …
## $ retweeted_status.quoted_status.user.contributors_enabled               <lgl> …
## $ retweeted_status.quoted_status.user.is_translator                      <lgl> …
## $ retweeted_status.quoted_status.user.profile_background_color           <chr> …
## $ retweeted_status.quoted_status.user.profile_background_image_url       <chr> …
## $ retweeted_status.quoted_status.user.profile_background_image_url_https <chr> …
## $ retweeted_status.quoted_status.user.profile_background_tile            <lgl> …
## $ retweeted_status.quoted_status.user.profile_link_color                 <chr> …
## $ retweeted_status.quoted_status.user.profile_sidebar_border_color       <chr> …
## $ retweeted_status.quoted_status.user.profile_sidebar_fill_color         <chr> …
## $ retweeted_status.quoted_status.user.profile_text_color                 <chr> …
## $ retweeted_status.quoted_status.user.profile_use_background_image       <lgl> …
## $ retweeted_status.quoted_status.user.profile_image_url                  <chr> …
## $ retweeted_status.quoted_status.user.profile_image_url_https            <chr> …
## $ retweeted_status.quoted_status.user.profile_banner_url                 <chr> …
## $ retweeted_status.quoted_status.user.default_profile                    <lgl> …
## $ retweeted_status.quoted_status.user.default_profile_image              <lgl> …
## $ retweeted_status.quoted_status.user.following                          <lgl> …
## $ retweeted_status.quoted_status.user.follow_request_sent                <lgl> …
## $ retweeted_status.quoted_status.user.notifications                      <lgl> …
## $ retweeted_status.quoted_status.extended_tweet.full_text                <chr> …
## $ retweeted_status.quoted_status.extended_tweet.display_text_range       <list> …
## $ retweeted_status.quoted_status.extended_tweet.entities.hashtags        <list> …
## $ retweeted_status.quoted_status.extended_tweet.entities.urls            <list> …
## $ retweeted_status.quoted_status.extended_tweet.entities.user_mentions   <list> …
## $ retweeted_status.quoted_status.extended_tweet.entities.symbols         <list> …
## $ retweeted_status.quoted_status.extended_tweet.entities.media           <list> …
## $ retweeted_status.quoted_status.extended_tweet.extended_entities.media  <list> …
## $ retweeted_status.quoted_status.entities.hashtags                       <list> …
## $ retweeted_status.quoted_status.entities.urls                           <list> …
## $ retweeted_status.quoted_status.entities.user_mentions                  <list> …
## $ retweeted_status.quoted_status.entities.symbols                        <list> …
## $ retweeted_status.quoted_status_permalink.url                           <chr> …
## $ retweeted_status.quoted_status_permalink.expanded                      <chr> …
## $ retweeted_status.quoted_status_permalink.display                       <chr> …
## $ retweeted_status.extended_entities.media                               <list> …
## $ quoted_status.created_at                                               <chr> …
## $ quoted_status.id                                                       <dbl> …
## $ quoted_status.id_str                                                   <chr> …
## $ quoted_status.text                                                     <chr> …
## $ quoted_status.display_text_range                                       <list> …
## $ quoted_status.source                                                   <chr> …
## $ quoted_status.truncated                                                <lgl> …
## $ quoted_status.in_reply_to_status_id                                    <dbl> …
## $ quoted_status.in_reply_to_status_id_str                                <chr> …
## $ quoted_status.in_reply_to_user_id                                      <dbl> …
## $ quoted_status.in_reply_to_user_id_str                                  <chr> …
## $ quoted_status.in_reply_to_screen_name                                  <chr> …
## $ quoted_status.geo                                                      <lgl> …
## $ quoted_status.coordinates                                              <lgl> …
## $ quoted_status.contributors                                             <lgl> …
## $ quoted_status.is_quote_status                                          <lgl> …
## $ quoted_status.quote_count                                              <int> …
## $ quoted_status.reply_count                                              <int> …
## $ quoted_status.retweet_count                                            <int> …
## $ quoted_status.favorite_count                                           <int> …
## $ quoted_status.favorited                                                <lgl> …
## $ quoted_status.retweeted                                                <lgl> …
## $ quoted_status.possibly_sensitive                                       <lgl> …
## $ quoted_status.filter_level                                             <chr> …
## $ quoted_status.lang                                                     <chr> …
## $ quoted_status.quoted_status_id                                         <dbl> …
## $ quoted_status.quoted_status_id_str                                     <chr> …
## $ quoted_status.user.id                                                  <dbl> …
## $ quoted_status.user.id_str                                              <chr> …
## $ quoted_status.user.name                                                <chr> …
## $ quoted_status.user.screen_name                                         <chr> …
## $ quoted_status.user.location                                            <chr> …
## $ quoted_status.user.url                                                 <chr> …
## $ quoted_status.user.description                                         <chr> …
## $ quoted_status.user.translator_type                                     <chr> …
## $ quoted_status.user.protected                                           <lgl> …
## $ quoted_status.user.verified                                            <lgl> …
## $ quoted_status.user.followers_count                                     <int> …
## $ quoted_status.user.friends_count                                       <int> …
## $ quoted_status.user.listed_count                                        <int> …
## $ quoted_status.user.favourites_count                                    <int> …
## $ quoted_status.user.statuses_count                                      <int> …
## $ quoted_status.user.created_at                                          <chr> …
## $ quoted_status.user.utc_offset                                          <lgl> …
## $ quoted_status.user.time_zone                                           <lgl> …
## $ quoted_status.user.geo_enabled                                         <lgl> …
## $ quoted_status.user.lang                                                <lgl> …
## $ quoted_status.user.contributors_enabled                                <lgl> …
## $ quoted_status.user.is_translator                                       <lgl> …
## $ quoted_status.user.profile_background_color                            <chr> …
## $ quoted_status.user.profile_background_image_url                        <chr> …
## $ quoted_status.user.profile_background_image_url_https                  <chr> …
## $ quoted_status.user.profile_background_tile                             <lgl> …
## $ quoted_status.user.profile_link_color                                  <chr> …
## $ quoted_status.user.profile_sidebar_border_color                        <chr> …
## $ quoted_status.user.profile_sidebar_fill_color                          <chr> …
## $ quoted_status.user.profile_text_color                                  <chr> …
## $ quoted_status.user.profile_use_background_image                        <lgl> …
## $ quoted_status.user.profile_image_url                                   <chr> …
## $ quoted_status.user.profile_image_url_https                             <chr> …
## $ quoted_status.user.profile_banner_url                                  <chr> …
## $ quoted_status.user.default_profile                                     <lgl> …
## $ quoted_status.user.default_profile_image                               <lgl> …
## $ quoted_status.user.following                                           <lgl> …
## $ quoted_status.user.follow_request_sent                                 <lgl> …
## $ quoted_status.user.notifications                                       <lgl> …
## $ quoted_status.place.id                                                 <chr> …
## $ quoted_status.place.url                                                <chr> …
## $ quoted_status.place.place_type                                         <chr> …
## $ quoted_status.place.name                                               <chr> …
## $ quoted_status.place.full_name                                          <chr> …
## $ quoted_status.place.country_code                                       <chr> …
## $ quoted_status.place.country                                            <chr> …
## $ quoted_status.place.bounding_box.type                                  <chr> …
## $ quoted_status.place.bounding_box.coordinates                           <list> …
## $ quoted_status.extended_tweet.full_text                                 <chr> …
## $ quoted_status.extended_tweet.display_text_range                        <list> …
## $ quoted_status.extended_tweet.entities.hashtags                         <list> …
## $ quoted_status.extended_tweet.entities.urls                             <list> …
## $ quoted_status.extended_tweet.entities.user_mentions                    <list> …
## $ quoted_status.extended_tweet.entities.symbols                          <list> …
## $ quoted_status.extended_tweet.entities.media                            <list> …
## $ quoted_status.extended_tweet.extended_entities.media                   <list> …
## $ quoted_status.entities.hashtags                                        <list> …
## $ quoted_status.entities.urls                                            <list> …
## $ quoted_status.entities.user_mentions                                   <list> …
## $ quoted_status.entities.symbols                                         <list> …
## $ quoted_status.entities.media                                           <list> …
## $ quoted_status.extended_entities.media                                  <list> …
## $ quoted_status_permalink.url                                            <chr> …
## $ quoted_status_permalink.expanded                                       <chr> …
## $ quoted_status_permalink.display                                        <chr> …
## $ extended_entities.media                                                <list> …
## $ scopes.followers                                                       <lgl> …

Analyse

NA Count

# -> how many variables did we actually miss?!
# Presumably many of them are useless as well, right?

# Count the NA entries of every column in dat2, reshape the result into long
# format (columns: variable, countNA) and sort ascending by the NA count.
content <- dat2 %>%
  map_dfr(~ sum(is.na(.x))) %>%
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "countNA"
  ) %>%
  arrange(countNA)

print_table(content)
# Number of complete variables (variables without a single NA)
sum(content$countNA == 0)
## [1] 108
# Number of completely empty variables, i.e. variables that are NA in every
# row. The threshold is the row count of dat2 instead of a hard-coded sample
# size, so the check stays correct when the sample size changes.
nrow(content[content$countNA == nrow(dat2), ])
## [1] 40

Anzahl Variablen pro NA Count

# Tabulate how many variables share each NA count; the most frequent NA
# counts come first.
content %>%
  count(countNA, name = "n_sum", sort = TRUE) %>%
  print_table()
# All variables that contain only NAs: their NA count equals the number of
# rows of dat2 (derived from the data instead of a hard-coded 500).
content %>%
  filter(countNA == nrow(dat2)) %>%
  print_table()
# -> these may, however, be irrelevant only in this particular sample!

# All variables without any NA
filter(content, countNA == 0) %>%
  print_table()

Distinction Ratio der Variablen

# Distinction ratio per variable: number of distinct values divided by the
# number of entries in the column, listed from highest to lowest ratio.
dat2 %>%
  map_dfr(~ n_distinct(.x) / length(.x)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "distinction"
  ) %>%
  arrange(desc(distinction)) %>%
  print_table()

Thoughts

  • Hydrate old Data?
    • mit Status IDs durch Twurl
  • Neue Variablen nutzen? -> großes Rename Script und Struktur anpassen