# Übungen zu Episode VI


#'      _ 
#'     / ) 
#'    / /  
#'   / /               /\ 
#'  / /     .-```-.   / ^`-.  
#'  \ \    /       \_/  (|) `o 
#'   \ \  /   .-.   \\ _  ,--' 
#'    \ \/   /   )   \( `^^^  
#'     \   \/    (    )  
#'      \   )     )  /     
#'       ) /__    | (__  
#'      (___)))   (__)))




###########################
# Lücken-Skript
# Füllen Sie die Kommentare und beschreiben Sie, was die jeweilige Operation macht.
###########################

# Setup: attach the packages used below, read the CSV file into `aj`, and
# derive `woj_befragung` from the `WoJ` data (presumably the example data set
# shipped with tidycomm -- confirm) by adding the 0/1 dummy `arbeitet_prekaer`.
library(tidyverse)
library(tidycomm)
library(sjstats)
library(lm.beta)
aj <- read_csv(file = 'haim-graefe_automated-news_abs-partly.csv')
# 1 = temporary contract, 0 = anything else. `%in%` never returns NA, so rows
# with a missing `temp_contract` are coded 0 as well.
woj_befragung <- mutate(
  WoJ,
  arbeitet_prekaer = as.numeric(temp_contract %in% 'Temporary')
)



# Bar chart of the mean `autonomy_selection` for each value of
# `arbeitet_prekaer`, drawn as one panel per country.
woj_befragung %>% 
  group_by(country, arbeitet_prekaer) %>% 
  # Missing values are ignored when averaging. `.groups = 'drop'` returns an
  # ungrouped tibble, so no grouping by `country` is left over and no
  # regrouping message is printed.
  summarise(mittelwert = mean(autonomy_selection, na.rm = TRUE),
            .groups = 'drop') %>% 
  ggplot(aes(x = arbeitet_prekaer,
             y = mittelwert)) +
  # geom_col() uses the pre-computed means as bar heights; it is the short
  # form of geom_bar(stat = 'identity').
  geom_col() +
  facet_wrap(vars(country))


# t-test for independent samples (`paired = FALSE`): compares
# `autonomy_selection` between the two groups defined by `arbeitet_prekaer`.
woj_befragung %>% 
  t_test(arbeitet_prekaer, autonomy_selection, paired = FALSE)


# One-way ANOVA of `autonomy_selection` across the `country` groups;
# `descriptives = TRUE` additionally requests descriptive statistics per group.
woj_befragung %>% 
  unianova(country, autonomy_selection, descriptives = TRUE)


# Same one-way ANOVA, this time requesting post-hoc tests, and then extracting
# only the `post_hoc` column (the pairwise country comparisons) from the result.
woj_befragung %>% 
  unianova(country, autonomy_selection, post_hoc = TRUE) %>% 
  pull(post_hoc)



###########################
# Code-Verständnis 2.0
# Der folgende Code enthält noch nicht gelernte Elemente.
# Können Sie trotzdem beschreiben, was hier passiert?
###########################

# Dictionary-based sentiment coding of the tweets:
#   1. read the positive and the negative word list, skipping the first 34
#      lines of each file (presumably a file header -- confirm),
#   2. combine both lists into one quanteda dictionary with the keys
#      `positiv` and `negativ`,
#   3. read the tweets and build a corpus from the `tweet` column,
#   4. tokenise the corpus, map the tokens onto the dictionary keys and count
#      the hits per tweet in a document-feature matrix.
library(quanteda)
diktionaer_positiv <- read_lines('dictionary_bingliu_pos.txt',
                                 skip = 34)
diktionaer_negativ <- read_lines('dictionary_bingliu_neg.txt',
                                 skip = 34)
diktionaer <- dictionary(list(positiv = diktionaer_positiv,
                              negativ = diktionaer_negativ))
tweets <- read_csv('tweets_trump-clinton.csv')
tweets_corpus <- corpus(tweets,
                        text_field = 'tweet')
# Current quanteda releases no longer build a dfm straight from a corpus, nor
# accept `remove`, `stem` or `dictionary` in dfm(); the steps are therefore
# spelled out as tokens() -> tokens_lookup() -> dfm().
# NOTE(review): the former `remove = stopwords('en')` and `stem = TRUE` options
# are intentionally not carried over. Stemming the tokens before the lookup
# would stop them from matching the unstemmed word lists, and after the lookup
# only the two dictionary keys remain as features. Please confirm that the
# counts match the previous results.
tweets_dfm_diktionaer <- 
  tweets_corpus %>% 
  tokens(remove_punct = TRUE,
         remove_symbols = TRUE,
         remove_numbers = TRUE) %>% 
  tokens_lookup(dictionary = diktionaer) %>% 
  dfm()


# Turn the dictionary counts into within-tweet proportions (`scheme = 'prop'`)
# and build one tibble that holds these proportions next to the document
# variables of each tweet.
tweets_dfm_diktionaer_gewichtet <- 
  tweets_dfm_diktionaer %>% 
  dfm_weight(scheme = 'prop')
tweets_codiert <- bind_cols(
  as_tibble(convert(tweets_dfm_diktionaer_gewichtet, to = 'data.frame')),
  docvars(tweets_dfm_diktionaer_gewichtet)
)



# Linear regression of `favorites` on `candidate`, `positiv`, `negativ` and
# the positiv-by-negativ interaction (`positiv*negativ` expands to both main
# effects plus their interaction). summary() prints the fitted model;
# lm.beta() additionally reports standardized coefficients.
tweets_lm <- lm(favorites ~ candidate + positiv*negativ,
                data = tweets_codiert)
summary(tweets_lm)
lm.beta(tweets_lm)


###########################
# Code-Anpassungen
# Kopieren Sie sich den Code so zusammen, dass er den Anforderungen gerecht wird.
###########################

# Berechnen Sie eine MAnOVa (mehrfaktorielle Varianzanalyse: eine AV, mehrere UVs), wobei ...
# - die Daten "woj_befragung" sind
# - die AV autonomy_emphasis ist
# - die UVs country und arbeitet_prekaer sind



# Visualisieren Sie die MAnOVa.



# Berechnen Sie eine Regression auf Basis von "woj_befragung" mit ...
# - autonomy_emphasis als AV
# - work_experience als UV



# Berechnen Sie eine weitere Regression, die zusätzlich "autonomy_selection" als UV enthält.
# Welches Regressionsmodell sagt die AV besser voraus?