1 Introduction

  • In this notebook, we present the first results of an analysis of green development paths of Nordic regions
  • It is based on patent data from 1990-2015 (PATSTAT, Autumn 2021 Edition)
  • Analysis is done on all Nordic NUTS 3 regions (fractionalized patent allocation by inventor location, DOCDB family level)
  • Industries are captured by NACE2 codes of patents according to the OECD IPC-NACE2 concordance table.
  • Green patents are identified using the Y02 tag in the CPC classification

1.1 Preprocessing

### General options
# Ask R for English-language messages.
Sys.setenv(LANG = "en")
# scipen = 100 penalises scientific notation so numbers print in fixed
# notation; digits = 4 sets the number of significant digits printed.
options(scipen = 100, digits = 4)

### Clean workspace
# Remove every object in the current environment and close all open
# graphics devices so the notebook starts from a clean state.
rm(list = ls())
graphics.off()

### Load packages (maybe need to be installed first)
# Standard
library(tidyverse) # General DS toolkit
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.2 ──✔ ggplot2 3.4.0      ✔ purrr   1.0.1 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.1      ✔ stringr 1.5.0 
✔ readr   2.1.3      ✔ forcats 0.5.2 ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(magrittr) # For advanced piping

Attaching package: ‘magrittr’

The following object is masked from ‘package:purrr’:

    set_names

The following object is masked from ‘package:tidyr’:

    extract
# Databases
library(DBI) # GEneral R database interface
library(RPostgres) # PostgreSQL interface driver 
library(dbplyr) # for dplyr with databases

Attaching package: ‘dbplyr’

The following objects are masked from ‘package:dplyr’:

    ident, sql
# networks
library(tidygraph)

Attaching package: ‘tidygraph’

The following object is masked from ‘package:stats’:

    filter
library(ggraph)
library(ggrepel)

# GEoplot
library(giscoR)
library(sf)
Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
## LOAD DATA
# All inputs are serialized R objects read from ../temp.

# Regular tables
data_appln <- read_rds("../temp/data_appln.rds")
data_pers_appln <- read_rds("../temp/data_pers_appln.rds")
data_nace2 <- read_rds("../temp/data_nace2.rds")
data_docdb_fam_cpc <- read_rds("../temp/data_docdb_fam_cpc.rds")
#data_person <- read_rds('../temp/data_person.rds')


# Regional specialization
# Both tables get two derived keys: `country` (first two characters of the
# nuts code) and `nuts_period` (nuts code and period pasted together).
region_RTA <- mutate(
  read_rds("../temp/region_RTA.rds"),
  country = str_sub(nuts, 1, 2),
  nuts_period = paste(nuts, "P", period)
)

region_tech <- mutate(
  read_rds("../temp/region_tech.rds"),
  country = str_sub(nuts, 1, 2),
  nuts_period = paste(nuts, "P", period)
)

# Technology space
g_tech <- read_rds("../temp/g_tech.rds")

# Lists
list_nace2 <- read_rds("../temp/list_nace2.rds")

# Applicants
# The two sequence-number columns are dropped right after loading.
region_applt_appln <- select(
  read_rds("../temp/tbl_region_applt_appln.rds"),
  -applt_seq_nr, -invt_seq_nr
)
region_applt <- read_rds("../temp/tbl_region_applt.rds")
## Defining parameters
# NOTE(review): neither cutoff is referenced in the visible part of this
# notebook - confirm whether they are still needed.
n_cutoff <- 50
n_cutoff_green <- 25
## SELECT FOCUS REGIONS
# Regions to keep regardless of their rank. The empty string is a
# placeholder that is not expected to match any nuts code.
reg_in <- '' # c('SE232', 'NO043', 'DK012')
# Number of top regions kept per country.
n_regions <- 4

# Restrict to top N regions
# Sum the fractional patent weights per region (n) and the weights of
# Y-tagged patents (n_Y), rank the regions within each country by n_Y
# (descending) and keep the first `n_regions` per country plus anything
# listed in `reg_in`. The result is a vector of nuts codes.
select_region <- region_tech %>%
  group_by(country, nuts) %>%
  summarise(n = sum(weight_frac, na.rm = TRUE),
            n_Y = sum(weight_frac * Y_tag, na.rm = TRUE),
            .groups = "drop") %>%
  arrange(desc(n_Y)) %>%
  group_by(country) %>%
  # row_number() is the within-group rank after the sort above
  mutate(index = row_number()) %>%
  ungroup() %>%
  filter(index <= n_regions | nuts %in% reg_in) %>%
  distinct(nuts) %>%
  pull(nuts)
`summarise()` has grouped output by 'country'. You can override using the `.groups` argument.
rm(reg_in)
# Create data frame with technology relatedness edge list
# Edge endpoints in g_tech are node indices; translate them to the node
# names so every row is (from name, to name, weight).
tech_rel <- g_tech %E>%
  mutate(from_nace = .N()$name[from],
         to_nace = .N()$name[to]) %>%
  as_tibble() %>%
  mutate(from = as.character(from_nace),
         to = as.character(to_nace)) %>%
  arrange(from, to) %>%
  select(from, to, weight)

# Make the edge list symmetric and add self loops. All inner references to
# `tech_rel` below see the one-directional edge list built above, because
# the right-hand side is evaluated before the assignment happens.
tech_rel <- tech_rel %>%
  # Add opposite direction
  bind_rows(tech_rel %>%
              rename(from_new = to, to_new = from) %>%
              rename(from = from_new, to = to_new) %>%
              relocate(from, to)) %>%
  # Add self loops (weight 1) for every technology that occurs at either
  # end of an edge. Using only the `from` column would skip technologies
  # that appear solely as `to` in the one-directional list.
  bind_rows(tibble(from = union(tech_rel$from, tech_rel$to)) %>%
              mutate(to = from,
                     weight = 1)) %>%
  # Keep the first row per pair, so an edge already present wins over an
  # added one.
  distinct(from, to, .keep_all = TRUE)
# Summarize Regions
# One row per region, period and Y_tag:
#   n_spec       - sum of rta_bin (presumably a 0/1 specialization flag,
#                  so this counts specializations - confirm)
#   n_spec_count - n_tech_region summed over rows weighted by rta_bin
#   HHI          - sum of squared percentage shares of n_tech_region, i.e.
#                  on a 0-10000 scale. Unlike the two sums above it does
#                  not drop missing values.
region_RTA_agg <- region_RTA %>%
  group_by(country, nuts, period, nuts_period, Y_tag) %>%
  summarise(n_spec = sum(rta_bin, na.rm = TRUE),
            n_spec_count = sum(n_tech_region * rta_bin, na.rm = TRUE),
            HHI = sum((n_tech_region / sum(n_tech_region) * 100)^2),
            .groups = "drop")
`summarise()` has grouped output by 'country', 'nuts', 'period', 'nuts_period'. You can override using the `.groups` argument.

2 Patent application development

  • In the following, a brief descriptive analysis of the development of green and non-green patent applications in the Nordics
  • In addition, a breakdown of green patents by top green patenting regions
# Data frame with regions and technology fields
# For each (country, nuts, nace_group, Y_tag) series, ordered by period,
# attach the value of the previous row (lag) and the change against it for
# the patent count, the RTA and the binary RTA flag. The first row of a
# series has no predecessor, so its lag and delta columns are NA.
tech_dev <- region_RTA %>%
  select(
    country, period, nuts, nuts_period, nace_group, Y_tag,
    n_tech_region, rta, rta_bin
  ) %>%
  arrange(country, nuts, nace_group, Y_tag, period) %>%
  group_by(country, nuts, nace_group, Y_tag) %>%
  mutate(
    n_tech_region_lag = lag(n_tech_region, 1),
    n_tech_region_delta = n_tech_region - n_tech_region_lag,
    # +1 in the denominator keeps the ratio finite when the lag is 0
    pct_tech_region_delta =
      (n_tech_region - n_tech_region_lag) / (n_tech_region_lag + 1),
    rta_lag = lag(rta, 1),
    rta_delta = rta - rta_lag,
    rta_bin_lag = lag(rta_bin, 1),
    rta_bin_delta = rta_bin - rta_bin_lag
  ) %>%
  ungroup() %>%
  arrange(country, nuts, nace_group, Y_tag, period)
# Line chart: number of applications per filing year (1985-2015), one line
# for green (Y_tag TRUE) and one for non-green applications.
data_appln %>%
  filter(appln_filing_year >= 1985, appln_filing_year <= 2015) %>%
  mutate(Y_tag = ifelse(Y_tag == TRUE, 'Green', 'Non-Green')) %>%
  count(appln_filing_year, Y_tag) %>%
  ggplot(aes(x = appln_filing_year, y = n, col = Y_tag)) +
  geom_line(key_glyph = "timeseries") +
  labs(
    # title = 'Patent applications: Development',
    # subtitle = 'All Nordic contries, by Y tag',
    x = 'Year',
    y = 'Number applications',
    col = 'Green'
  )

# Line chart: fractional application counts per filing year (1985-2015) for
# the focus regions in `select_region`, one panel per green / non-green
# group with independent axes.
data_pers_appln %>%
  filter(
    appln_filing_year >= 1985,
    appln_filing_year <= 2015,
    nuts %in% select_region
  ) %>%
  mutate(Y_tag = ifelse(Y_tag == TRUE, 'Green', 'Non-Green')) %>%
  count(appln_filing_year, nuts, Y_tag, wt = weight_frac) %>%
  ggplot(aes(x = appln_filing_year, y = n, col = nuts)) +
  geom_line(key_glyph = "timeseries") +
  facet_wrap(vars(Y_tag), scales = 'free') +
  labs(
    # title = 'Patent applications: Development',
    # subtitle = 'All Nordic contries',
    x = 'Year',
    y = 'Number applications, by region and Y tag',
    col = 'Nuts3'
  )

3 Applicants

  • Applicants of patents filed by inventors in the Nordics
  • That can be domestic or foreign applicants
# Line chart: fractional application counts per filing year (1985-2015) by
# person_ctry_code, one panel per green / non-green group with independent
# axes. The y-axis label names the country breakdown that is actually
# plotted (the colour legend is 'Country').
data_pers_appln %>%
  mutate(Y_tag = ifelse(Y_tag == TRUE, 'Green', 'Non-Green')) %>%
  filter(appln_filing_year <= 2015, appln_filing_year >= 1985) %>%
  count(appln_filing_year, person_ctry_code, Y_tag, wt = weight_frac) %>%
  ggplot(aes(x = appln_filing_year, y = n, col = person_ctry_code)) +
  geom_line(key_glyph = "timeseries") +
  facet_wrap(vars(Y_tag), scales = 'free') +
  theme(legend.position = 'bottom') +
  labs(#title = 'Patent applications: Development by country',
       #subtitle = 'All Nordic contries',
       x = 'Year',
       y = 'Number applications, by country and Y tag',
       col = 'Country')

ggsave("../output/paper_figs/fig_3.jpeg", dpi = 300)
Saving 7.29 x 4.51 in image
ggsave("../output/paper_figs/fig_3.eps")
Saving 7.29 x 4.51 in image


# Enrich the applicant-application table in place:
#   n_frac - equal share for every row of the same appln_id
#   applicant attributes from region_applt (by person_id)
#   application attributes from data_appln (by appln_id)
#   nace   - nested tibble of the application's NACE groups, each with an
#            equal share (nace_share)
# drop_na() then removes every row with a missing value in any column,
# which includes rows left unmatched by the joins.
region_applt_appln <- region_applt_appln %>%
  group_by(appln_id) %>%
  mutate(n_frac = 1 / n()) %>%
  ungroup() %>%
  left_join(
    select(region_applt, person_id, han_id, han_name, person_ctry_code, nuts),
    by = 'person_id'
  ) %>%
  left_join(
    select(data_appln, appln_id, docdb_family_id, appln_filing_year,
           period, Y_tag),
    by = 'appln_id'
  ) %>%
  left_join(
    data_nace2 %>%
      select(appln_id, nace_group) %>%
      group_by(appln_id) %>%
      mutate(nace_share = 1 / n()) %>%
      ungroup() %>%
      nest(nace = c(nace_group, nace_share)),
    by = 'appln_id'
  ) %>%
  drop_na()
# List main applicants
# One row per applicant (han_id, han_name), sorted by patent volume:
#   n_pat     - sum of fractional application shares
#   n_Y       - the same sum restricted to Y-tagged applications
#   first_pat - earliest filing year
#   share_Y   - n_Y / n_pat
#   age       - years between first_pat and 2016
#   incumbant - TRUE when age >= 10 and n_pat >= 10
applt_stats <- region_applt_appln %>%
  group_by(han_id, han_name) %>%
  summarise(
    n_pat = sum(n_frac),
    n_Y = sum(n_frac * Y_tag),
    first_pat = min(appln_filing_year),
    .groups = "drop"
  ) %>%
  mutate(share_Y = n_Y / n_pat,
         age = 2016 - first_pat,
         incumbant = age >= 10 & n_pat >= 10) %>%
  arrange(desc(n_pat))
`summarise()` has grouped output by 'han_id'. You can override using the `.groups` argument.
applt_stats %>% head(200)
applt_stats %>% arrange(desc(n_Y)) %>% head(100)

4 Y Tags

# Share of incumbents by technology
# Attach the incumbent flag to every applicant-application row, expand the
# nested NACE shares and weight each row by applicant share x NACE share.
# Per region, NACE group, period and Y_tag this gives the total weight (n),
# the weight held by incumbents (n_inc) and their ratio (share_inc).
region_techn_incumb <- region_applt_appln %>%
  left_join(applt_stats %>% select(han_id, incumbant), by = 'han_id') %>%
  unnest(nace) %>%
  mutate(n_weight = n_frac * nace_share) %>%
  group_by(nuts, nace_group, period, Y_tag) %>%
  summarise(n = sum(n_weight),
            n_inc = sum(n_weight * incumbant),
            .groups = "drop") %>%
  mutate(share_inc = n_inc / n)
`summarise()` has grouped output by 'nuts', 'nace_group', 'period'. You can override using the `.groups` argument.

5 Technology space general

  • We calculate the relatedness of industries by co-occurrence pattern following Hidalgo & Hausmann (2007)
  • Revealed technological advantage (RTA) is calculated separately for Y-tag and non-Y-tag patents.
# Frequency table of Y-tagged rows by the first four characters of the CPC
# symbol, most frequent first.
data_docdb_fam_cpc %>%
  filter(Y_tag == TRUE) %>%
  mutate(cpc_class_symbol = str_sub(cpc_class_symbol, 1, 4)) %>%
  count(cpc_class_symbol, sort = TRUE)
# Fixed seed so the force-directed layout is reproducible between runs.
set.seed(1337)
# The layout function returns an unnamed two-column coordinate matrix.
# Naming the columns during the conversion avoids tibble's warning about
# missing column names.
coords_tech <- g_tech %>%
  igraph::layout.fruchterman.reingold() %>%
  as_tibble(.name_repair = ~ c("x", "y"))
Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
Using compatibility `.name_repair`.
colnames(coords_tech) <- c("x", "y")

6 Regional specialization (RTA) development

  • Comparison of specialization profiles in period 1 and 2
# Network plot of the technology space at the precomputed coordinates.
# Edge width and transparency follow the edge weight; node colour follows
# nace_sec_name and node size follows dgr. Only nodes in the top quarter by
# dgr are labelled, with names cut to 50 characters.
g_tech %N>%
  mutate(nace_group_name = str_trunc(nace_group_name, 50, side = 'right')) %>%
  ggraph(layout = coords_tech) +
  geom_edge_link(aes(width = weight, alpha = weight), colour = "grey") +
  geom_node_point(aes(colour = nace_sec_name, size = dgr)) +
  geom_node_text(
    aes(label = nace_group_name,
        size = dgr,
        filter = percent_rank(dgr) >= 0.75),
    repel = TRUE
  ) +
  theme_void() +
  theme(legend.position = 'bottom', legend.box = "vertical") +
  labs(
    # title = 'Industry Space (all Nordics)',
    # subtitle = 'Nodes = NACE 2 Industries. Edges: Relatedness'
  )


ggsave("../output/paper_figs/fig_9.jpeg", dpi = 300)
Saving 10 x 10 in image
ggsave("../output/paper_figs/fig_9.eps")
Saving 10 x 10 in image
# p1: focus regions, one row per region and period, with the green /
# non-green values of n_spec, n_spec_count and HHI spread into columns
# suffixed Y_tag_TRUE / Y_tag_FALSE (missing combinations become 0).
p1 <- region_RTA_agg %>%
  filter(nuts %in% select_region) %>%
  pivot_wider(
    names_from = Y_tag,
    values_from = c(n_spec, n_spec_count, HHI),
    values_fill = 0,
    names_prefix = 'Y_tag_'
  )

# p2: one row per region, with the specialization counts of each period in
# separate columns (suffix = period).
p2 <- select(p1, period, nuts, n_spec_Y_tag_FALSE, n_spec_Y_tag_TRUE) %>%
  pivot_wider(
    names_from = period,
    values_from = c(n_spec_Y_tag_FALSE, n_spec_Y_tag_TRUE)
  )

7 Analysis for existing green paths:

NOTE: new patents in specializations?

# Scatter of non-green vs green specialization counts for each region and
# period (p1). Arrows (p2) connect a region's period-1 position to its
# period-2 position.
p1 %>%
  ggplot(aes(x = n_spec_Y_tag_FALSE, y = n_spec_Y_tag_TRUE)) +
  geom_segment(data = p2,
               aes(x = n_spec_Y_tag_FALSE_1,
                   y = n_spec_Y_tag_TRUE_1,
                   xend = n_spec_Y_tag_FALSE_2,
                   yend = n_spec_Y_tag_TRUE_2),
               # Constant line width belongs outside aes(): inside it the
               # value is treated as data and fed through scale_size(), and
               # `size` for lines is deprecated in favour of `linewidth`.
               linewidth = 0.75,
               alpha = 0.15,
               arrow = arrow(length = unit(0.5, "cm"), type = "closed"),
               show.legend = FALSE) +
  geom_point(aes(size = n_spec_count_Y_tag_TRUE, col = HHI_Y_tag_TRUE)) +
  geom_text_repel(aes(label = nuts), box.padding = 0.5, max.overlaps = Inf) +
  # NOTE(review): midpoint = 1 looks carried over from an RTA colour scale.
  # If HHI_Y_tag_TRUE is built from percentage shares its values are far
  # above 1, so only the mid-to-high half of the gradient is used - confirm
  # the intended midpoint.
  scale_color_gradient2(low = "skyblue", mid = 'yellow', high = "red", midpoint = 1) +
  scale_size(range = c(2, 10)) +
  theme(legend.position = 'bottom') +
  labs(#title = 'Development of new regional specializations',
       #subtitle = 'By number of green and non green specializations in period 1 and 2',
       x = 'N non-green specializations',
       y = 'N green specializations',
       size = 'N green patents',
       col = 'HHI green patents')
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
Please use `linewidth` instead.

ggsave("../output/paper_figs/fig_4.jpeg", dpi = 300)
Saving 7.5 x 7.5 in image
ggsave("../output/paper_figs/fig_4.eps")
Saving 7.5 x 7.5 in image
rm(p1, p2)
# Relatedness between new green specializations and earlier ones.
# Each row pairs, within one region, a technology that became a new green
# specialization in period 2 (nace_group) with a technology the region was
# specialized in during period 1 (related_techn), together with the
# relatedness weight of the pair and the Y_tag of the period-1
# specialization.
tech_rel_dev <- tech_rel %>%
  select(from, to, weight) %>%
  # every region in which the `from` technology occurs
  left_join(distinct(tech_dev, nace_group, nuts),
            by = c('from' = 'nace_group')) %>%
  # filter for rta in period 1
  inner_join(
    tech_dev %>%
      filter(period == '1', rta_bin == 1) %>%
      select(nace_group, nuts, Y_tag),
    by = c('to' = 'nace_group', 'nuts')
  ) %>%
  # filter for new green specialization in period 2
  semi_join(
    tech_dev %>%
      filter(period == '2', rta_bin == 1, rta_bin_delta == 1, Y_tag == TRUE),
    by = c('from' = 'nace_group', 'nuts')
  ) %>%
  rename(nace_group = from, related_techn = to)
# Regional relatedness profile of new green specializations.
# Step 1: for each new green specialization, take the strongest relatedness
#         to a period-1 specialization, separately for green and non-green
#         predecessors (Y_tag).
# Step 2: average those maxima per region and Y_tag.
# Step 3: spread into Y_TRUE / Y_FALSE columns (0 when absent), then add
#         the summed n_tech_region of new green specializations (n) and the
#         country code.
p1 <- tech_rel_dev %>%
  group_by(nuts, nace_group, Y_tag) %>%
  summarise(rel_max = max(weight), .groups = "drop") %>%
  group_by(nuts, Y_tag) %>%
  summarise(rel = mean(rel_max), .groups = "drop") %>%
  pivot_wider(names_from = Y_tag, values_from = rel, names_prefix = 'Y_', values_fill = 0) %>%
  left_join(tech_dev %>%
              filter(Y_tag == TRUE, period == '2', rta_bin_delta == 1) %>%
              count(nuts, wt = n_tech_region),
            by = 'nuts') %>%
  mutate(country = str_sub(nuts, 1, 2))
`summarise()` has grouped output by 'nuts', 'nace_group'. You can override using the `.groups` argument.`summarise()` has grouped output by 'nuts'. You can override using the `.groups` argument.
# Reference lines: mean relatedness over all regions in p1, computed before
# the plot drops the smaller half of the regions.
x_mid <- mean(p1[["Y_FALSE"]], na.rm = TRUE)
y_mid <- mean(p1[["Y_TRUE"]], na.rm = TRUE)

# Scatter of regions in the upper half by n: relatedness of new green
# specializations to earlier non-green (x) and green (y) specializations.
p1 %>%
  filter(percent_rank(n) >= 0.5) %>%
  ggplot(aes(x = Y_FALSE, y = Y_TRUE, size = n)) +
  geom_vline(xintercept = x_mid, linetype = "dashed", color = 'grey') +
  geom_hline(yintercept = y_mid, linetype = "dashed", color = 'grey') +
  geom_point(aes(col = country)) +
  geom_text_repel(aes(label = nuts), box.padding = 0.5, max.overlaps = Inf) +
  theme(legend.position = "bottom") +
  labs(
    # title = 'New green specialization period 2',
    # subtitle = 'By nuts regions',
    # note = 'Relatedness is the mean over all new green specializations, per green specialization largest relatedness to former specialization counted',
    x = 'Relatedness non-green',
    y = 'Relatedness green',
    size = 'N green patents'
  )


ggsave("../output/paper_figs/fig_5.jpeg", dpi = 300)
Saving 7.5 x 7.5 in image
ggsave("../output/paper_figs/fig_5.eps")
Saving 7.5 x 7.5 in image
rm(p1, x_mid, y_mid)
# Lookup table nuts code -> region name.
# Base: the 2021 code and name columns of the `nuts_changes` dataset
# shipped with the regions package.
library(regions)
data(nuts_changes)
list_nuts <- select(nuts_changes, code_2021, geo_name_2021)
names(list_nuts) <- c('nuts', 'nuts_name')

# Manually maintained names for these Norwegian codes, appended below.
list_nuts_add <- tibble(
  nuts = c(
    'NO011', 'NO012', 'NO021', 'NO022', 'NO031', 'NO032', 'NO033',
    'NO034', 'NO041', 'NO042', 'NO043', 'NO051', 'NO052', 'NO053',
    'NO061', 'NO062', 'NO071', 'NO072', 'NO073'
  ),
  nuts_name = c(
    'Oslo', 'Akershus', 'Hedmark', 'Oppland', 'Østfold', 'Buskerud',
    'Vestfold', 'Telemark', 'Aust-Agder', 'Vest-Agder', 'Rogaland',
    'Hordaland', 'Sogn og Fjordane', 'Møre og Romsdal', 'Sør-Trøndelag',
    'Nord-Trøndelag', 'Nordland', 'Troms', 'Finnmark'
  )
)

list_nuts <- bind_rows(list_nuts, list_nuts_add)

!!! Make additional plot including old green specializations

8 Profiling regions

# Relatedness profile per new green specialization in the focus regions.
# For every region and new green specialization, take the strongest
# relatedness to a period-1 green (Y_TRUE) and non-green (Y_FALSE)
# specialization (0 when there is none). Then add the n_tech_region total
# (n), the country code, the NACE section name and a "code: name" region
# label.
p1 <- tech_rel_dev %>%
  filter(nuts %in% select_region) %>%
  group_by(nuts, nace_group, Y_tag) %>%
  summarise(rel = max(weight), .groups = "drop") %>%
  pivot_wider(names_from = Y_tag, values_from = rel, names_prefix = 'Y_', values_fill = 0) %>%
  left_join(tech_dev %>%
              filter(Y_tag == TRUE, period == '2', rta_bin_delta == 1) %>%
              count(nuts, nace_group, wt = n_tech_region),
            by = c('nuts', 'nace_group')) %>%
  mutate(country = str_sub(nuts, 1, 2)) %>%
  left_join(list_nace2 %>% select(nace_group, nace_sec_name) %>% distinct(), by = 'nace_group') %>%
  left_join(list_nuts %>% distinct(), by = 'nuts') %>%
  mutate(nuts_name = paste(nuts, nuts_name, sep = ': '))
`summarise()` has grouped output by 'nuts', 'nace_group'. You can override using the `.groups` argument.
# Reference lines: mean relatedness over all rows of p1.
x_mid <- mean(p1[["Y_FALSE"]], na.rm = TRUE)
y_mid <- mean(p1[["Y_TRUE"]], na.rm = TRUE)

# plotting
# One panel per region: each point is a new green specialization, placed by
# its relatedness to earlier non-green (x) and green (y) specializations.
p1 %>%
  ggplot(aes(x = Y_FALSE, y = Y_TRUE, size = n, col = nace_sec_name)) +
  geom_point() +
  geom_text_repel(aes(label = nace_group), box.padding = 0.5) +
  geom_vline(xintercept = x_mid, linetype = "dashed", color = 'grey') +
  geom_hline(yintercept = y_mid, linetype = "dashed", color = 'grey') +
  facet_wrap(vars(nuts_name), ncol = n_regions) +
  theme(legend.position = 'bottom', legend.box = "vertical") +
  labs(
    # title = 'New green specialization period 2',
    # subtitle = 'By nuts regions',
    # note = 'Relatedness is the mean over all new green specializations, per green specialization largest relatedness to former specialization counted',
    x = 'Relatedness non-green',
    y = 'Relatedness green',
    col = 'NACE',
    size = 'N green patents'
  )


ggsave("../output/paper_figs/fig_6.jpeg", dpi = 300)
Saving 10 x 10 in image
ggsave("../output/paper_figs/fig_6.eps")
Saving 10 x 10 in image
rm(p1, x_mid, y_mid)
# Classify every new green specialization (all regions) into a green path.
# Y_FALSE / Y_TRUE hold the strongest relatedness to a period-1 non-green /
# green specialization (0 when there is none). Each is compared with its
# mean over all rows of this table to assign the path label. `n` is the
# n_tech_region total of the specialization.
path_green_new <- tech_rel_dev %>%
  group_by(nuts, nace_group, Y_tag) %>%
  summarise(rel = max(weight), .groups = "drop") %>%
  pivot_wider(names_from = Y_tag, values_from = rel, names_prefix = 'Y_', values_fill = 0) %>%
  left_join(tech_dev %>%
              filter(Y_tag == TRUE, period == '2', rta_bin_delta == 1) %>%
              count(nuts, nace_group, wt = n_tech_region),
            by = c('nuts', 'nace_group')) %>%
  # NOTE(review): the last two branches assign the same label, so high
  # non-green relatedness is always 'renewal' whatever the green
  # relatedness is - confirm this is intended and not a copy-paste slip.
  mutate(green_path = case_when(
    Y_FALSE <= mean(Y_FALSE) & Y_TRUE <= mean(Y_TRUE) ~ 'creation',
    Y_FALSE <= mean(Y_FALSE) & Y_TRUE > mean(Y_TRUE) ~ 'diversification',
    Y_FALSE > mean(Y_FALSE) & Y_TRUE <= mean(Y_TRUE) ~ 'renewal',
    Y_FALSE > mean(Y_FALSE) & Y_TRUE > mean(Y_TRUE) ~ 'renewal'
    )) %>%
  select(-Y_FALSE, -Y_TRUE)
`summarise()` has grouped output by 'nuts', 'nace_group'. You can override using the `.groups` argument.
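  • Below a plot summing it all up (currently drawn as stacked bars; the polar/radar variant is commented out in the code).
  • It includes the green patents in NACE groups following a particular green path (within a NUTS region)
  • Color indicates the applicant type: incumbents (first patent at least 10 years before 2016 and at least 10 patents) versus other applicants in the path.
  • Can be used to identify a region's main path and overall profile.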

TODO: Increase text size

# Green paths per region and NACE group.
# Existing green specializations (period 2, Y_tag TRUE) are labelled from
# the change in the binary RTA flag and the relative change in patents:
#   stagnation - still specialized, relative growth below 0.1
#   extension  - still specialized, relative growth of at least 0.1
#   extinction - specialization lost
# Rows that match none of the three are dropped. New green paths from
# path_green_new are appended, `n` is made non-negative, and the volume is
# split by the incumbent share of the region and NACE group.
path_green <- tech_dev %>%
  filter(Y_tag == TRUE, period == '2') %>%
  mutate(green_path = case_when(
    rta_bin == 1 & rta_bin_delta == 0 & pct_tech_region_delta < 0.1 ~ 'stagnation',
    rta_bin == 1 & rta_bin_delta == 0 & pct_tech_region_delta >= 0.1 ~ 'extension',
    rta_bin == 0 & rta_bin_delta == -1 ~ 'extinction'
  )) %>%
  drop_na(green_path) %>%
  select(nuts, nace_group, n = n_tech_region_delta, green_path) %>%
  # append the newly created green paths
  bind_rows(path_green_new) %>%
  mutate(n = abs(n)) %>%
  # add incumbent measures
  left_join(
    region_techn_incumb %>%
      filter(period == '2', Y_tag == TRUE) %>%
      select(nuts, nace_group, share_inc),
    by = c('nuts', 'nace_group')
  ) %>%
  mutate(n_new = n * (1 - share_inc),
         n_inc = n * share_inc)

Break down main path industry

9 Geoplotting

  • I also added a first plotting of main green paths
  • Works well, so we can add further geoplots if necessary.
# Stacked bars per focus region: volume of each green path, split into the
# part attributed to incumbents (n_inc) and to other applicants (n_new).
path_green %>%
  filter(nuts %in% select_region) %>% # , green_path != 'stagnation') %>%
  # split by inc and non-incumbents: one row per applicant type
  pivot_longer(c(n_new, n_inc), names_to = 'applt_type') %>%
  # Aggregate; combinations that do not occur are filled with 0
  count(nuts, green_path, applt_type, wt = value) %>%
  complete(nuts, green_path, applt_type, fill = list(n = 0)) %>%
  # Add overall period-2 patents per region (n_reg). It is only needed for
  # the share variant, which is currently commented out below.
  left_join(
    region_RTA %>%
      filter(period == '2') %>%
      count(nuts, wt = n_region, name = 'n_reg'),
    by = 'nuts'
  ) %>%
  mutate(n_share = n) %>% # / n_reg) %>%
  left_join(distinct(list_nuts), by = 'nuts') %>%
  mutate(nuts_name = paste(nuts, nuts_name, sep = ': ')) %>%
  # plotting
  ggplot() +
  geom_col(aes(x = green_path, y = n_share, fill = applt_type),
           alpha = 0.8, position = "stack") +
  # Lollipop shaft
  #geom_segment( aes(x = green_path, y = 0, xend = green_path, yend = 0.002), linetype = "dashed", color = "gray12") +
  # coord_polar() +
  # coord_flip() +
  facet_wrap(vars(nuts_name), ncol = n_regions, scales = 'free') +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
  theme(legend.position = 'bottom', legend.box = "vertical") +
  labs(
    # title = 'Regional green paths',
    # subtitle = 'By nuts regions',
    x = NULL,
    y = NULL,
    size = 'Share green patents',
    col = 'Green path type',
    fill = 'Applicant type'
  )


ggsave("../output/paper_figs/fig_7.jpeg", dpi = 300)
Saving 10 x 12.5 in image
ggsave("../output/paper_figs/fig_7.eps")
Saving 10 x 12.5 in image
# See: https://ropengov.github.io/giscoR/

# Get map of the Nordics (NUTS level 3, 2016 edition), leaving out
# Svalbard etc. (codes NO0B1 and NO0B2)
map_nordic <- gisco_get_nuts(
  country = c('DNK', 'SWE', 'NOR', 'FIN'),
  nuts_level = 3,
  year = '2016'
) %>%
  filter(!(NUTS_ID %in% c('NO0B1', 'NO0B2')))

# Group NUTS regions by country and convert the result to lines; these are
# drawn as country outlines on top of the regional map.
country_lines <- map_nordic %>%
  group_by(CNTR_CODE) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  st_cast("MULTILINESTRING")

10 Tables

# Map of the main green path per region: the green path with the largest
# summed n in each nuts code (one row kept per region, ties broken by
# position) is joined to the map and used as the fill colour.
map_nordic %>%
  # enter main green path
  left_join(
    path_green %>%
      count(nuts, green_path, wt = n) %>%
      group_by(nuts) %>%
      slice_max(order_by = n, n = 1, with_ties = FALSE) %>%
      ungroup(),
    by = c('NUTS_ID' = 'nuts')
  ) %>%
  # plot
  ggplot() +
  geom_sf(aes(fill = green_path)) +
  geom_sf(data = country_lines, col = "blue", linewidth = 0.1) +
  theme_void() +
  labs(
    # title = 'Map: Nordic main green paths',
    # subtitle = 'By nuts regions',
    # note = 'Excluding NO0B1,NO0B2',
    x = NULL,
    y = NULL,
    fill = 'Main green path'
  ) # + theme(legend.position = 'bottom')


ggsave("../output/paper_figs/fig_8.jpeg", dpi = 300)
Saving 8.5 x 7.5 in image
ggsave("../output/paper_figs/fig_8.eps")
Saving 8.5 x 7.5 in image
# Basics: number of patents
# Period-2 totals of n_tech_region per region, split into non-green
# (Y_FALSE) and green (Y_TRUE), rounded to whole numbers. Y_share is the
# green share of the rounded totals. The first column is a "code: name"
# region label.
tab_basic <- tech_dev %>%
  filter(period == 2) %>%
  count(nuts, Y_tag, wt = n_tech_region) %>%
  pivot_wider(
    names_from = Y_tag,
    values_from = n,
    values_fill = 0,
    names_prefix = 'Y_'
  ) %>%
  mutate(
    Y_FALSE = round(Y_FALSE),
    Y_TRUE = round(Y_TRUE),
    Y_share = round(Y_TRUE / (Y_FALSE + Y_TRUE), 2)
  ) %>%
  left_join(distinct(list_nuts), by = 'nuts') %>%
  mutate(nuts_name = paste(nuts, nuts_name, sep = ': ')) %>%
  relocate(nuts_name)

# Applicant
# Top green applicant per region in period 2. Each row's applicant weight
# is multiplied by the inventor weight from region_tech, summed per
# region, applicant and Y_tag, and the applicant with the largest rounded
# green total is kept (one per region). The result is a text label with
# the incumbent status, green count and green share.
tab_applt <- region_applt_appln %>%
  rename(weight_applt = weight_frac) %>%
  filter(period == 2) %>%
  left_join(
    select(region_tech, appln_id, nuts, weight_invt = weight),
    by = c('appln_id', 'nuts')
  ) %>%
  mutate(weight_all = weight_applt * weight_invt) %>%
  count(nuts, han_name, han_id, Y_tag, wt = weight_all) %>%
  pivot_wider(
    names_from = Y_tag,
    values_from = n,
    values_fill = 0,
    names_prefix = 'Y_'
  ) %>%
  mutate(
    Y_FALSE = round(Y_FALSE),
    Y_TRUE = round(Y_TRUE),
    Y_share = round(Y_TRUE / (Y_FALSE + Y_TRUE), 2)
  ) %>%
  group_by(nuts) %>%
  slice_max(order_by = Y_TRUE, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  left_join(
    applt_stats %>%
      select(han_id, incumbant) %>%
      mutate(incumbant = ifelse(incumbant == TRUE, 'incumb.', 'entrant')),
    by = 'han_id'
  ) %>%
  mutate(applicant = paste0(han_name, ' (', incumbant, ', n green: ', Y_TRUE, ', share:', Y_share, ')')) %>%
  select(nuts, applicant)

# Largest green NACE group per region in period 2 (by n_tech_region, one
# row per region), formatted as "group (n count)".
tab_nace <- tech_dev %>%
  filter(period == 2, Y_tag == TRUE) %>%
  group_by(nuts) %>%
  slice_max(order_by = n_tech_region, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  mutate(top_Y_nace = paste0(nace_group, ' (n ', round(n_tech_region), ')')) %>%
  select(nuts, top_Y_nace)

# Top green CPC class (4-character prefix starting with Y02 or Y04) per
# region in period 2.
# Each patent family is split equally over its distinct 4-character CPC
# prefixes; the Y02 / Y04 shares are linked to the period-2 applications
# and multiplied by the regional weight of each application. The class
# with the largest total is kept per region, formatted as
# "class (n count)".
tab_cpc_Y <- data_docdb_fam_cpc %>%
  mutate(cpc_Y = str_sub(cpc_class_symbol, 1, 4)) %>%
  distinct(docdb_family_id, cpc_Y, .keep_all = TRUE) %>%
  group_by(docdb_family_id) %>%
  mutate(weight_frac = 1 / n()) %>%
  ungroup() %>%
  filter(str_starts(cpc_Y, 'Y02') | str_starts(cpc_Y, 'Y04')) %>%
  count(docdb_family_id, cpc_Y, wt = weight_frac, name = 'weight_cpc') %>%
  inner_join(
    data_appln %>%
      filter(period == '2') %>%
      distinct(appln_id, docdb_family_id),
    by = 'docdb_family_id'
  ) %>%
  left_join(
    select(data_pers_appln, appln_id, nuts, weight_nuts = weight_frac),
    by = 'appln_id'
  ) %>%
  mutate(weight_frac = weight_cpc * weight_nuts) %>%
  count(cpc_Y, nuts, wt = weight_frac) %>%
  group_by(nuts) %>%
  slice_max(order_by = n, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  mutate(top_Y_cpc = paste0(cpc_Y, ' (n ', round(n), ')')) %>%
  select(nuts, top_Y_cpc)
  
# Note: Its silly that not all is indexed by docdb family.... change for next version
          
# Green-path tables, one row per region and one column per green path.
# Relative version: each path's share of the region's total, rounded to two
# decimals (columns prefixed '% path ').
tab_path_rel <- path_green %>%
  count(nuts, green_path, wt = n) %>%
  group_by(nuts) %>%
  mutate(share = round(n / sum(n), 2)) %>%
  ungroup() %>%
  select(-n) %>%
  pivot_wider(
    names_from = green_path,
    values_from = share,
    names_prefix = '% path ',
    values_fill = 0
  ) %>%
  arrange(nuts)

# Absolute version: rounded totals per path (columns prefixed 'n path ').
tab_path_total <- path_green %>%
  count(nuts, green_path, wt = n) %>%
  mutate(n = round(n)) %>%
  pivot_wider(
    names_from = green_path,
    values_from = n,
    names_prefix = 'n path ',
    values_fill = 0
  ) %>%
  arrange(nuts)
# Combine all regional tables into one overview keyed by nuts code. The
# bare code is dropped at the end because tab_basic already carries the
# "code: name" label in nuts_name.
tab_all <- tab_basic %>%
  left_join(tab_cpc_Y, by = 'nuts') %>%
  left_join(tab_nace, by = 'nuts') %>%
  left_join(tab_applt, by = 'nuts') %>%
  left_join(tab_path_rel, by = 'nuts') %>%
  left_join(tab_path_total, by = 'nuts') %>%
  select(-nuts)

# Show the table and export it as a semicolon-separated CSV.
tab_all
write_csv2(tab_all, '../output/table_regions_all.csv')

11 Endnotes

#library(stargazer)
#tab_all %>% stargazer(summary = FALSE, type = 'html')
sessionInfo()
R version 4.2.1 (2022-06-23)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Ventura 13.2.1

Matrix products: default
LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] regions_0.1.8   sf_1.0-9        giscoR_0.3.2    ggrepel_0.9.1   ggraph_2.1.0    tidygraph_1.2.2 dbplyr_2.2.1    RPostgres_1.4.4 DBI_1.1.3       magrittr_2.0.3 
[11] forcats_0.5.2   stringr_1.5.0   dplyr_1.0.10    purrr_1.0.1     readr_2.1.3     tidyr_1.2.1     tibble_3.1.8    ggplot2_3.4.0   tidyverse_1.3.2

loaded via a namespace (and not attached):
 [1] fs_1.6.1            lubridate_1.8.0     bit64_4.0.5         httr_1.4.4          rprojroot_2.0.3     bslib_0.4.2         tools_4.2.1         backports_1.4.1    
 [9] utf8_1.2.2          R6_2.5.1            KernSmooth_2.23-20  colorspace_2.1-0    withr_2.5.0         tidyselect_1.2.0    gridExtra_2.3       bit_4.0.5          
[17] compiler_4.2.1      cli_3.6.0           rvest_1.0.3         xml2_1.3.3          sass_0.4.5          labeling_0.4.2      scales_1.2.1        classInt_0.4-8     
[25] proxy_0.4-27        digest_0.6.31       rmarkdown_2.20      pkgconfig_2.0.3     htmltools_0.5.4     fastmap_1.1.0       rlang_1.0.6         readxl_1.4.1       
[33] rstudioapi_0.14     jquerylib_0.1.4     farver_2.1.1        generics_0.1.3      jsonlite_1.8.4      vroom_1.6.0         googlesheets4_1.0.1 s2_1.1.1           
[41] Rcpp_1.0.10         munsell_0.5.0       fansi_1.0.3         viridis_0.6.2       lifecycle_1.0.3     stringi_1.7.12      yaml_2.3.7          MASS_7.3-57        
[49] grid_4.2.1          blob_1.2.3          parallel_4.2.1      crayon_1.5.2        graphlayouts_0.8.3  haven_2.5.1         hms_1.1.2           knitr_1.42         
[57] pillar_1.8.1        igraph_1.3.5        wk_0.7.1            reprex_2.0.2        glue_1.6.2          evaluate_0.20       modelr_0.1.9        vctrs_0.5.2        
[65] tzdb_0.3.0          tweenr_2.0.2        cellranger_1.1.0    gtable_0.3.1        polyclip_1.10-4     assertthat_0.2.1    cachem_1.0.6        xfun_0.37          
[73] ggforce_0.4.1       countrycode_1.4.0   broom_1.0.1         e1071_1.7-12        class_7.3-20        googledrive_2.0.0   viridisLite_0.4.1   gargle_1.2.1       
[81] units_0.8-1         ellipsis_0.3.2      here_1.0.1         
# plot_techspace_dev(g = g_tech, rta_df = tech_dev, region = 'DK013', layout_nw = coords_tech) 

TODO: GO ON HERE AND DO BETTER DATAVIZ

# Test for function development
# Scratch inputs that stand in for the arguments of a future plotting
# function (see the commented-out plot_techspace_dev() call).
g <- g_tech
rta_df <- tech_dev
dev_df <- tech_rel_dev
region <- 'DK013'
time <- '2'
layout_nw <- coords_tech

# Green RTA and patent volume of the chosen region and period.
rta_df <- rta_df %>%
  filter(nuts == region, period == time, Y_tag == TRUE) %>%
  select(nace_group, rta, n_tech_region)

# For each technology in dev_df, flag whether it is paired with itself,
# i.e. whether one of its related earlier specializations is the same NACE
# group. any() returns FALSE for a group whose comparisons are all
# missing, where max() would give -Inf and as.logical() would turn that
# into TRUE.
dev_df <- dev_df %>%
  filter(nuts == region) %>%
  group_by(nace_group) %>%
  summarise(prev_nongreen = any(nace_group == related_techn, na.rm = TRUE)) %>%
  ungroup() %>%
  select(nace_group, prev_nongreen)

# Attach a truncated label and both tables to the graph nodes.
g <- g %N>%
  mutate(label = str_trunc(nace_group_name, 50, side = 'right')) %>%
  left_join(rta_df, by = c("name" = "nace_group")) %N>%
  left_join(dev_df, by = c("name" = "nace_group"))

# Only nodes with rta >= 1 are drawn and labelled; the layout comes from
# the `layout_nw` input defined above rather than a hard-coded global.
g %>%
  ggraph(layout = layout_nw) +
  geom_edge_link(aes(width = weight, alpha = weight), colour = "grey") +
  geom_node_point(aes(colour = rta, shape = prev_nongreen, size = n_tech_region, filter = rta >= 1)) +
  geom_node_text(aes(label = label, size = n_tech_region, filter = rta >= 1), repel = TRUE) +
  scale_color_gradient2(low = "skyblue", mid = 'yellow', high = "red", midpoint = 1) +
  theme_void() +
  theme(legend.position = "bottom") +
  labs(title = paste("Industry Space:", region, sep = " "),
       subtitle = 'Nodes = NACE 2 Industries. Edges: Relatedness',
       caption = '')

—>

