Methods and code for the manuscript quantifying land fragmentation metrics for cattle enterprises in Northern Ireland

This is an R Markdown document which should make the analytical steps clear and repeatable. In lieu of providing the original spatial data (due to GDPR concerns), we present definitions, summary data and visual distributions of the research data.

1. Metadata on herd type and herd size


There are 19008 Herd IDs and 17744 Business IDs across the three study years.

dat1 %>% group_by(year) %>%
  summarise(
    n.herds = n_distinct(HerdId)
  , n.businesses = n_distinct(BusinessId))
## # A tibble: 3 x 3
##   year  n.herds n.businesses
##   <fct>   <int>        <int>
## 1 2015    19008        17744
## 2 2016    19008        17744
## 3 2017    19008        17744


Whilst business IDs can be associated with more than one herd, in the vast majority of cases, there was a 1:1 mapping between Business IDs and Herd IDs; out of 17744 businesses, 1192 were associated with more than one Herd ID (i.e. 6.72%)

dat1 %>% group_by(BusinessId) %>%
  summarise(
    n.herds.per.bus.id = n_distinct(HerdId)) %>%
  filter(n.herds.per.bus.id > 1) %>%
  summarise(n.bus.multip.herds = n_distinct(BusinessId))
## # A tibble: 1 x 1
##   n.bus.multip.herds
##                <int>
## 1               1192


Whilst herds could change production type year on year, The number of different herd types remained broadly consistent across the three years of the study

ggplot(dat1, aes(x = year, fill = HerdType)) +
  geom_bar(stat="count") +
  scale_fill_viridis(discrete = TRUE) +
  labs(x = "Year", y = "Number of herds", fill = "Herd Type") +
  theme_bw() 


There is no notable difference in herd types between years.

orig_table <- table(dat1$HerdType, dat1$year)
round(prop.table(orig_table, margin = 2),2)
##           
##            2015 2016 2017
##   Breeder  0.50 0.50 0.49
##   Dairy    0.14 0.14 0.14
##   Finisher 0.10 0.10 0.11
##   Other    0.25 0.25 0.26
chisq.test(orig_table)
## 
##  Pearson's Chi-squared test
## 
## data:  orig_table
## X-squared = 5.3394, df = 6, p-value = 0.5011


Median herd size varied with production type and also remained broadly consistent across the three years of the study, within herd types.

dat1 %>% group_by(HerdType) %>%
  summarise(
     "Median herd size" 
    , q1 = quantile(median_herd_size)[2]
    , median = median(median_herd_size)
    , q2. = quantile(median_herd_size)[4]
    , max = max(median_herd_size))
## # A tibble: 4 x 6
##   HerdType `"Median herd size"`    q1 median   q2.   max
##   <chr>    <chr>                <dbl>  <dbl> <dbl> <dbl>
## 1 Breeder  Median herd size        17   30      54  904.
## 2 Dairy    Median herd size        96  171     277 1836 
## 3 Finisher Median herd size        21   45.5    97 1492.
## 4 Other    Median herd size        15   38      84 1208
ggplot(dat1, aes(x = HerdType, y = median_herd_size, fill = HerdType)) +
  geom_boxplot() +
  scale_fill_viridis(discrete = TRUE) +
  labs(x = "Herd Type", y = "Median herd size", fill = "Herd Type") +
  theme_bw() +
  facet_wrap(~year)

2. Research data

2.1. Fragmentation metrics

n_fields - Number of land parcels


The number of individual land parcels associated with each farm.

dat1 %>% group_by(year) %>%
  summarise(
    "Number of land parcels"
    , q1= quantile(n_fields)[2]
    , median = median(n_fields)
    , q3 = quantile(n_fields)[4]
    , max = max(n_fields))
## # A tibble: 3 x 6
##   year  `"Number of land parcels"`    q1 median    q3   max
##   <fct> <chr>                      <dbl>  <dbl> <dbl> <int>
## 1 2015  Number of land parcels        14     24    39   413
## 2 2016  Number of land parcels        14     24    39   423
## 3 2017  Number of land parcels        14     24    39   444
ggplot(dat1, aes(x = as.factor(year), y = n_fields)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Year", y = "Number of land parcels") +
  theme_bw() 


No variation the number of land parcels at the farm level between years.

tst2 <- dat1 %>% select(HerdType, year, n_fields)
anova(glm(n_fields ~ year, data = tst2), test = "F")
## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: n_fields
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F Pr(>F)
## NULL                 57023   36362407              
## year  2   268.73     57021   36362138 0.2107   0.81
n_fragments - Number of fragments


The number of land parcel "units" associated with each farm.

dat1 %>% group_by(year) %>%
  summarise(
    "Number of fragments"
    , q1 = quantile(n_fragments)[2]
    , median = median(n_fragments)
    , q3 = quantile(n_fragments)[4]
    , max = max(n_fragments))
## # A tibble: 3 x 6
##   year  `"Number of fragments"`    q1 median    q3   max
##   <fct> <chr>                   <dbl>  <dbl> <dbl> <int>
## 1 2015  Number of fragments         4      6    10    80
## 2 2016  Number of fragments         4      6    10    81
## 3 2017  Number of fragments         4      6    10    80
ggplot(dat1, aes(x = as.factor(year), y = n_fragments)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Year", y = "Number of fragments") +
  theme_bw() 


No variation the number of fragments at the farm level between years.

tst3 <- dat1 %>% select(HerdType, year, n_fragments)
anova(glm(n_fragments ~ year, data = tst3), test = "F")
## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: n_fragments
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F Pr(>F)
## NULL                 57023    2194821              
## year  2   159.74     57021    2194661 2.0751 0.1256
n_fragments_5m - Number of fragments (5m buffer applied)


The number of land parcel "units" associated with each farm, after the application of a 5m buffer.

dat1 %>% group_by(year) %>%
  summarise(
    "Number of fragments (5m buffer)"
    , q1 = quantile(n_fragments_5m)[2]
    , median = median(n_fragments_5m)
    , q3 = quantile(n_fragments_5m)[4]
    , max = max(n_fragments_5m))
## # A tibble: 3 x 6
##   year  `"Number of fragments (5m buffer)"`    q1 median    q3   max
##   <fct> <chr>                               <dbl>  <dbl> <dbl> <int>
## 1 2015  Number of fragments (5m buffer)         2      3     6    39
## 2 2016  Number of fragments (5m buffer)         2      3     6    47
## 3 2017  Number of fragments (5m buffer)         2      3     6    42
ggplot(dat1, aes(x = as.factor(year), y = n_fragments_5m)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Year", y = "Number of fragments (5m buffer)") +
  theme_bw() 


No variation the number of fragments at the farm level (5m buffer) between years.

tst4 <- dat1 %>% select(HerdType, year, n_fragments_5m)
anova(glm(n_fragments_5m ~ year, data = tst4), test = "F")
## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: n_fragments_5m
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F Pr(>F)
## NULL                 57023     689708              
## year  2   50.729     57021     689657 2.0972 0.1228
total_farm_area_ha - Farm area


The total farm area in hectares.

dat1 %>% group_by(year) %>%
  summarise(
    "Farm area (ha)"
    , q1 = quantile(total_farm_area_ha, na.rm = T)[2]
    , median = median(total_farm_area_ha, na.rm = T)
    , q3 = quantile(total_farm_area_ha, na.rm = T)[4]
    , max = max(total_farm_area_ha, na.rm = T))
## # A tibble: 3 x 6
##   year  `"Farm area (ha)"`    q1 median    q3   max
##   <fct> <chr>              <dbl>  <dbl> <dbl> <dbl>
## 1 2015  Farm area (ha)      16.5   31.2  59   1403 
## 2 2016  Farm area (ha)      16.4   31.1  59.2 3241 
## 3 2017  Farm area (ha)      16.4   31.4  59.1 3252.
ggplot(dat1, aes(x = as.factor(year), y = total_farm_area_ha)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Year", y = "Farm area (ha)") +
  theme_bw() 


No variation in farm area at the farm level between years.

tst5 <- dat1 %>% select(HerdType, year, total_farm_area_ha)
anova(glm(total_farm_area_ha ~ year, data = tst5), test = "F")
## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: total_farm_area_ha
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F Pr(>F)
## NULL                 57023  224573593              
## year  2   3521.5     57021  224570071 0.4471 0.6395

2.2. Dispersal metrics

median_distance_fragments_km - Median distance between fragments


The median distance (in km) between the centroids of all land parcels associated with a cattle farm.

dat1 %>% group_by(year) %>%
  summarise(
    "Median distance between fragments (km)"
    , q1 = quantile(median_distance_fragments_km, na.rm = T)[2]
    , median = median(median_distance_fragments_km, na.rm = T)
    , q3 = quantile(median_distance_fragments_km, na.rm = T)[4]
    , max = max(median_distance_fragments_km, na.rm = T))
## # A tibble: 3 x 6
##   year  `"Median distance between fragments (km)"`    q1 median    q3   max
##   <fct> <chr>                                      <dbl>  <dbl> <dbl> <dbl>
## 1 2015  Median distance between fragments (km)     0.528   1.39  3.04  120.
## 2 2016  Median distance between fragments (km)     0.518   1.36  3.03  120.
## 3 2017  Median distance between fragments (km)     0.518   1.38  3.07  120.
ggplot(dat1, aes(x = as.factor(year), y = median_distance_fragments_km)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Year", y = "Median distance between fragments (km)") +
  theme_bw() 


No variation in median distance between fragments at the farm level between years.

tst6 <- dat1 %>% select(HerdType, year, median_distance_fragments_km)
anova(glm(median_distance_fragments_km ~ year, data = tst6), test = "F")
## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: median_distance_fragments_km
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F Pr(>F)
## NULL                 57023    1243464              
## year  2   22.497     57021    1243442 0.5158  0.597
max_distance_fragments_km - Maximum distance between fragments


The max distance (in km) between the centroids of the two most distal land parcels associated with a cattle farm.

dat1 %>% #group_by(year) %>%
  summarise(
    "Max distance between fragments (km)"
    , q1 = quantile(max_distance_fragments_km, na.rm = T)[2]
    , median = median(max_distance_fragments_km, na.rm = T)
    , q3 = quantile(max_distance_fragments_km, na.rm = T)[4]
    , max = max(max_distance_fragments_km, na.rm = T))
##   "Max distance between fragments (km)"    q1 median    q3     max
## 1   Max distance between fragments (km) 0.639  2.321 6.088 152.015
ggplot(dat1, aes(x = as.factor(year), y = max_distance_fragments_km)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Year", y = "Max distance between fragments (km)") +
  theme_bw() 


No variation in max distance between fragments at the farm level between years.

tst7 <- dat1 %>% select(HerdType, year, max_distance_fragments_km)
anova(glm(max_distance_fragments_km ~ year, data = tst7), test = "F")
## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: max_distance_fragments_km
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F Pr(>F)
## NULL                 57023    4198596              
## year  2    142.5     57021    4198453 0.9677   0.38
Distance between registered homestead and farm centroid, biggest fragement and biggest land parcel


The distances (in km) between the farm homestead (registered address) and the centroid of all land parcels, the centroid of the biggest field and the centroid of the biggest fragment.

dat1$homestead_farmcentroid_dist_km <- round(as.numeric(dat1$homestead_farmcentroid_dist_km),3)
dat1$homestead_field_dist_km <- round(as.numeric(dat1$homestead_field_dist_km),3)
dat1$homestead_frag_dist_km <- round(as.numeric(dat1$homestead_frag_dist_km),3)

dat1 %>%
  summarise(
    "Distance: homestead & farm centroid (km)"
    , q1 = quantile(homestead_farmcentroid_dist_km, na.rm = T)[2]
    , median = median(homestead_farmcentroid_dist_km, na.rm = T)
    , q3 = quantile(homestead_farmcentroid_dist_km, na.rm = T)[4]
    , max = max(homestead_farmcentroid_dist_km, na.rm = T))
##   "Distance: homestead & farm centroid (km)"    q1 median    q3     max
## 1   Distance: homestead & farm centroid (km) 0.273  0.667 1.705 112.756
dat1 %>%
  summarise(
    "Distance: homestead & largest parcel (km)"
    , q1 = quantile(homestead_field_dist_km, na.rm = T)[2]
    , median= median(homestead_field_dist_km, na.rm = T)
    , q3 = quantile(homestead_field_dist_km, na.rm = T)[4]
    , max = max(homestead_field_dist_km, na.rm = T))
##   "Distance: homestead & largest parcel (km)"   q1 median    q3    max
## 1   Distance: homestead & largest parcel (km) 0.31  0.686 2.068 127.47
dat1 %>%
  summarise(
    "Distance: homestead & largest fragment (km)"
    , q1 = quantile(homestead_frag_dist_km, na.rm = T)[2]
    , median = median(homestead_frag_dist_km, na.rm = T)
    , q3 = quantile(homestead_frag_dist_km, na.rm = T)[4]
    , max = max(homestead_frag_dist_km, na.rm = T))
##   "Distance: homestead & largest fragment (km)"    q1 median      q3    max
## 1   Distance: homestead & largest fragment (km) 0.223 0.4695 1.58325 127.54
distdf <- dat1 %>% select (HerdId, homestead_farmcentroid_dist_km, homestead_field_dist_km, homestead_frag_dist_km)
distdf$HerdId <- as.factor(distdf$HerdId)
distdf <- gather(distdf, condition, measurement, homestead_farmcentroid_dist_km:homestead_frag_dist_km, factor_key=TRUE)
names(distdf) <- c("HerdId", "DistanceType", "Distance")
ggplot(distdf, aes(x = DistanceType, y = Distance)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Distance measure", y = "Distance (km)") +
  theme_bw() 

2.3. Contact metrics

n_touching_cattle_fields_grazing - Number of contiguous internal and external land parcels


The number of external land parcels connected to the index farm, where both internal and external land parcels are suitable for cattle grazing (LCM 4 or 5).

dat1 %>% group_by(year) %>%
  summarise(
    "Number of contiguous grazing fields"
    , q1 = quantile(n_touching_cattle_fields_grazing , na.rm = T)[2]
    , median = median(n_touching_cattle_fields_grazing, na.rm = T)
    , q3 = quantile(n_touching_cattle_fields_grazing, na.rm = T)[4]
    , max = max(n_touching_cattle_fields_grazing, na.rm = T))
## # A tibble: 3 x 6
##   year  `"Number of contiguous grazing fields"`    q1 median    q3   max
##   <fct> <chr>                                   <dbl>  <dbl> <dbl> <int>
## 1 2015  Number of contiguous grazing fields         7     13    21   131
## 2 2016  Number of contiguous grazing fields         7     13    22   167
## 3 2017  Number of contiguous grazing fields         7     12    21   145
ggplot(dat1, aes(x = as.factor(year), y = n_touching_cattle_fields_grazing)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Year", y = "Number of contiguous grazing fields") +
  theme_bw() 


Some variation in the number of contiguous grazing fields at the farm level between years, but given the number of tests we have performed, this could be spurious.

tst8 <- dat1 %>% select(HerdType, year, n_touching_cattle_fields_grazing)
anova(glm(n_touching_cattle_fields_grazing ~ year, data = tst8), test = "F")
## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: n_touching_cattle_fields_grazing
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F    Pr(>F)    
## NULL                 57023    9757921                     
## year  2     8233     57021    9749688 24.075 3.537e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
total_shared_boundary_grazing_km - The distance (in km) of contact between contiguous internal and external land parcels


The length of the shared perimiter where both internal and external land parcels are suitable for cattle grazing (LCM 4 or 5).

dat1 %>% group_by(year) %>%
  summarise(
    "Length of shared contact boundary (km)"
    , q1 = quantile(total_shared_boundary_grazing_km , na.rm = T)[2]
    , median = median(total_shared_boundary_grazing_km, na.rm = T)
    , q3 = quantile(total_shared_boundary_grazing_km, na.rm = T)[4]
    , max = max(total_shared_boundary_grazing_km, na.rm = T))
## # A tibble: 3 x 6
##   year  `"Length of shared contact boundary (km)"`    q1 median    q3   max
##   <fct> <chr>                                      <dbl>  <dbl> <dbl> <dbl>
## 1 2015  Length of shared contact boundary (km)      1.46   2.81  4.88  44.1
## 2 2016  Length of shared contact boundary (km)      1.54   2.94  5.10  41.1
## 3 2017  Length of shared contact boundary (km)      1.44   2.78  4.85  42.9
ggplot(dat1, aes(x = as.factor(year), y = total_shared_boundary_grazing_km)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Year", y = "Length of shared contact boundary (km)") +
  theme_bw() 


Some variation in length of shared contact boundary at the farm level between years, but given the number of tests we have performed, this could be spurious.

tst9 <- dat1 %>% select(HerdType, year, total_shared_boundary_grazing_km)
anova(glm(total_shared_boundary_grazing_km ~ year, data = tst9), test = "F")
## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: total_shared_boundary_grazing_km
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F    Pr(>F)    
## NULL                 57023     582419                     
## year  2   464.15     57021     581954 22.739 1.344e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2.4. Land classification metrics

LCM4_5 - The percentage of land classified as LCM 4 or LCM 5 per farm


The %age area of the farm classified as LCM4 or LCM5 (suitable for grazing)

dat1$LCM4_5 <- round(((dat1$totLCM4.x + dat1$totLCM5.x)/dat1$totSQ)*100,2)

dat1 %>% #group_by(year) %>%
  summarise(
    "LCM 4 and LCM 5 land on farm (%)"
    , q1= quantile(LCM4_5 , na.rm = T)[2]
    , median = median(LCM4_5, na.rm = T)
    , q3 = quantile(LCM4_5, na.rm = T)[4]
    , max. = max(LCM4_5, na.rm = T))
##   "LCM 4 and LCM 5 land on farm (%)"    q1 median    q3 max.
## 1   LCM 4 and LCM 5 land on farm (%) 70.01   87.8 96.17  100
ggplot(dat1, aes(x = as.factor(year), y = LCM4_5)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Year", y = "LCM 4 and LCM 5 land on farm (%)") +
  theme_bw() 
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).


No variation in LCM 4 and LCM 5 land on farm level between years.

tst10 <- dat1 %>% select(HerdType, year, LCM4_5)
anova(glm(LCM4_5 ~ year, data = tst10), test = "F")
## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: LCM4_5
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F Pr(>F)
## NULL                 57020   29975059              
## year  2   903.36     57018   29974155 0.8592 0.4235

3. Analysis

3.1. Herd-level variation in farm fragmentation

3.1.1 Farm fragmentation and farm area

To undertake these analysis, relevent continuous variables were categorised as follows.

Fragmentation level


This categorical variable is based directly off n_fragments_5m.
1. 1 fragment: No fragmentation 2. 2-4 fragments: Little fragmentation 3. 5-7 fragments: Medium fragmentation 4. 8-10 fragments: High fragmentation 5. 11+ fragments: Very high fragmentation

dat1$fragment_category <- factor(dat1$fragment_category
                            , levels = c("Not_fragmented"
                            , "Little_fragmentation"
                            , "Medium_fragmentation"
                            , "High_fragmentation"                                                               , "Very_High_fragmentation"))

TABLE 1

dat1 %>% dplyr::select(year,fragment_category) %>% 
  tbl_summary(by = year, label = fragment_category ~ "Fragment Category")%>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Year**")
Characteristic Year
2015, N = 19,0081 2016, N = 19,0081 2017, N = 19,0081
Fragment Category
Not_fragmented 3,134 (16%) 3,178 (17%) 3,164 (17%)
Little_fragmentation 9,119 (48%) 9,041 (48%) 9,004 (47%)
Medium_fragmentation 4,137 (22%) 4,114 (22%) 4,102 (22%)
High_fragmentation 1,546 (8.1%) 1,574 (8.3%) 1,571 (8.3%)
Very_High_fragmentation 1,072 (5.6%) 1,101 (5.8%) 1,167 (6.1%)

1 Statistics presented: n (%)

Farm area category


This categorical variable is based directly off the quantiles for total_farm_area_ha (across all years). Farms with total areas < 16.4ha (25th percentile) were classed as "small", farms >= 16.4ha <= 59.1ha (75th percentile) were classed as "medium" and farms larger than 59.1km2 were classed as "large".

dat1$farm_size_category <- factor(dat1$farm_size_category
                            , levels = c("Small"
                            , "Medium"
                            , "Large"))

TABLE 2

dat1 %>% dplyr::select(year, farm_size_category) %>% 
tbl_summary(by = year, label = farm_size_category ~ "Farm area category") %>% 
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Year**")

Characteristic Year
2015, N = 19,0081 2016, N = 19,0081 2017, N = 19,0081
Farm area category
Small 4,690 (25%) 4,749 (25%) 4,725 (25%)
Medium 9,575 (50%) 9,501 (50%) 9,532 (50%)
Large 4,743 (25%) 4,758 (25%) 4,751 (25%)

1 Statistics presented: n (%)


Use the linear-by-linear association test to assess whether there is a relationship between the two ordered variables fragmentation_level and farm_area_category.

TABLE 3

dat1 %>% dplyr::select(fragment_category, farm_size_category) %>% 
tbl_summary(by = farm_size_category, label = fragment_category ~ "Fragmentation level", percent = "row") %>% 
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Farm area category**")
Characteristic Farm area category
Small, N = 14,1641 Medium, N = 28,6081 Large, N = 14,2521
Fragmentation level
Not_fragmented 4,995 (53%) 4,076 (43%) 405 (4.3%)
Little_fragmentation 7,895 (29%) 15,151 (56%) 4,118 (15%)
Medium_fragmentation 1,115 (9.0%) 6,826 (55%) 4,412 (36%)
High_fragmentation 142 (3.0%) 1,942 (41%) 2,607 (56%)
Very_High_fragmentation 17 (0.5%) 613 (18%) 2,710 (81%)

1 Statistics presented: n (%)

frag_farm_cat_tbl_1 <- table(dat1$fragment_category, dat1$farm_size_category)
lbl_test(frag_farm_cat_tbl_1)
## 
##  Asymptotic Linear-by-Linear Association Test
## 
## data:  Var2 (ordered) by
##   Var1 (Not_fragmented < Little_fragmentation < Medium_fragmentation < High_fragmentation < Very_High_fragmentation)
## Z = 118.73, p-value < 2.2e-16
## alternative hypothesis: two.sided

3.1.2. Farm fragmentation and fragment dispersal

Median distance between fragments category


This categorical variable is based directly off the quantiles for median_distance_fragments_km (across all years). Farms with fragments less than 0.52km apart on average (25th percentile) were classed as "low dispersal"; farms with fragments >= 0.52km and < 3.05km apart were classed as "medium dispersal", and farms with fragments > 3.05km apart (75th percentile) were classed as "high dispersal".

dat1$median_distance_fragments_category <- factor(dat1$median_distance_fragments_category
                            , levels = c("Low"
                            , "Medium"
                            , "High"))

TABLE 4

dat1 %>% dplyr::select(year, median_distance_fragments_category) %>% 
tbl_summary(by = year, label = median_distance_fragments_category ~ "Fragment dispersal category") %>% 
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Year**")
Characteristic Year
2015, N = 19,0081 2016, N = 19,0081 2017, N = 19,0081
Fragment dispersal category
Low 4,707 (25%) 4,774 (25%) 4,770 (25%)
Medium 9,559 (50%) 9,507 (50%) 9,452 (50%)
High 4,742 (25%) 4,727 (25%) 4,786 (25%)

1 Statistics presented: n (%)


use the linear-by-linear association test to assess whether there is a relationship between the two ordered variables fragmentation_level and fragment_dispersal_category.

TABLE 5

dat1 %>% dplyr::select(fragment_category, median_distance_fragments_category) %>% 
tbl_summary(by = median_distance_fragments_category, label = fragment_category ~ "Fragmentation level", percent = "row") %>% 
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Fragment dispersal category**")
Characteristic Fragment dispersal category
Low, N = 14,2511 Medium, N = 28,5181 High, N = 14,2551
Fragmentation level
Not_fragmented 9,476 (100%) 0 (0%) 0 (0%)
Little_fragmentation 4,257 (16%) 15,999 (59%) 6,908 (25%)
Medium_fragmentation 469 (3.8%) 8,031 (65%) 3,853 (31%)
High_fragmentation 33 (0.7%) 2,847 (61%) 1,811 (39%)
Very_High_fragmentation 16 (0.5%) 1,641 (49%) 1,683 (50%)

1 Statistics presented: n (%)

frag_farm_cat_tbl_2 <- table(dat1$fragment_category, dat1$median_distance_fragments_category)
lbl_test(frag_farm_cat_tbl_2)
## 
##  Asymptotic Linear-by-Linear Association Test
## 
## data:  Var2 (ordered) by
##   Var1 (Not_fragmented < Little_fragmentation < Medium_fragmentation < High_fragmentation < Very_High_fragmentation)
## Z = 121.69, p-value < 2.2e-16
## alternative hypothesis: two.sided
Relationship between median_distance_fragments_km and max_distance_fragments_km


There was a strong, positive correlation between the median distance between all land parcels, and maximum distance between the most distal land parcels.

cor.test(dat1$median_distance_fragments_km, dat1$max_distance_fragments_km, method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  dat1$median_distance_fragments_km and dat1$max_distance_fragments_km
## S = 2.0265e+12, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.9344263

3.1.3. Farm fragmentation and contact category

Contact category


This categorical variable is based directly off the quantiles for total_shared_boundary_grazing_km. Farms with less than 1.48km of shared boundary (25th percentile) were classed as "low contact"; farms a shared boundary >= 1.48km and < 4.94km were classed as "medium contact", and farms with shared boundaries of > 4.94km apart (75th percentile) were classed as "high contact".

dat1$contact_category <- factor(dat1$contact_category
                            , levels = c("Low"
                            , "Medium"
                            , "High"))

TABLE 6

dat1 %>% dplyr::select(year, contact_category) %>% 
tbl_summary(by = year, label = contact_category ~ "Contact category", percent = "row") %>% 
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Year**")
Characteristic Year
2015, N = 19,0081 2016, N = 19,0081 2017, N = 19,0081
Contact category
Low 4,836 (34%) 4,552 (32%) 4,899 (34%)
Medium 9,522 (33%) 9,431 (33%) 9,491 (33%)
High 4,650 (33%) 5,025 (35%) 4,618 (32%)

1 Statistics presented: n (%)


use the linear-by-linear association test to assess whether there is a relationship between the two ordered variables fragmentation_level and contact_category.

TABLE 7

dat1 %>% dplyr::select(fragment_category, contact_category) %>% 
tbl_summary(by = contact_category, label = fragment_category ~ "Fragmentation level", percent = "row") %>% 
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Conact category**")
Characteristic Conact category
Low, N = 14,2871 Medium, N = 28,4441 High, N = 14,2931
Fragmentation level
Not_fragmented 5,194 (55%) 4,112 (43%) 170 (1.8%)
Little_fragmentation 7,610 (28%) 15,910 (59%) 3,644 (13%)
Medium_fragmentation 1,236 (10%) 6,326 (51%) 4,791 (39%)
High_fragmentation 185 (3.9%) 1,559 (33%) 2,947 (63%)
Very_High_fragmentation 62 (1.9%) 537 (16%) 2,741 (82%)

1 Statistics presented: n (%)

frag_farm_cat_tbl_3 <- table(dat1$fragment_category, dat1$contact_category)
lbl_test(frag_farm_cat_tbl_3)
## 
##  Asymptotic Linear-by-Linear Association Test
## 
## data:  Var2 (ordered) by
##   Var1 (Not_fragmented < Little_fragmentation < Medium_fragmentation < High_fragmentation < Very_High_fragmentation)
## Z = 125.63, p-value < 2.2e-16
## alternative hypothesis: two.sided

3.2 The variation of farm fragmentation, farm dispersal and farm contact with production type

3.2.1 Production type and farm fragmentation

TABLE 8

dat1 %>% dplyr::select(HerdType, fragment_category) %>% 
tbl_summary(by = fragment_category, percent = "row") %>% 
  add_n() %>% 
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**")
Characteristic N Herd type Very_High_fragmentation, N = 3,3401
Not_fragmented, N = 9,4761 Little_fragmentation, N = 27,1641 Medium_fragmentation, N = 12,3531 High_fragmentation, N = 4,6911
HerdType 57,024
Breeder 5,352 (19%) 14,428 (51%) 5,705 (20%) 1,844 (6.5%) 1,086 (3.8%)
Dairy 642 (7.9%) 2,966 (36%) 2,359 (29%) 1,196 (15%) 965 (12%)
Finisher 983 (16%) 2,878 (48%) 1,297 (22%) 475 (7.9%) 343 (5.7%)
Other 2,499 (17%) 6,892 (48%) 2,992 (21%) 1,176 (8.1%) 946 (6.5%)

1 Statistics presented: n (%)

3.2.2 Production type and farm area

TABLE 9

dat1 %>% dplyr::select(HerdType, total_farm_area_ha) %>% 
tbl_summary(by = HerdType
            , type = all_continuous() ~ "continuous2"
            , label = total_farm_area_ha ~ "Farm area (ha)"
            , statistic = all_continuous() ~ c("{median} ({p25}, {p75})", 
                                     "{min}, {max}")) %>%  
  add_n() %>% 
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
  add_p()
Characteristic N Herd type p-value1
Breeder, N = 28,415 Dairy, N = 8,128 Finisher, N = 5,976 Other, N = 14,505
Farm area (ha) 57,024 <0.001
Median (IQR) 26 (15, 46) 65 (42, 100) 32 (17, 58) 28 (14, 57)
Range 0, 1,418 2, 1,973 2, 1,007 0, 3,252

1 Statistical tests performed: Kruskal-Wallis test

kruskal.test(total_farm_area_ha ~ HerdType, data = dat1)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  total_farm_area_ha by HerdType
## Kruskal-Wallis chi-squared = 6620.6, df = 3, p-value < 2.2e-16
ggplot(dat1, aes(x = HerdType, y = total_farm_area_ha)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Herd Type", y = "Total farm area (ha)") +
  theme_bw() 

#### 3.2.3 Production type and land parcels

TABLE 10

dat1 %>% dplyr::select(HerdType, n_fields) %>% 
tbl_summary(by = HerdType
            , type = all_continuous() ~ "continuous2"
            , label = n_fields ~ "number of land parcels"
            , statistic = all_continuous() ~ c("{median} ({p25}, {p75})", 
                                     "{min}, {max}")) %>%  
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
  add_p()
Characteristic Herd type p-value1
Breeder, N = 28,415 Dairy, N = 8,128 Finisher, N = 5,976 Other, N = 14,505
number of land parcels <0.001
Median (IQR) 22 (13, 35) 39 (27, 56) 21 (13, 35) 22 (12, 37)
Range 0, 423 2, 379 1, 219 0, 444

1 Statistical tests performed: Kruskal-Wallis test


Only minor changes can be observed WITHIN herd types through time

dat1 %>% group_by(HerdType, year) %>%
  summarise(
    "Number of land parcels"
    , total = n()
    , q1 = quantile(n_fields)[2]
    , median = median(n_fields)
    , q3 = quantile(n_fields)[4]
    , max = max(n_fields))
## # A tibble: 12 x 8
## # Groups:   HerdType [4]
##    HerdType year  `"Number of land parcels"` total    q1 median    q3   max
##    <chr>    <fct> <chr>                      <int> <dbl>  <dbl> <dbl> <int>
##  1 Breeder  2015  Number of land parcels      9464    13     22  35     413
##  2 Breeder  2016  Number of land parcels      9565    13     22  35     423
##  3 Breeder  2017  Number of land parcels      9386    13     22  35     377
##  4 Dairy    2015  Number of land parcels      2740    27     39  56     366
##  5 Dairy    2016  Number of land parcels      2697    27     39  56     379
##  6 Dairy    2017  Number of land parcels      2691    27     40  56     377
##  7 Finisher 2015  Number of land parcels      1970    13     21  34     194
##  8 Finisher 2016  Number of land parcels      1963    13     21  34.5   188
##  9 Finisher 2017  Number of land parcels      2043    13     21  35     219
## 10 Other    2015  Number of land parcels      4834    12     21  37     366
## 11 Other    2016  Number of land parcels      4783    12     21  37     379
## 12 Other    2017  Number of land parcels      4888    12     22  37     444
kruskal.test(n_fields ~ HerdType, data = dat1)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  n_fields by HerdType
## Kruskal-Wallis chi-squared = 4537.1, df = 3, p-value < 2.2e-16
ggplot(dat1, aes(x = HerdType, y = n_fields)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Herd Type", y = "Number of land parcels") +
  theme_bw() 

3.3.4 Production type and fragments (5m buffer applied)

TABLE 11

dat1 %>% dplyr::select(HerdType, n_fragments_5m) %>% 
tbl_summary(by = HerdType
            , type = all_continuous() ~ "continuous2"
            , label = n_fragments_5m ~ "Number of fragments (5m buffer)"
            , statistic = all_continuous() ~ c("{median} ({p25}, {p75})", 
                                     "{min}, {max}")) %>%  
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
  add_p()
Characteristic Herd type p-value1
Breeder, N = 28,415 Dairy, N = 8,128 Finisher, N = 5,976 Other, N = 14,505
Number of fragments (5m buffer) <0.001
Median (IQR) 3 (2, 5) 5 (3, 8) 3 (2, 6) 3 (2, 6)
Range 0, 35 1, 33 1, 32 0, 47

1 Statistical tests performed: Kruskal-Wallis test

kruskal.test(n_fragments_5m ~ HerdType, data = dat1)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  n_fragments_5m by HerdType
## Kruskal-Wallis chi-squared = 2117.2, df = 3, p-value < 2.2e-16
ggplot(dat1, aes(x = HerdType, y = n_fragments_5m)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Herd Type", y = "Number of fragments (5m buffer)") +
  theme_bw() 

3.3.5 Production type and fragment dispersal (median distance between fragments)

TABLE 12

dat1 %>% dplyr::select(HerdType, median_distance_fragments_km) %>% 
tbl_summary(by = HerdType
            , type = all_continuous() ~ "continuous2"
            , label = median_distance_fragments_km ~ "Median distance between fragments (km)"
            , statistic = all_continuous() ~ c("{median} ({p25}, {p75})", 
                                     "{min}, {max}")) %>%  
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
  add_p()
Characteristic Herd type p-value1
Breeder, N = 28,415 Dairy, N = 8,128 Finisher, N = 5,976 Other, N = 14,505
Median distance between fragments (km) <0.001
Median (IQR) 1.21 (0.43, 2.95) 1.88 (1.04, 3.21) 1.34 (0.50, 3.04) 1.36 (0.47, 3.05)
Range 0.00, 119.53 0.00, 97.09 0.00, 82.97 0.00, 105.15

1 Statistical tests performed: Kruskal-Wallis test

kruskal.test(median_distance_fragments_km ~ HerdType, data = dat1)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  median_distance_fragments_km by HerdType
## Kruskal-Wallis chi-squared = 699.92, df = 3, p-value < 2.2e-16


As shown previously, there is no variation in the median distance between parcels through time when considering all herd types together

anova(glm(median_distance_fragments_km  ~ year, data = tst6), test = "F")
## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: median_distance_fragments_km
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F Pr(>F)
## NULL                 57023    1243464              
## year  2   22.497     57021    1243442 0.5158  0.597


Only minor changes can be observed WITHIN herd types

dat1 %>% group_by(HerdType, year) %>%
  summarise(
    "Median distance between fragments (km)"
    , total = n()
    , q1 = quantile(median_distance_fragments_km)[2]
    , median = median(median_distance_fragments_km)
    , q3 = quantile(median_distance_fragments_km)[4]
    , max = max(median_distance_fragments_km))
## # A tibble: 12 x 8
## # Groups:   HerdType [4]
##    HerdType year  `"Median distance between frag~ total    q1 median    q3   max
##    <chr>    <fct> <chr>                           <int> <dbl>  <dbl> <dbl> <dbl>
##  1 Breeder  2015  Median distance between fragme~  9464 0.438   1.23  2.96 120. 
##  2 Breeder  2016  Median distance between fragme~  9565 0.427   1.21  2.93 120. 
##  3 Breeder  2017  Median distance between fragme~  9386 0.425   1.2   2.96 120. 
##  4 Dairy    2015  Median distance between fragme~  2740 1.03    1.84  3.18  73.2
##  5 Dairy    2016  Median distance between fragme~  2697 1.03    1.88  3.19  52.3
##  6 Dairy    2017  Median distance between fragme~  2691 1.05    1.91  3.22  97.1
##  7 Finisher 2015  Median distance between fragme~  1970 0.512   1.34  3.04  82.6
##  8 Finisher 2016  Median distance between fragme~  1963 0.509   1.34  3.01  83.0
##  9 Finisher 2017  Median distance between fragme~  2043 0.489   1.33  3.07  69.7
## 10 Other    2015  Median distance between fragme~  4834 0.471   1.35  3.04  87.5
## 11 Other    2016  Median distance between fragme~  4783 0.467   1.32  3.04  99.8
## 12 Other    2017  Median distance between fragme~  4888 0.473   1.40  3.07 105.
ggplot(dat1, aes(x = HerdType, y = median_distance_fragments_km)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Herd Type", y = "Median distance between fragments (km)") +
  theme_bw() 

3.3.6 Production type and fragment dispersal (maximum distance between fragments)

TABLE 13

dat1 %>% dplyr::select(HerdType, max_distance_fragments_km) %>% 
tbl_summary(by = HerdType
            , type = all_continuous() ~ "continuous2"
            , label = max_distance_fragments_km ~ "Maximum distance between fragments (km)"
            , statistic = all_continuous() ~ c("{median} ({p25}, {p75})", 
                                     "{min}, {max}")) %>%  
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
  add_p()
Characteristic Herd type p-value1
Breeder, N = 28,415 Dairy, N = 8,128 Finisher, N = 5,976 Other, N = 14,505
Maximum distance between fragments (km) <0.001
Median (IQR) 2.0 (0.5, 5.5) 3.8 (1.7, 7.4) 2.2 (0.6, 6.0) 2.2 (0.6, 6.1)
Range 0.0, 129.1 0.0, 117.2 0.0, 152.0 0.0, 117.5

1 Statistical tests performed: Kruskal-Wallis test

kruskal.test(max_distance_fragments_km ~ HerdType, data = dat1)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  max_distance_fragments_km by HerdType
## Kruskal-Wallis chi-squared = 1128.5, df = 3, p-value < 2.2e-16
ggplot(dat1, aes(x = HerdType, y = max_distance_fragments_km)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Herd Type", y = "Maximum distance between fragments (km)") +
  theme_bw() 

3.3.7 Production type and fragment dispersal (distance to biggest fragment)

TABLE 14

dat1 %>% dplyr::select(HerdType, homestead_frag_dist_km) %>% 
tbl_summary(by = HerdType
            , type = all_continuous() ~ "continuous2"
            , label = homestead_frag_dist_km ~ "Distance to biggest fragment (km)"
            , statistic = all_continuous() ~ c("{median} ({p25}, {p75})", 
                                     "{min}, {max}")) %>%  
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
  add_p()
Characteristic Herd type p-value1
Breeder, N = 28,415 Dairy, N = 8,128 Finisher, N = 5,976 Other, N = 14,505
Distance to biggest fragment (km) 0.002
Median (IQR) 0.49 (0.22, 1.71) 0.44 (0.25, 1.11) 0.44 (0.22, 1.70) 0.47 (0.22, 1.64)
Range 0.00, 120.83 0.01, 114.34 0.00, 127.54 0.00, 109.33

1 Statistical tests performed: Kruskal-Wallis test

kruskal.test(homestead_frag_dist_km ~ HerdType, data = dat1)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  homestead_frag_dist_km by HerdType
## Kruskal-Wallis chi-squared = 14.921, df = 3, p-value = 0.001885
ggplot(dat1, aes(x = HerdType, y = homestead_frag_dist_km)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Herd Type", y = "Distance to biggest fragment (km)") +
  theme_bw() 

3.3.8 Production type and contact (total shared boundary)

TABLE 15

dat1 %>% dplyr::select(HerdType, total_shared_boundary_grazing_km) %>% 
tbl_summary(by = HerdType
            , type = all_continuous() ~ "continuous2"
            , label = total_shared_boundary_grazing_km ~ "Length of shared contact boundary (km)"
            , statistic = all_continuous() ~ c("{median} ({p25}, {p75})", 
                                     "{min}, {max}")) %>%  
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
  add_p()
Characteristic Herd type p-value1
Breeder, N = 28,415 Dairy, N = 8,128 Finisher, N = 5,976 Other, N = 14,505
Length of shared contact boundary (km) <0.001
Median (IQR) 2.46 (1.29, 4.13) 5.14 (3.12, 7.81) 2.85 (1.53, 4.84) 2.69 (1.39, 4.85)
Range 0.00, 33.33 0.00, 44.10 0.00, 33.52 0.00, 28.57

1 Statistical tests performed: Kruskal-Wallis test

kruskal.test(total_shared_boundary_grazing_km ~ HerdType, data = dat1)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  total_shared_boundary_grazing_km by HerdType
## Kruskal-Wallis chi-squared = 4701.9, df = 3, p-value < 2.2e-16
ggplot(dat1, aes(x = HerdType, y = total_shared_boundary_grazing_km)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Herd Type", y = "Length of shared contact boundary (km)") +
  theme_bw() 

3.3.9 Production type and LCM

TABLE 16

dat1 %>% dplyr::select(HerdType, LCM4_5) %>% 
tbl_summary(by = HerdType
            , type = all_continuous() ~ "continuous2"
            , label = LCM4_5 ~ "LCM 4 and LCM 5 land on farm (%)"
            , statistic = all_continuous() ~ c("{median} ({p25}, {p75})", 
                                     "{min}, {max}")) %>%  
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
  add_p()
Characteristic Herd type p-value1
Breeder, N = 28,415 Dairy, N = 8,128 Finisher, N = 5,976 Other, N = 14,505
LCM 4 and LCM 5 land on farm (%) <0.001
Median (IQR) 86 (63, 96) 90 (79, 96) 89 (76, 96) 89 (73, 97)
Range 0, 100 1, 100 1, 100 0, 100
Unknown 3 0 0 0

1 Statistical tests performed: Kruskal-Wallis test

kruskal.test(LCM4_5 ~ HerdType, data = dat1)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  LCM4_5 by HerdType
## Kruskal-Wallis chi-squared = 492.57, df = 3, p-value < 2.2e-16
ggplot(dat1, aes(x = HerdType, y = LCM4_5)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Herd Type", y = "LCM 4 and LCM 5 land on farm (%)") +
  theme_bw() 
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).