Land Fragmentation in cattle enterprises in NI

Methods and code for the manuscript quantifying land fragmentation metrics for cattle enterprises in Northern Ireland

This is an R Markdown document which should make the analytical steps clear and repeatable. In lieu of providing the original spatial data (due to GDPR concerns), we present definitions, summary data and visual distributions of the research data.

1. Metadata on herd type and herd size

There are 19008 Herd IDs and 17744 Business IDs across the three study years.

dat1 %>% group_by(year) %>%
  summarise(
    n.herds = n_distinct(HerdId)
  , n.businesses = n_distinct(BusinessId))

## # A tibble: 3 x 3
##   year  n.herds n.businesses
##   <fct>   <int>        <int>
## 1 2015    19008        17744
## 2 2016    19008        17744
## 3 2017    19008        17744

Whilst business IDs can be associated with more than one herd, in the vast majority of cases, there was a 1:1 mapping between Business IDs and Herd IDs; out of 17744 businesses, 1192 were associated with more than one Herd ID (i.e. 6.72%)

dat1 %>% group_by(BusinessId) %>%
  summarise(
    n.herds.per.bus.id = n_distinct(HerdId)) %>%
  filter(n.herds.per.bus.id > 1) %>%
  summarise(n.bus.multip.herds = n_distinct(BusinessId))

## # A tibble: 1 x 1
##   n.bus.multip.herds
##                <int>
## 1               1192

Whilst herds could change production type year on year, The number of different herd types remained broadly consistent across the three years of the study

ggplot(dat1, aes(x = year, fill = HerdType)) +
  geom_bar(stat="count") +
  scale_fill_viridis(discrete = TRUE) +
  labs(x = "Year", y = "Number of herds", fill = "Herd Type") +
  theme_bw()

There is no notable difference in herd types between years.

orig_table <- table(dat1$HerdType, dat1$year)
round(prop.table(orig_table, margin = 2),2)

##           
##            2015 2016 2017
##   Breeder  0.50 0.50 0.49
##   Dairy    0.14 0.14 0.14
##   Finisher 0.10 0.10 0.11
##   Other    0.25 0.25 0.26

chisq.test(orig_table)

## 
##  Pearson's Chi-squared test
## 
## data:  orig_table
## X-squared = 5.3394, df = 6, p-value = 0.5011

Median herd size varied with production type and also remained broadly consistent across the three years of the study, within herd types.

dat1 %>% group_by(HerdType) %>%
  summarise(
     "Median herd size" 
    , q1 = quantile(median_herd_size)[2]
    , median = median(median_herd_size)
    , q2. = quantile(median_herd_size)[4]
    , max = max(median_herd_size))

## # A tibble: 4 x 6
##   HerdType `"Median herd size"`    q1 median   q2.   max
##   <chr>    <chr>                <dbl>  <dbl> <dbl> <dbl>
## 1 Breeder  Median herd size        17   30      54  904.
## 2 Dairy    Median herd size        96  171     277 1836 
## 3 Finisher Median herd size        21   45.5    97 1492.
## 4 Other    Median herd size        15   38      84 1208

ggplot(dat1, aes(x = HerdType, y = median_herd_size, fill = HerdType)) +
  geom_boxplot() +
  scale_fill_viridis(discrete = TRUE) +
  labs(x = "Herd Type", y = "Median herd size", fill = "Herd Type") +
  theme_bw() +
  facet_wrap(~year)

2. Research data

2.1. Fragmentation metrics

n_fields - Number of land parcels

The number of individual land parcels associated with each farm.

dat1 %>% group_by(year) %>%
  summarise(
    "Number of land parcels"
    , q1= quantile(n_fields)[2]
    , median = median(n_fields)
    , q3 = quantile(n_fields)[4]
    , max = max(n_fields))

## # A tibble: 3 x 6
##   year  `"Number of land parcels"`    q1 median    q3   max
##   <fct> <chr>                      <dbl>  <dbl> <dbl> <int>
## 1 2015  Number of land parcels        14     24    39   413
## 2 2016  Number of land parcels        14     24    39   423
## 3 2017  Number of land parcels        14     24    39   444

ggplot(dat1, aes(x = as.factor(year), y = n_fields)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Year", y = "Number of land parcels") +
  theme_bw()

No variation the number of land parcels at the farm level between years.

tst2 <- dat1 %>% select(HerdType, year, n_fields)
anova(glm(n_fields ~ year, data = tst2), test = "F")

## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: n_fields
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F Pr(>F)
## NULL                 57023   36362407              
## year  2   268.73     57021   36362138 0.2107   0.81

n_fragments - Number of fragments

The number of land parcel "units" associated with each farm.

dat1 %>% group_by(year) %>%
  summarise(
    "Number of fragments"
    , q1 = quantile(n_fragments)[2]
    , median = median(n_fragments)
    , q3 = quantile(n_fragments)[4]
    , max = max(n_fragments))

## # A tibble: 3 x 6
##   year  `"Number of fragments"`    q1 median    q3   max
##   <fct> <chr>                   <dbl>  <dbl> <dbl> <int>
## 1 2015  Number of fragments         4      6    10    80
## 2 2016  Number of fragments         4      6    10    81
## 3 2017  Number of fragments         4      6    10    80

ggplot(dat1, aes(x = as.factor(year), y = n_fragments)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Year", y = "Number of fragments") +
  theme_bw()

No variation the number of fragments at the farm level between years.

tst3 <- dat1 %>% select(HerdType, year, n_fragments)
anova(glm(n_fragments ~ year, data = tst3), test = "F")

## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: n_fragments
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F Pr(>F)
## NULL                 57023    2194821              
## year  2   159.74     57021    2194661 2.0751 0.1256

n_fragments_5m - Number of fragments (5m buffer applied)

The number of land parcel "units" associated with each farm, after the application of a 5m buffer.

dat1 %>% group_by(year) %>%
  summarise(
    "Number of fragments (5m buffer)"
    , q1 = quantile(n_fragments_5m)[2]
    , median = median(n_fragments_5m)
    , q3 = quantile(n_fragments_5m)[4]
    , max = max(n_fragments_5m))

## # A tibble: 3 x 6
##   year  `"Number of fragments (5m buffer)"`    q1 median    q3   max
##   <fct> <chr>                               <dbl>  <dbl> <dbl> <int>
## 1 2015  Number of fragments (5m buffer)         2      3     6    39
## 2 2016  Number of fragments (5m buffer)         2      3     6    47
## 3 2017  Number of fragments (5m buffer)         2      3     6    42

ggplot(dat1, aes(x = as.factor(year), y = n_fragments_5m)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Year", y = "Number of fragments (5m buffer)") +
  theme_bw()

No variation the number of fragments at the farm level (5m buffer) between years.

tst4 <- dat1 %>% select(HerdType, year, n_fragments_5m)
anova(glm(n_fragments_5m ~ year, data = tst4), test = "F")

## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: n_fragments_5m
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F Pr(>F)
## NULL                 57023     689708              
## year  2   50.729     57021     689657 2.0972 0.1228

total_farm_area_ha - Farm area

The total farm area in hectares.

dat1 %>% group_by(year) %>%
  summarise(
    "Farm area (ha)"
    , q1 = quantile(total_farm_area_ha, na.rm = T)[2]
    , median = median(total_farm_area_ha, na.rm = T)
    , q3 = quantile(total_farm_area_ha, na.rm = T)[4]
    , max = max(total_farm_area_ha, na.rm = T))

## # A tibble: 3 x 6
##   year  `"Farm area (ha)"`    q1 median    q3   max
##   <fct> <chr>              <dbl>  <dbl> <dbl> <dbl>
## 1 2015  Farm area (ha)      16.5   31.2  59   1403 
## 2 2016  Farm area (ha)      16.4   31.1  59.2 3241 
## 3 2017  Farm area (ha)      16.4   31.4  59.1 3252.

ggplot(dat1, aes(x = as.factor(year), y = total_farm_area_ha)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Year", y = "Farm area (ha)") +
  theme_bw()

No variation in farm area at the farm level between years.

tst5 <- dat1 %>% select(HerdType, year, total_farm_area_ha)
anova(glm(total_farm_area_ha ~ year, data = tst5), test = "F")

## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: total_farm_area_ha
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F Pr(>F)
## NULL                 57023  224573593              
## year  2   3521.5     57021  224570071 0.4471 0.6395

2.2. Dispersal metrics

median_distance_fragments_km - Median distance between fragments

The median distance (in km) between the centroids of all land parcels associated with a cattle farm.

dat1 %>% group_by(year) %>%
  summarise(
    "Median distance between fragments (km)"
    , q1 = quantile(median_distance_fragments_km, na.rm = T)[2]
    , median = median(median_distance_fragments_km, na.rm = T)
    , q3 = quantile(median_distance_fragments_km, na.rm = T)[4]
    , max = max(median_distance_fragments_km, na.rm = T))

## # A tibble: 3 x 6
##   year  `"Median distance between fragments (km)"`    q1 median    q3   max
##   <fct> <chr>                                      <dbl>  <dbl> <dbl> <dbl>
## 1 2015  Median distance between fragments (km)     0.528   1.39  3.04  120.
## 2 2016  Median distance between fragments (km)     0.518   1.36  3.03  120.
## 3 2017  Median distance between fragments (km)     0.518   1.38  3.07  120.

ggplot(dat1, aes(x = as.factor(year), y = median_distance_fragments_km)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Year", y = "Median distance between fragments (km)") +
  theme_bw()

No variation in median distance between fragments at the farm level between years.

tst6 <- dat1 %>% select(HerdType, year, median_distance_fragments_km)
anova(glm(median_distance_fragments_km ~ year, data = tst6), test = "F")

## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: median_distance_fragments_km
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F Pr(>F)
## NULL                 57023    1243464              
## year  2   22.497     57021    1243442 0.5158  0.597

max_distance_fragments_km - Maximum distance between fragments

The max distance (in km) between the centroids of the two most distal land parcels associated with a cattle farm.

dat1 %>% #group_by(year) %>%
  summarise(
    "Max distance between fragments (km)"
    , q1 = quantile(max_distance_fragments_km, na.rm = T)[2]
    , median = median(max_distance_fragments_km, na.rm = T)
    , q3 = quantile(max_distance_fragments_km, na.rm = T)[4]
    , max = max(max_distance_fragments_km, na.rm = T))

##   "Max distance between fragments (km)"    q1 median    q3     max
## 1   Max distance between fragments (km) 0.639  2.321 6.088 152.015

ggplot(dat1, aes(x = as.factor(year), y = max_distance_fragments_km)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Year", y = "Max distance between fragments (km)") +
  theme_bw()

No variation in max distance between fragments at the farm level between years.

tst7 <- dat1 %>% select(HerdType, year, max_distance_fragments_km)
anova(glm(max_distance_fragments_km ~ year, data = tst7), test = "F")

## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: max_distance_fragments_km
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F Pr(>F)
## NULL                 57023    4198596              
## year  2    142.5     57021    4198453 0.9677   0.38

Distance between registered homestead and farm centroid, biggest fragement and biggest land parcel

The distances (in km) between the farm homestead (registered address) and the centroid of all land parcels, the centroid of the biggest field and the centroid of the biggest fragment.

dat1$homestead_farmcentroid_dist_km <- round(as.numeric(dat1$homestead_farmcentroid_dist_km),3)
dat1$homestead_field_dist_km <- round(as.numeric(dat1$homestead_field_dist_km),3)
dat1$homestead_frag_dist_km <- round(as.numeric(dat1$homestead_frag_dist_km),3)

dat1 %>%
  summarise(
    "Distance: homestead & farm centroid (km)"
    , q1 = quantile(homestead_farmcentroid_dist_km, na.rm = T)[2]
    , median = median(homestead_farmcentroid_dist_km, na.rm = T)
    , q3 = quantile(homestead_farmcentroid_dist_km, na.rm = T)[4]
    , max = max(homestead_farmcentroid_dist_km, na.rm = T))

##   "Distance: homestead & farm centroid (km)"    q1 median    q3     max
## 1   Distance: homestead & farm centroid (km) 0.273  0.667 1.705 112.756

dat1 %>%
  summarise(
    "Distance: homestead & largest parcel (km)"
    , q1 = quantile(homestead_field_dist_km, na.rm = T)[2]
    , median= median(homestead_field_dist_km, na.rm = T)
    , q3 = quantile(homestead_field_dist_km, na.rm = T)[4]
    , max = max(homestead_field_dist_km, na.rm = T))

##   "Distance: homestead & largest parcel (km)"   q1 median    q3    max
## 1   Distance: homestead & largest parcel (km) 0.31  0.686 2.068 127.47

dat1 %>%
  summarise(
    "Distance: homestead & largest fragment (km)"
    , q1 = quantile(homestead_frag_dist_km, na.rm = T)[2]
    , median = median(homestead_frag_dist_km, na.rm = T)
    , q3 = quantile(homestead_frag_dist_km, na.rm = T)[4]
    , max = max(homestead_frag_dist_km, na.rm = T))

##   "Distance: homestead & largest fragment (km)"    q1 median      q3    max
## 1   Distance: homestead & largest fragment (km) 0.223 0.4695 1.58325 127.54

distdf <- dat1 %>% select (HerdId, homestead_farmcentroid_dist_km, homestead_field_dist_km, homestead_frag_dist_km)
distdf$HerdId <- as.factor(distdf$HerdId)
distdf <- gather(distdf, condition, measurement, homestead_farmcentroid_dist_km:homestead_frag_dist_km, factor_key=TRUE)
names(distdf) <- c("HerdId", "DistanceType", "Distance")

ggplot(distdf, aes(x = DistanceType, y = Distance)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Distance measure", y = "Distance (km)") +
  theme_bw()

2.3. Contact metrics

n_touching_cattle_fields_grazing - Number of contiguous internal and external land parcels

The number of external land parcels connected to the index farm, where both internal and external land parcels are suitable for cattle grazing (LCM 4 or 5).

dat1 %>% group_by(year) %>%
  summarise(
    "Number of contiguous grazing fields"
    , q1 = quantile(n_touching_cattle_fields_grazing , na.rm = T)[2]
    , median = median(n_touching_cattle_fields_grazing, na.rm = T)
    , q3 = quantile(n_touching_cattle_fields_grazing, na.rm = T)[4]
    , max = max(n_touching_cattle_fields_grazing, na.rm = T))

## # A tibble: 3 x 6
##   year  `"Number of contiguous grazing fields"`    q1 median    q3   max
##   <fct> <chr>                                   <dbl>  <dbl> <dbl> <int>
## 1 2015  Number of contiguous grazing fields         7     13    21   131
## 2 2016  Number of contiguous grazing fields         7     13    22   167
## 3 2017  Number of contiguous grazing fields         7     12    21   145

ggplot(dat1, aes(x = as.factor(year), y = n_touching_cattle_fields_grazing)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Year", y = "Number of contiguous grazing fields") +
  theme_bw()

Some variation in the number of contiguous grazing fields at the farm level between years, but given the number of tests we have performed, this could be spurious.

tst8 <- dat1 %>% select(HerdType, year, n_touching_cattle_fields_grazing)
anova(glm(n_touching_cattle_fields_grazing ~ year, data = tst8), test = "F")

## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: n_touching_cattle_fields_grazing
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F    Pr(>F)    
## NULL                 57023    9757921                     
## year  2     8233     57021    9749688 24.075 3.537e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

total_shared_boundary_grazing_km - The distance (in km) of contact between contiguous internal and external land parcels

The length of the shared perimiter where both internal and external land parcels are suitable for cattle grazing (LCM 4 or 5).

dat1 %>% group_by(year) %>%
  summarise(
    "Length of shared contact boundary (km)"
    , q1 = quantile(total_shared_boundary_grazing_km , na.rm = T)[2]
    , median = median(total_shared_boundary_grazing_km, na.rm = T)
    , q3 = quantile(total_shared_boundary_grazing_km, na.rm = T)[4]
    , max = max(total_shared_boundary_grazing_km, na.rm = T))

## # A tibble: 3 x 6
##   year  `"Length of shared contact boundary (km)"`    q1 median    q3   max
##   <fct> <chr>                                      <dbl>  <dbl> <dbl> <dbl>
## 1 2015  Length of shared contact boundary (km)      1.46   2.81  4.88  44.1
## 2 2016  Length of shared contact boundary (km)      1.54   2.94  5.10  41.1
## 3 2017  Length of shared contact boundary (km)      1.44   2.78  4.85  42.9

ggplot(dat1, aes(x = as.factor(year), y = total_shared_boundary_grazing_km)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Year", y = "Length of shared contact boundary (km)") +
  theme_bw()

Some variation in length of shared contact boundary at the farm level between years, but given the number of tests we have performed, this could be spurious.

tst9 <- dat1 %>% select(HerdType, year, total_shared_boundary_grazing_km)
anova(glm(total_shared_boundary_grazing_km ~ year, data = tst9), test = "F")

## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: total_shared_boundary_grazing_km
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F    Pr(>F)    
## NULL                 57023     582419                     
## year  2   464.15     57021     581954 22.739 1.344e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2.4. Land classification metrics

LCM4_5 - The percentage of land classified as LCM 4 or LCM 5 per farm

The %age area of the farm classified as LCM4 or LCM5 (suitable for grazing)

dat1$LCM4_5 <- round(((dat1$totLCM4.x + dat1$totLCM5.x)/dat1$totSQ)*100,2)

dat1 %>% #group_by(year) %>%
  summarise(
    "LCM 4 and LCM 5 land on farm (%)"
    , q1= quantile(LCM4_5 , na.rm = T)[2]
    , median = median(LCM4_5, na.rm = T)
    , q3 = quantile(LCM4_5, na.rm = T)[4]
    , max. = max(LCM4_5, na.rm = T))

##   "LCM 4 and LCM 5 land on farm (%)"    q1 median    q3 max.
## 1   LCM 4 and LCM 5 land on farm (%) 70.01   87.8 96.17  100

ggplot(dat1, aes(x = as.factor(year), y = LCM4_5)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Year", y = "LCM 4 and LCM 5 land on farm (%)") +
  theme_bw()

## Warning: Removed 3 rows containing non-finite values (stat_ydensity).

## Warning: Removed 3 rows containing non-finite values (stat_boxplot).

No variation in LCM 4 and LCM 5 land on farm level between years.

tst10 <- dat1 %>% select(HerdType, year, LCM4_5)
anova(glm(LCM4_5 ~ year, data = tst10), test = "F")

## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: LCM4_5
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F Pr(>F)
## NULL                 57020   29975059              
## year  2   903.36     57018   29974155 0.8592 0.4235

3. Analysis

3.1. Herd-level variation in farm fragmentation

3.1.1 Farm fragmentation and farm area

To undertake these analysis, relevent continuous variables were categorised as follows.

Fragmentation level

This categorical variable is based directly off n_fragments_5m.
1. 1 fragment: No fragmentation 2. 2-4 fragments: Little fragmentation 3. 5-7 fragments: Medium fragmentation 4. 8-10 fragments: High fragmentation 5. 11+ fragments: Very high fragmentation

dat1$fragment_category <- factor(dat1$fragment_category
                            , levels = c("Not_fragmented"
                            , "Little_fragmentation"
                            , "Medium_fragmentation"
                            , "High_fragmentation"                                                               , "Very_High_fragmentation"))

TABLE 1

dat1 %>% dplyr::select(year,fragment_category) %>% 
  tbl_summary(by = year, label = fragment_category ~ "Fragment Category")%>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Year**")

Characteristic	Year
Characteristic	2015, N = 19,008¹	2016, N = 19,008¹	2017, N = 19,008¹
Fragment Category
Not_fragmented	3,134 (16%)	3,178 (17%)	3,164 (17%)
Little_fragmentation	9,119 (48%)	9,041 (48%)	9,004 (47%)
Medium_fragmentation	4,137 (22%)	4,114 (22%)	4,102 (22%)
High_fragmentation	1,546 (8.1%)	1,574 (8.3%)	1,571 (8.3%)
Very_High_fragmentation	1,072 (5.6%)	1,101 (5.8%)	1,167 (6.1%)
¹ Statistics presented: n (%)

Farm area category

This categorical variable is based directly off the quantiles for total_farm_area_ha (across all years). Farms with total areas < 16.4ha (25th percentile) were classed as "small", farms >= 16.4ha <= 59.1ha (75th percentile) were classed as "medium" and farms larger than 59.1km2 were classed as "large".

dat1$farm_size_category <- factor(dat1$farm_size_category
                            , levels = c("Small"
                            , "Medium"
                            , "Large"))

TABLE 2

dat1 %>% dplyr::select(year, farm_size_category) %>% 
tbl_summary(by = year, label = farm_size_category ~ "Farm area category") %>% 
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Year**")

Characteristic	Year
Characteristic	2015, N = 19,008¹	2016, N = 19,008¹	2017, N = 19,008¹
Farm area category
Small	4,690 (25%)	4,749 (25%)	4,725 (25%)
Medium	9,575 (50%)	9,501 (50%)	9,532 (50%)
Large	4,743 (25%)	4,758 (25%)	4,751 (25%)
¹ Statistics presented: n (%)

Use the linear-by-linear association test to assess whether there is a relationship between the two ordered variables fragmentation_level and farm_area_category.

TABLE 3

dat1 %>% dplyr::select(fragment_category, farm_size_category) %>% 
tbl_summary(by = farm_size_category, label = fragment_category ~ "Fragmentation level", percent = "row") %>% 
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Farm area category**")

Characteristic	Farm area category
Characteristic	Small, N = 14,164¹	Medium, N = 28,608¹	Large, N = 14,252¹
Fragmentation level
Not_fragmented	4,995 (53%)	4,076 (43%)	405 (4.3%)
Little_fragmentation	7,895 (29%)	15,151 (56%)	4,118 (15%)
Medium_fragmentation	1,115 (9.0%)	6,826 (55%)	4,412 (36%)
High_fragmentation	142 (3.0%)	1,942 (41%)	2,607 (56%)
Very_High_fragmentation	17 (0.5%)	613 (18%)	2,710 (81%)
¹ Statistics presented: n (%)

frag_farm_cat_tbl_1 <- table(dat1$fragment_category, dat1$farm_size_category)
lbl_test(frag_farm_cat_tbl_1)

## 
##  Asymptotic Linear-by-Linear Association Test
## 
## data:  Var2 (ordered) by
##   Var1 (Not_fragmented < Little_fragmentation < Medium_fragmentation < High_fragmentation < Very_High_fragmentation)
## Z = 118.73, p-value < 2.2e-16
## alternative hypothesis: two.sided

3.1.2. Farm fragmentation and fragment dispersal

Median distance between fragments category

This categorical variable is based directly off the quantiles for median_distance_fragments_km (across all years). Farms with fragments less than 0.52km apart on average (25th percentile) were classed as "low dispersal"; farms with fragments >= 0.52km and < 3.05km apart were classed as "medium dispersal", and farms with fragments > 3.05km apart (75th percentile) were classed as "high dispersal".

dat1$median_distance_fragments_category <- factor(dat1$median_distance_fragments_category
                            , levels = c("Low"
                            , "Medium"
                            , "High"))

TABLE 4

dat1 %>% dplyr::select(year, median_distance_fragments_category) %>% 
tbl_summary(by = year, label = median_distance_fragments_category ~ "Fragment dispersal category") %>% 
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Year**")

Characteristic	Year
Characteristic	2015, N = 19,008¹	2016, N = 19,008¹	2017, N = 19,008¹
Fragment dispersal category
Low	4,707 (25%)	4,774 (25%)	4,770 (25%)
Medium	9,559 (50%)	9,507 (50%)	9,452 (50%)
High	4,742 (25%)	4,727 (25%)	4,786 (25%)
¹ Statistics presented: n (%)

use the linear-by-linear association test to assess whether there is a relationship between the two ordered variables fragmentation_level and fragment_dispersal_category.

TABLE 5

dat1 %>% dplyr::select(fragment_category, median_distance_fragments_category) %>% 
tbl_summary(by = median_distance_fragments_category, label = fragment_category ~ "Fragmentation level", percent = "row") %>% 
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Fragment dispersal category**")

Characteristic	Fragment dispersal category
Characteristic	Low, N = 14,251¹	Medium, N = 28,518¹	High, N = 14,255¹
Fragmentation level
Not_fragmented	9,476 (100%)	0 (0%)	0 (0%)
Little_fragmentation	4,257 (16%)	15,999 (59%)	6,908 (25%)
Medium_fragmentation	469 (3.8%)	8,031 (65%)	3,853 (31%)
High_fragmentation	33 (0.7%)	2,847 (61%)	1,811 (39%)
Very_High_fragmentation	16 (0.5%)	1,641 (49%)	1,683 (50%)
¹ Statistics presented: n (%)

frag_farm_cat_tbl_2 <- table(dat1$fragment_category, dat1$median_distance_fragments_category)
lbl_test(frag_farm_cat_tbl_2)

## 
##  Asymptotic Linear-by-Linear Association Test
## 
## data:  Var2 (ordered) by
##   Var1 (Not_fragmented < Little_fragmentation < Medium_fragmentation < High_fragmentation < Very_High_fragmentation)
## Z = 121.69, p-value < 2.2e-16
## alternative hypothesis: two.sided

Relationship between median_distance_fragments_km and max_distance_fragments_km

There was a strong, positive correlation between the median distance between all land parcels, and maximum distance between the most distal land parcels.

cor.test(dat1$median_distance_fragments_km, dat1$max_distance_fragments_km, method = "spearman")

## 
##  Spearman's rank correlation rho
## 
## data:  dat1$median_distance_fragments_km and dat1$max_distance_fragments_km
## S = 2.0265e+12, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.9344263

3.1.3. Farm fragmentation and contact category

Contact category

This categorical variable is based directly off the quantiles for total_shared_boundary_grazing_km. Farms with less than 1.48km of shared boundary (25th percentile) were classed as "low contact"; farms a shared boundary >= 1.48km and < 4.94km were classed as "medium contact", and farms with shared boundaries of > 4.94km apart (75th percentile) were classed as "high contact".

dat1$contact_category <- factor(dat1$contact_category
                            , levels = c("Low"
                            , "Medium"
                            , "High"))

TABLE 6

dat1 %>% dplyr::select(year, contact_category) %>% 
tbl_summary(by = year, label = contact_category ~ "Contact category", percent = "row") %>% 
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Year**")

Characteristic	Year
Characteristic	2015, N = 19,008¹	2016, N = 19,008¹	2017, N = 19,008¹
Contact category
Low	4,836 (34%)	4,552 (32%)	4,899 (34%)
Medium	9,522 (33%)	9,431 (33%)	9,491 (33%)
High	4,650 (33%)	5,025 (35%)	4,618 (32%)
¹ Statistics presented: n (%)

use the linear-by-linear association test to assess whether there is a relationship between the two ordered variables fragmentation_level and contact_category.

TABLE 7

dat1 %>% dplyr::select(fragment_category, contact_category) %>% 
tbl_summary(by = contact_category, label = fragment_category ~ "Fragmentation level", percent = "row") %>% 
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Conact category**")

Characteristic	Conact category
Characteristic	Low, N = 14,287¹	Medium, N = 28,444¹	High, N = 14,293¹
Fragmentation level
Not_fragmented	5,194 (55%)	4,112 (43%)	170 (1.8%)
Little_fragmentation	7,610 (28%)	15,910 (59%)	3,644 (13%)
Medium_fragmentation	1,236 (10%)	6,326 (51%)	4,791 (39%)
High_fragmentation	185 (3.9%)	1,559 (33%)	2,947 (63%)
Very_High_fragmentation	62 (1.9%)	537 (16%)	2,741 (82%)
¹ Statistics presented: n (%)

frag_farm_cat_tbl_3 <- table(dat1$fragment_category, dat1$contact_category)
lbl_test(frag_farm_cat_tbl_3)

## 
##  Asymptotic Linear-by-Linear Association Test
## 
## data:  Var2 (ordered) by
##   Var1 (Not_fragmented < Little_fragmentation < Medium_fragmentation < High_fragmentation < Very_High_fragmentation)
## Z = 125.63, p-value < 2.2e-16
## alternative hypothesis: two.sided

3.2 The variation of farm fragmentation, farm dispersal and farm contact with production type

3.2.1 Production type and farm fragmentation

TABLE 8

dat1 %>% dplyr::select(HerdType, fragment_category) %>% 
tbl_summary(by = fragment_category, percent = "row") %>% 
  add_n() %>% 
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**")

Characteristic	N	Herd type				Very_High_fragmentation, N = 3,340¹
Characteristic	N	Not_fragmented, N = 9,476¹	Little_fragmentation, N = 27,164¹	Medium_fragmentation, N = 12,353¹	High_fragmentation, N = 4,691¹	Very_High_fragmentation, N = 3,340¹
HerdType	57,024
Breeder		5,352 (19%)	14,428 (51%)	5,705 (20%)	1,844 (6.5%)	1,086 (3.8%)
Dairy		642 (7.9%)	2,966 (36%)	2,359 (29%)	1,196 (15%)	965 (12%)
Finisher		983 (16%)	2,878 (48%)	1,297 (22%)	475 (7.9%)	343 (5.7%)
Other		2,499 (17%)	6,892 (48%)	2,992 (21%)	1,176 (8.1%)	946 (6.5%)
¹ Statistics presented: n (%)

3.2.2 Production type and farm area

TABLE 9

dat1 %>% dplyr::select(HerdType, total_farm_area_ha) %>% 
tbl_summary(by = HerdType
            , type = all_continuous() ~ "continuous2"
            , label = total_farm_area_ha ~ "Farm area (ha)"
            , statistic = all_continuous() ~ c("{median} ({p25}, {p75})", 
                                     "{min}, {max}")) %>%  
  add_n() %>% 
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
  add_p()

Characteristic	N	Herd type				p-value¹
Characteristic	N	Breeder, N = 28,415	Dairy, N = 8,128	Finisher, N = 5,976	Other, N = 14,505	p-value¹
Farm area (ha)	57,024					<0.001
Median (IQR)		26 (15, 46)	65 (42, 100)	32 (17, 58)	28 (14, 57)
Range		0, 1,418	2, 1,973	2, 1,007	0, 3,252
¹ Statistical tests performed: Kruskal-Wallis test

kruskal.test(total_farm_area_ha ~ HerdType, data = dat1)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  total_farm_area_ha by HerdType
## Kruskal-Wallis chi-squared = 6620.6, df = 3, p-value < 2.2e-16

ggplot(dat1, aes(x = HerdType, y = total_farm_area_ha)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Herd Type", y = "Total farm area (ha)") +
  theme_bw()

#### 3.2.3 Production type and land parcels

TABLE 10

dat1 %>% dplyr::select(HerdType, n_fields) %>% 
tbl_summary(by = HerdType
            , type = all_continuous() ~ "continuous2"
            , label = n_fields ~ "number of land parcels"
            , statistic = all_continuous() ~ c("{median} ({p25}, {p75})", 
                                     "{min}, {max}")) %>%  
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
  add_p()

Characteristic	Herd type				p-value¹
Characteristic	Breeder, N = 28,415	Dairy, N = 8,128	Finisher, N = 5,976	Other, N = 14,505	p-value¹
number of land parcels					<0.001
Median (IQR)	22 (13, 35)	39 (27, 56)	21 (13, 35)	22 (12, 37)
Range	0, 423	2, 379	1, 219	0, 444
¹ Statistical tests performed: Kruskal-Wallis test

Only minor changes can be observed WITHIN herd types through time

dat1 %>% group_by(HerdType, year) %>%
  summarise(
    "Number of land parcels"
    , total = n()
    , q1 = quantile(n_fields)[2]
    , median = median(n_fields)
    , q3 = quantile(n_fields)[4]
    , max = max(n_fields))

## # A tibble: 12 x 8
## # Groups:   HerdType [4]
##    HerdType year  `"Number of land parcels"` total    q1 median    q3   max
##    <chr>    <fct> <chr>                      <int> <dbl>  <dbl> <dbl> <int>
##  1 Breeder  2015  Number of land parcels      9464    13     22  35     413
##  2 Breeder  2016  Number of land parcels      9565    13     22  35     423
##  3 Breeder  2017  Number of land parcels      9386    13     22  35     377
##  4 Dairy    2015  Number of land parcels      2740    27     39  56     366
##  5 Dairy    2016  Number of land parcels      2697    27     39  56     379
##  6 Dairy    2017  Number of land parcels      2691    27     40  56     377
##  7 Finisher 2015  Number of land parcels      1970    13     21  34     194
##  8 Finisher 2016  Number of land parcels      1963    13     21  34.5   188
##  9 Finisher 2017  Number of land parcels      2043    13     21  35     219
## 10 Other    2015  Number of land parcels      4834    12     21  37     366
## 11 Other    2016  Number of land parcels      4783    12     21  37     379
## 12 Other    2017  Number of land parcels      4888    12     22  37     444

kruskal.test(n_fields ~ HerdType, data = dat1)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  n_fields by HerdType
## Kruskal-Wallis chi-squared = 4537.1, df = 3, p-value < 2.2e-16

ggplot(dat1, aes(x = HerdType, y = n_fields)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Herd Type", y = "Number of land parcels") +
  theme_bw()

3.3.4 Production type and fragments (5m buffer applied)

TABLE 11

dat1 %>% dplyr::select(HerdType, n_fragments_5m) %>% 
tbl_summary(by = HerdType
            , type = all_continuous() ~ "continuous2"
            , label = n_fragments_5m ~ "Number of fragments (5m buffer)"
            , statistic = all_continuous() ~ c("{median} ({p25}, {p75})", 
                                     "{min}, {max}")) %>%  
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
  add_p()

Characteristic	Herd type				p-value¹
Characteristic	Breeder, N = 28,415	Dairy, N = 8,128	Finisher, N = 5,976	Other, N = 14,505	p-value¹
Number of fragments (5m buffer)					<0.001
Median (IQR)	3 (2, 5)	5 (3, 8)	3 (2, 6)	3 (2, 6)
Range	0, 35	1, 33	1, 32	0, 47
¹ Statistical tests performed: Kruskal-Wallis test

kruskal.test(n_fragments_5m ~ HerdType, data = dat1)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  n_fragments_5m by HerdType
## Kruskal-Wallis chi-squared = 2117.2, df = 3, p-value < 2.2e-16

ggplot(dat1, aes(x = HerdType, y = n_fragments_5m)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Herd Type", y = "Number of fragments (5m buffer)") +
  theme_bw()

3.3.5 Production type and fragment dispersal (median distance between fragments)

TABLE 12

dat1 %>% dplyr::select(HerdType, median_distance_fragments_km) %>% 
tbl_summary(by = HerdType
            , type = all_continuous() ~ "continuous2"
            , label = median_distance_fragments_km ~ "Median distance between fragments (km)"
            , statistic = all_continuous() ~ c("{median} ({p25}, {p75})", 
                                     "{min}, {max}")) %>%  
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
  add_p()

Characteristic	Herd type				p-value¹
Characteristic	Breeder, N = 28,415	Dairy, N = 8,128	Finisher, N = 5,976	Other, N = 14,505	p-value¹
Median distance between fragments (km)					<0.001
Median (IQR)	1.21 (0.43, 2.95)	1.88 (1.04, 3.21)	1.34 (0.50, 3.04)	1.36 (0.47, 3.05)
Range	0.00, 119.53	0.00, 97.09	0.00, 82.97	0.00, 105.15
¹ Statistical tests performed: Kruskal-Wallis test

kruskal.test(median_distance_fragments_km ~ HerdType, data = dat1)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  median_distance_fragments_km by HerdType
## Kruskal-Wallis chi-squared = 699.92, df = 3, p-value < 2.2e-16

As shown previously, there is no variation in the median distance between parcels through time when considering all herd types together

anova(glm(median_distance_fragments_km  ~ year, data = tst6), test = "F")

## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: median_distance_fragments_km
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev      F Pr(>F)
## NULL                 57023    1243464              
## year  2   22.497     57021    1243442 0.5158  0.597

Only minor changes can be observed WITHIN herd types

dat1 %>% group_by(HerdType, year) %>%
  summarise(
    "Median distance between fragments (km)"
    , total = n()
    , q1 = quantile(median_distance_fragments_km)[2]
    , median = median(median_distance_fragments_km)
    , q3 = quantile(median_distance_fragments_km)[4]
    , max = max(median_distance_fragments_km))

## # A tibble: 12 x 8
## # Groups:   HerdType [4]
##    HerdType year  `"Median distance between frag~ total    q1 median    q3   max
##    <chr>    <fct> <chr>                           <int> <dbl>  <dbl> <dbl> <dbl>
##  1 Breeder  2015  Median distance between fragme~  9464 0.438   1.23  2.96 120. 
##  2 Breeder  2016  Median distance between fragme~  9565 0.427   1.21  2.93 120. 
##  3 Breeder  2017  Median distance between fragme~  9386 0.425   1.2   2.96 120. 
##  4 Dairy    2015  Median distance between fragme~  2740 1.03    1.84  3.18  73.2
##  5 Dairy    2016  Median distance between fragme~  2697 1.03    1.88  3.19  52.3
##  6 Dairy    2017  Median distance between fragme~  2691 1.05    1.91  3.22  97.1
##  7 Finisher 2015  Median distance between fragme~  1970 0.512   1.34  3.04  82.6
##  8 Finisher 2016  Median distance between fragme~  1963 0.509   1.34  3.01  83.0
##  9 Finisher 2017  Median distance between fragme~  2043 0.489   1.33  3.07  69.7
## 10 Other    2015  Median distance between fragme~  4834 0.471   1.35  3.04  87.5
## 11 Other    2016  Median distance between fragme~  4783 0.467   1.32  3.04  99.8
## 12 Other    2017  Median distance between fragme~  4888 0.473   1.40  3.07 105.

ggplot(dat1, aes(x = HerdType, y = median_distance_fragments_km)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Herd Type", y = "Median distance between fragments (km)") +
  theme_bw()

3.3.6 Production type and fragment dispersal (maximum distance between fragments)

TABLE 13

dat1 %>% dplyr::select(HerdType, max_distance_fragments_km) %>% 
tbl_summary(by = HerdType
            , type = all_continuous() ~ "continuous2"
            , label = max_distance_fragments_km ~ "Maximum distance between fragments (km)"
            , statistic = all_continuous() ~ c("{median} ({p25}, {p75})", 
                                     "{min}, {max}")) %>%  
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
  add_p()

Characteristic	Herd type				p-value¹
Characteristic	Breeder, N = 28,415	Dairy, N = 8,128	Finisher, N = 5,976	Other, N = 14,505	p-value¹
Maximum distance between fragments (km)					<0.001
Median (IQR)	2.0 (0.5, 5.5)	3.8 (1.7, 7.4)	2.2 (0.6, 6.0)	2.2 (0.6, 6.1)
Range	0.0, 129.1	0.0, 117.2	0.0, 152.0	0.0, 117.5
¹ Statistical tests performed: Kruskal-Wallis test

kruskal.test(max_distance_fragments_km ~ HerdType, data = dat1)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  max_distance_fragments_km by HerdType
## Kruskal-Wallis chi-squared = 1128.5, df = 3, p-value < 2.2e-16

ggplot(dat1, aes(x = HerdType, y = max_distance_fragments_km)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Herd Type", y = "Maximum distance between fragments (km)") +
  theme_bw()

3.3.7 Production type and fragment dispersal (distance to biggest fragment)

TABLE 14

dat1 %>% dplyr::select(HerdType, homestead_frag_dist_km) %>% 
tbl_summary(by = HerdType
            , type = all_continuous() ~ "continuous2"
            , label = homestead_frag_dist_km ~ "Distance to biggest fragment (km)"
            , statistic = all_continuous() ~ c("{median} ({p25}, {p75})", 
                                     "{min}, {max}")) %>%  
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
  add_p()

Characteristic	Herd type				p-value¹
Characteristic	Breeder, N = 28,415	Dairy, N = 8,128	Finisher, N = 5,976	Other, N = 14,505	p-value¹
Distance to biggest fragment (km)					0.002
Median (IQR)	0.49 (0.22, 1.71)	0.44 (0.25, 1.11)	0.44 (0.22, 1.70)	0.47 (0.22, 1.64)
Range	0.00, 120.83	0.01, 114.34	0.00, 127.54	0.00, 109.33
¹ Statistical tests performed: Kruskal-Wallis test

kruskal.test(homestead_frag_dist_km ~ HerdType, data = dat1)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  homestead_frag_dist_km by HerdType
## Kruskal-Wallis chi-squared = 14.921, df = 3, p-value = 0.001885

ggplot(dat1, aes(x = HerdType, y = homestead_frag_dist_km)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Herd Type", y = "Distance to biggest fragment (km)") +
  theme_bw()

3.3.8 Production type and contact (total shared boundary)

TABLE 15

dat1 %>% dplyr::select(HerdType, total_shared_boundary_grazing_km) %>% 
tbl_summary(by = HerdType
            , type = all_continuous() ~ "continuous2"
            , label = total_shared_boundary_grazing_km ~ "Length of shared contact boundary (km)"
            , statistic = all_continuous() ~ c("{median} ({p25}, {p75})", 
                                     "{min}, {max}")) %>%  
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
  add_p()

Characteristic	Herd type				p-value¹
Characteristic	Breeder, N = 28,415	Dairy, N = 8,128	Finisher, N = 5,976	Other, N = 14,505	p-value¹
Length of shared contact boundary (km)					<0.001
Median (IQR)	2.46 (1.29, 4.13)	5.14 (3.12, 7.81)	2.85 (1.53, 4.84)	2.69 (1.39, 4.85)
Range	0.00, 33.33	0.00, 44.10	0.00, 33.52	0.00, 28.57
¹ Statistical tests performed: Kruskal-Wallis test

kruskal.test(total_shared_boundary_grazing_km ~ HerdType, data = dat1)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  total_shared_boundary_grazing_km by HerdType
## Kruskal-Wallis chi-squared = 4701.9, df = 3, p-value < 2.2e-16

ggplot(dat1, aes(x = HerdType, y = total_shared_boundary_grazing_km)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Herd Type", y = "Length of shared contact boundary (km)") +
  theme_bw()

3.3.9 Production type and LCM

TABLE 16

dat1 %>% dplyr::select(HerdType, LCM4_5) %>% 
tbl_summary(by = HerdType
            , type = all_continuous() ~ "continuous2"
            , label = LCM4_5 ~ "LCM 4 and LCM 5 land on farm (%)"
            , statistic = all_continuous() ~ c("{median} ({p25}, {p75})", 
                                     "{min}, {max}")) %>%  
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
  add_p()

Characteristic	Herd type				p-value¹
Characteristic	Breeder, N = 28,415	Dairy, N = 8,128	Finisher, N = 5,976	Other, N = 14,505	p-value¹
LCM 4 and LCM 5 land on farm (%)					<0.001
Median (IQR)	86 (63, 96)	90 (79, 96)	89 (76, 96)	89 (73, 97)
Range	0, 100	1, 100	1, 100	0, 100
Unknown	3	0	0	0
¹ Statistical tests performed: Kruskal-Wallis test

kruskal.test(LCM4_5 ~ HerdType, data = dat1)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  LCM4_5 by HerdType
## Kruskal-Wallis chi-squared = 492.57, df = 3, p-value < 2.2e-16

ggplot(dat1, aes(x = HerdType, y = LCM4_5)) +
  geom_violin() +
  geom_boxplot(width=0.1) +
  labs(x = "Herd Type", y = "LCM 4 and LCM 5 land on farm (%)") +
  theme_bw()

## Warning: Removed 3 rows containing non-finite values (stat_ydensity).

## Warning: Removed 3 rows containing non-finite values (stat_boxplot).