### Question 2
# Move into the folder that holds the downloaded NEON "countdata" folders
setwd("~/UVM/Classes/BIOL6100/ScarfoBio6100Portfolio/Homework11/OriginalData")
## The working directory is now the original data folder, so listing it
## gives the names of every downloaded folder
filelist <- list.files()
print(filelist)
## [1] "NEON.D01.BART.DP1.10003.001.2015-06.basic.20240127T000425Z.RELEASE-2024"
## [2] "NEON.D01.BART.DP1.10003.001.2016-06.basic.20240127T000425Z.RELEASE-2024"
## [3] "NEON.D01.BART.DP1.10003.001.2017-06.basic.20240127T000425Z.RELEASE-2024"
## [4] "NEON.D01.BART.DP1.10003.001.2018-06.basic.20240127T000425Z.RELEASE-2024"
## [5] "NEON.D01.BART.DP1.10003.001.2019-06.basic.20240127T000425Z.RELEASE-2024"
## [6] "NEON.D01.BART.DP1.10003.001.2020-06.basic.20240127T000425Z.RELEASE-2024"
## [7] "NEON.D01.BART.DP1.10003.001.2021-06.basic.20240127T000425Z.RELEASE-2024"
## [8] "NEON.D01.BART.DP1.10003.001.2022-06.basic.20240127T000425Z.RELEASE-2024"
##Find the "countdata" file inside each downloaded folder.
## Works for any number of folders and never changes the working directory.
filenames <- vapply(filelist, function(folder) {
  hits <- list.files(
    file.path(
      "~/UVM/Classes/BIOL6100/ScarfoBio6100Portfolio/Homework11/OriginalData",
      folder
    ),
    pattern = "countdata"
  )
  if (length(hits) == 0) {
    stop("No 'countdata' file found in folder: ", folder, call. = FALSE)
  }
  if (length(hits) > 1) {
    # Same outcome as assigning several matches to one slot: keep the first
    warning("Several 'countdata' files in ", folder, "; using the first",
            call. = FALSE)
  }
  hits[1]
}, character(1), USE.NAMES = FALSE)
filenames
## [1] "NEON.D01.BART.DP1.10003.001.brd_countdata.2015-06.basic.20231226T232626Z.csv"
## [2] "NEON.D01.BART.DP1.10003.001.brd_countdata.2016-06.basic.20231227T013428Z.csv"
## [3] "NEON.D01.BART.DP1.10003.001.brd_countdata.2017-06.basic.20231227T094709Z.csv"
## [4] "NEON.D01.BART.DP1.10003.001.brd_countdata.2018-06.basic.20231228T172744Z.csv"
## [5] "NEON.D01.BART.DP1.10003.001.brd_countdata.2019-06.basic.20231227T184129Z.csv"
## [6] "NEON.D01.BART.DP1.10003.001.brd_countdata.2020-06.basic.20231227T224944Z.csv"
## [7] "NEON.D01.BART.DP1.10003.001.brd_countdata.2021-06.basic.20231228T010546Z.csv"
## [8] "NEON.D01.BART.DP1.10003.001.brd_countdata.2022-06.basic.20231229T053256Z.csv"
### Question 3
## Pseudocode for major steps
# Cleaning data
# Extract years
# Calculate abundance
# Calculate species richness
##Functions for pseudocode
# -----------------------------
#FUNCTION clean_data
#description: reads each raw count file, drops every row whose scientificName
#             is missing (NA or empty string), and writes the remaining rows
#             to "CleanData_<year>.csv" in the cleaned-data folder.
#inputs: filelist  - folder names inside raw_dir, one per raw file
#        filenames - raw csv file name inside each of those folders
#        raw_dir   - folder holding the downloaded folders (optional)
#        clean_dir - folder the cleaned csv files are written to (optional)
#        out_years - label used in each output file name (optional; defaults
#                    to the global `years` vector)
#outputs: (invisibly) the paths of the cleaned files that were written.
#         Side effect: the working directory is left at clean_dir.
#####################################
years <- c(2015,2016,2017,2018,2019,2020,2021,2022)
clean_data <- function(filelist, filenames,
                       raw_dir = "~/UVM/Classes/BIOL6100/ScarfoBio6100Portfolio/Homework11/OriginalData",
                       clean_dir = "~/UVM/Classes/BIOL6100/ScarfoBio6100Portfolio/Homework11/CleanedData",
                       out_years = years) {
  n_files <- length(filenames)
  stopifnot(length(filelist) >= n_files, length(out_years) >= n_files)

  # The default folders use the same "~"-based paths as the rest of the
  # script (assumes "~" resolves to the Documents folder that holds UVM/).
  dir.create(clean_dir, recursive = TRUE, showWarnings = FALSE)

  written <- character(n_files)
  for (i in seq_len(n_files)) {
    raw <- read.csv(file.path(raw_dir, filelist[i], filenames[i]),
                    na.strings = c("", "NA"))
    if (!"scientificName" %in% names(raw)) {
      stop("No 'scientificName' column in ", filenames[i], call. = FALSE)
    }
    kept <- raw[!is.na(raw[["scientificName"]]), , drop = FALSE]
    written[i] <- file.path(clean_dir, paste0("CleanData_", out_years[i], ".csv"))
    write.csv(kept, written[i])
  }

  # Later steps of the script read the cleaned files by bare file name, so
  # the working directory is deliberately left at the cleaned-data folder.
  setwd(clean_dir)
  invisible(written)
}
#-----------------------------------
# Write the cleaned files, then list them. The pattern keeps only the files
# clean_data() writes, so stray files in the folder (e.g. desktop.ini) do not
# end up in cleanList.
clean_data(filelist, filenames)
cleanList <- list.files(
  "~/UVM/Classes/BIOL6100/ScarfoBio6100Portfolio/Homework11/CleanedData",
  pattern = "^CleanData_.*\\.csv$"
)
#-----------------------------------
#FUNCTION extract_years
#description: a function that extracts the year (the first run of four
#             digits) from each file name, using base R only
#inputs: cleanList (character vector of clean file names)
#outputs: character vector of years, one per file name; NA where a name
#         contains no four-digit run
#####################################
extract_years <- function(cleanList){
  cleanList <- as.character(cleanList)
  hit <- regexpr("\\d{4}", cleanList)
  # regexpr() gives -1 for "no match" and NA for NA input; both stay NA here
  found <- !is.na(hit) & hit > 0L
  years <- rep(NA_character_, length(cleanList))
  years[found] <- regmatches(cleanList, hit)
  years
}
#-----------------------------------
# Show the year extracted from each cleaned file name
extract_years(cleanList)
## [1] "2015" "2016" "2017" "2018" "2019" "2020" "2021" "2022"
#-----------------------------------
#FUNCTION calculate_abundance
#description: a function that calculates the abundance (number of rows in
#             the cleaned csv) for each file it is given
#inputs: cleanList (one clean file name, or a vector of them, readable from
#        the current working directory or given as full paths)
#outputs: integer vector of abundances, one per file; the vector is also
#         printed
#####################################
calculate_abundance <- function(cleanList){
  abundance <- vapply(
    cleanList,
    function(clean_file) nrow(read.csv(clean_file)),
    integer(1),
    USE.NAMES = FALSE
  )
  # Printing is kept so the script's displayed output does not change
  print(abundance)
  invisible(abundance)
}
#--------------------------------------
#-----------------------------------
#FUNCTION calculate_richness
#description: a function that calculates the species richness (number of
#             distinct scientificName values) for each file it is given
#inputs: cleanList (one clean file name, or a vector of them, readable from
#        the current working directory or given as full paths)
#outputs: integer vector of species richness, one per file; the vector is
#         also printed
#####################################
calculate_richness <- function(cleanList){
  richness <- vapply(
    cleanList,
    function(clean_file) {
      clean <- read.csv(clean_file)
      # [[ ]] matches the column name exactly (no partial matching as with $)
      length(unique(clean[["scientificName"]]))
    },
    integer(1),
    USE.NAMES = FALSE
  )
  # Printing is kept so the script's displayed output does not change
  print(richness)
  invisible(richness)
}
#-------------------------------------
### Question 5
#Run the functions once per cleaned file and collect one summary row each.
# Handles any number of files; rows are built first and bound once at the end.
summary_rows <- lapply(cleanList, function(clean_file) {
  data.frame(
    File = clean_file,
    Year = extract_years(clean_file),
    Abundance = calculate_abundance(clean_file),
    Richness = calculate_richness(clean_file),
    stringsAsFactors = FALSE
  )
})
df <- if (length(summary_rows) > 0) {
  do.call(rbind, summary_rows)
} else {
  # No cleaned files: keep an empty table with the same columns
  data.frame(File = character(0), Year = character(0),
             Abundance = integer(0), Richness = integer(0),
             stringsAsFactors = FALSE)
}
## [1] 454
## [1] 40
## [1] 684
## [1] 38
## [1] 411
## [1] 34
## [1] 512
## [1] 36
## [1] 402
## [1] 43
## [1] 471
## [1] 45
## [1] 906
## [1] 49
## [1] 581
## [1] 38
## File Year Abundance Richness
## 1 CleanData_2015.csv 2015 454 40
## 2 CleanData_2016.csv 2016 684 38
## 3 CleanData_2017.csv 2017 411 34
## 4 CleanData_2018.csv 2018 512 36
## 5 CleanData_2019.csv 2019 402 43
## 6 CleanData_2020.csv 2020 471 45
## 7 CleanData_2021.csv 2021 906 49
## 8 CleanData_2022.csv 2022 581 38