醒來寫了一下作業…

 

#計算指定資料表欄位的mean

pollutantmean <- function(directory, pollutant, id = 1:332) {
    ## Compute the mean of one pollutant column pooled across monitor files.
    ##
    ## 'directory' is a character vector of length 1 indicating
    ## the location of the CSV files
    ##
    ## 'pollutant' is a character vector of length 1 indicating
    ## the name of the column for which the mean is calculated
    ## (e.g. "sulfate" or "nitrate")
    ##
    ## 'id' is an integer vector indicating the monitor ID numbers
    ## to be used; monitor 7 is read from "<directory>/007.csv"
    ##
    ## Returns the mean of the pollutant across all monitors listed
    ## in the 'id' vector (ignoring NA values); NaN when there is no
    ## non-missing value at all.

    ## file.path() uses a separator that works on every platform, unlike
    ## a hard-coded backslash which only resolves on Windows.
    paths <- file.path(directory, sprintf("%03d.csv", id))

    ## Read every file once and collect the requested column, instead of
    ## growing a vector with c() on each iteration.
    columns <- lapply(paths, function(path) read.csv(path)[[pollutant]])
    values <- unlist(columns, use.names = FALSE)

    ## unlist() of an empty list is NULL; keep the numeric(0) -> NaN result
    ## that an empty 'id' produces.
    if (is.null(values)) {
        values <- numeric()
    }

    mean(values, na.rm = TRUE)
}

 

#計算指定資料表的完整資料數

complete <- function(directory, id = 1:332) {
    ## Count the completely observed rows in each requested monitor file.
    ##
    ## 'directory' is a character vector of length 1 indicating
    ## the location of the CSV files
    ##
    ## 'id' is an integer vector indicating the monitor ID numbers
    ## to be used; monitor 7 is read from "<directory>/007.csv"
    ##
    ## Returns a data frame of the form:
    ## id nobs
    ## 1  117
    ## 2  1041
    ## ...
    ## where 'id' is the monitor ID number and 'nobs' is the
    ## number of complete cases (rows with no NA in any column).
    ## Rows appear in the same order as 'id'.

    ## file.path() uses a separator that works on every platform, unlike
    ## a hard-coded backslash which only resolves on Windows.
    paths <- file.path(directory, sprintf("%03d.csv", id))

    ## vapply() preallocates the result and guarantees one number per file.
    counts <- vapply(
        paths,
        function(path) as.numeric(sum(complete.cases(read.csv(path)))),
        numeric(1),
        USE.NAMES = FALSE
    )

    ## Both columns are kept as doubles, as before.
    data.frame(id = as.numeric(id), nobs = counts)
}

 

 

#計算二個變數間的correlation

corr <- function(directory, threshold = 0, id = 1:332) {
    ## Correlate sulfate with nitrate within each monitor file that has
    ## enough completely observed rows.
    ##
    ## 'directory' is a character vector of length 1 indicating
    ## the location of the CSV files
    ##
    ## 'threshold' is a numeric vector of length 1 indicating the
    ## number of completely observed observations (on all
    ## variables) required to compute the correlation between
    ## nitrate and sulfate; the default is 0
    ##
    ## 'id' is an integer vector indicating the monitor ID numbers
    ## to be scanned; the default 1:332 matches the range that was
    ## previously hard-coded
    ##
    ## Returns a numeric vector of correlations, one per qualifying
    ## monitor, in the order of 'id'; numeric(0) when none qualifies.

    per_monitor <- lapply(id, function(monitor) {
        ## file.path() uses a separator that works on every platform,
        ## unlike a hard-coded backslash which only resolves on Windows.
        path <- file.path(directory, sprintf("%03d.csv", monitor))
        complete_rows <- na.omit(read.csv(path))
        n_complete <- nrow(complete_rows)

        ## Skip monitors with no complete rows (even when threshold is 0)
        ## and those with fewer complete rows than the threshold.
        if (n_complete == 0 || n_complete < threshold) {
            return(NULL)
        }

        cor(x = complete_rows[["sulfate"]], y = complete_rows[["nitrate"]])
    })

    ## unlist() drops the NULL placeholders of skipped monitors.
    correlations <- unlist(per_monitor, use.names = FALSE)
    if (is.null(correlations)) {
        correlations <- numeric()
    }

    correlations
}

 

arrow
arrow
    全站熱搜
    創作者介紹
    創作者 cgm 的頭像
    cgm

    資料科學(Data Science)學習筆記

    cgm 發表在 痞客邦 留言(0) 人氣()