Time-series analysis of albedo on Svalbard (Part 1)

My current work has focused on using MOD10A1 albedo data to quantify albedo change in the 21st century across the High Arctic. Data were collected daily between May and September, to ensure a full melt season is covered, and were processed and filtered using open-source software (GDAL, NumPy, QGIS, RSGISLib). Over 10,000 images were analysed across three sites in the Arctic (Austfonna, Humboldt and Agassiz Ice Cap), detailing how albedo has changed daily, monthly and yearly, both spatially and with respect to elevation. Below are some preliminary results for Austfonna in Svalbard, with future posts analysing the other sites.
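For context, the filtering code in the appendix works on images that have already been clipped to the ice cap (the '_CLIP' folders it reads from). Below is a minimal sketch of how that clipping step can be done with GDAL's Python bindings (2.1 or later); the folder and shapefile names are hypothetical, chosen only for illustration rather than taken from the actual workflow.

import glob
from osgeo import gdal

# clip every daily MOD10A1 GeoTIFF to the ice cap outline
# (folder and shapefile names are hypothetical)
for tif in sorted(glob.glob('MOD10A1_albedo/*.tif')):
    gdal.Warp(tif.replace('.tif', '_CLIP.tif'), tif,
              cutlineDSName='austfonna_outline.shp',
              cropToCutline=True, dstNodata=0)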

Figure 1 shows the data-filtering hierarchy used for each image (SZA = solar zenith angle).

[Figure 1: data-filtering hierarchy]

The daily albedo for each year was plotted both to gain an understanding of melt-season characteristics and to see whether the amplitude increases through the time series. If so, this would imply that albedo values drop lower over the melt season (e.g. 2008 – 2013). Melt-season length can also be deduced from the width of the fitted quadratic curves.

[Figure: daily albedo for each melt season, with fitted quadratic curves]
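To make the curve fitting concrete, the short sketch below fits a quadratic to one season of daily albedo with NumPy. The day-of-year and albedo arrays are invented for illustration, as is the albedo threshold used to estimate the season width.

import numpy as np

# hypothetical glacier-mean albedo (%) against day of year for one season
doy = np.array([130, 150, 170, 190, 210, 230, 250])
albedo = np.array([80.0, 72.0, 60.0, 48.0, 55.0, 68.0, 78.0])

# fit a quadratic; np.polyfit returns coefficients highest order first
a, b, c = np.polyfit(doy, albedo, 2)

# the amplitude corresponds to the vertex (minimum) of the parabola
doy_min = -b / (2 * a)
print 'minimum albedo of %.1f on day %.0f' % (np.polyval([a, b, c], doy_min), doy_min)

# season width: days at which the curve crosses an arbitrary threshold of 70
print 'threshold crossings near days', np.round(np.roots([a, b, c - 70.0]))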

Finally, moving images were created to investigate the spatial patterns in albedo change. Two years were analysed for comparison: 2008 (top) saw a relatively stable melt season, while 2013 (bottom) witnessed a more erratic trend with very low albedo values.

[Animations: Austfonna daily albedo, 2008 (top) and 2013 (bottom)]
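For anyone wanting to recreate the moving images from a season of filtered GeoTIFFs, one option is imageio (not part of the toolset listed above, so this is an assumption about tooling rather than the method actually used); a minimal sketch with an assumed folder name:

import glob
import imageio
from osgeo import gdal

# hypothetical folder holding one season's filtered daily images
frames = [gdal.Open(f).ReadAsArray()
          for f in sorted(glob.glob('2013_FILTER/*.tif'))]

# write one frame per day to an animated GIF
imageio.mimsave('austfonna_2013.gif', frames, duration=0.2)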
Appendix: below is the code used to filter the data based on the SZA thresholds presented in Figure 1. Open-source software was used throughout its development.


#!/usr/bin/env python

#######################################
# A Python class to calculate 11-day running statistics for each pixel
# in the image and to filter according to criteria determined by the median
# and standard deviation. Output is a new tiff file.
# Author: Nathan Shaw
#######################################

# import the required modules
import os
import math
from osgeo import gdal
import numpy as np


class Filtering(object):

    # collect each window of 11 consecutive files (num files either side of
    # the centre image) for the running statistics
    def elevens(self, startFile, num):
        return (startFile[pos - num: pos + num + 1]
                for pos in xrange(num, len(startFile) - num))

 
    # calculate the median of the valid albedo values
    def median(self, valueList):
        order = sorted(valueList)
        mid = len(order) // 2
        if len(order) % 2 == 0:
            return (order[mid] + order[mid - 1]) / 2.0
        else:
            return order[mid]

 
    # write a numpy array out as a tiff, copying georeferencing from the input
    def createTiff(self, raster, File, folder, outputFile):
        # raster geotransform, taken from the source image
        trans = File.GetGeoTransform()
        # raster size for creation
        Ny, Nx = raster.shape
        driver = gdal.GetDriverByName("GTiff")
        dst_ds = driver.Create("//home//student//Desktop//DISSERTATION_DATA//" + str(folder.replace('_CLIP', '_FILTER')) + "//" + str(outputFile), Nx, Ny, 1, gdal.GDT_Byte)
        dst_ds.GetRasterBand(1).WriteArray(raster)
        dst_ds.SetGeoTransform(trans)
        dst_ds = None
        print outputFile + ' complete'

 
    # compare each pixel against the statistics from its 11-image window,
    # discarding pixels as and when needed
    def pixelFiltering(self, inputFile, outputFile, folder, emptyAv, emptySD, emptyMed):
        data = gdal.Open("//home//student//Desktop//DISSERTATION_DATA//" + str(folder) + "//" + str(inputFile))
        myArray2 = data.ReadAsArray()
        x = myArray2.shape
        # extract the day-of-year from the filename so that different filtering
        # criteria can be imposed for the early/late melt season
        dataFind = inputFile[13:16]
        # days in the melt season where the SZA is greater than 70 degrees
        if (int(dataFind) <= 134 or int(dataFind) >= 237):
            for numRows2 in range(x[0]):
                for numCols2 in range(x[1]):
                    pixelValue = myArray2[numRows2, numCols2]
                    # only compare pixels between 0 and 100; 97 is the maximum
                    # plausible albedo for this time of year given the low SZA
                    if (pixelValue >= 97 and pixelValue <= 100):
                        # set the pixel to zero (or NaN, anything outside 0 - 100)
                        myArray2[numRows2, numCols2] = 0
                    elif (pixelValue > 100 or pixelValue <= 0):
                        pass
                    # otherwise filter on the median and SD values (unique to
                    # each SZA regime and to each glacier)
                    else:
                        # data outside 1.5*SD is removed, but only if it is also
                        # outside median +/- 4; this branch is for SZA > 70 only
                        if ((pixelValue > emptyAv[numRows2, numCols2] + 1.5 * emptySD[numRows2, numCols2]) and (pixelValue > emptyMed[numRows2, numCols2] + 4)):
                            myArray2[numRows2, numCols2] = 0
                        elif ((pixelValue < emptyAv[numRows2, numCols2] - 1.5 * emptySD[numRows2, numCols2]) and (pixelValue < emptyMed[numRows2, numCols2] - 4)):
                            myArray2[numRows2, numCols2] = 0
                        else:
                            pass
            # output the final numpy array to tiff
            self.createTiff(myArray2, data, folder, outputFile)
        # all images where the SZA is less than 70 degrees
        else:
            for numRows2 in range(x[0]):
                for numCols2 in range(x[1]):
                    pixelValue = myArray2[numRows2, numCols2]
                    # only compare pixels between 0 and 100; 95 is chosen for
                    # days when SZA < 70 as albedo is never going to be higher
                    if (pixelValue >= 95 and pixelValue <= 100):
                        # set the pixel to zero (or NaN, anything outside 0 - 100)
                        myArray2[numRows2, numCols2] = 0
                    elif (pixelValue > 100 or pixelValue <= 0):
                        pass
                    else:
                        # data outside 2*SD is removed, but only if it is also
                        # outside median +/- 4; this branch is for SZA < 70 only
                        if ((pixelValue > emptyAv[numRows2, numCols2] + 2 * emptySD[numRows2, numCols2]) and (pixelValue > emptyMed[numRows2, numCols2] + 4)):
                            myArray2[numRows2, numCols2] = 0
                        elif ((pixelValue < emptyAv[numRows2, numCols2] - 2 * emptySD[numRows2, numCols2]) and (pixelValue < emptyMed[numRows2, numCols2] - 4)):
                            myArray2[numRows2, numCols2] = 0
                        else:
                            pass
            # output the final numpy array to tiff
            self.createTiff(myArray2, data, folder, outputFile)



    # main function: collect the images, stack them, and calculate statistics
    # (mean, SD and median) before comparing them with the image in question
    def convertToNumpy(self, inFile):
        images = [a for a in os.listdir(inFile) if a.endswith('CLIP')]
        for files in sorted(images):
            images2 = [a for a in os.listdir("//home//student//Desktop//DISSERTATION_DATA//" + str(files)) if a.endswith('.tif')]
            for group in self.elevens(sorted(images2), 5):
                empty = []
                for files2 in group:
                    data = gdal.Open("//home//student//Desktop//DISSERTATION_DATA//" + str(files) + "//" + str(files2))  # could use os.chdir here instead
                    myArray1 = data.ReadAsArray()
                    empty.append(myArray1)
                # stack all 11 images so each pixel holds a list of values
                array = np.dstack(empty)
                # image dimensions
                x = array.shape
                # empty arrays to hold the per-pixel statistics
                emptyAv = np.empty((x[0], x[1]))
                emptySD = np.empty((x[0], x[1]))
                emptyMed = np.empty((x[0], x[1]))
                for numRows in range(x[0]):
                    for numCols in range(x[1]):
                        pixel_list = array[numRows, numCols]
                        # collect the valid albedo values for this pixel,
                        # selecting only values between 0 and 95
                        albedoList = [items for items in pixel_list if (items < 95 and items > 0)]
                        if len(albedoList) != 0:
                            # calculate the mean
                            average = float(sum(albedoList)) / len(albedoList)
                            # calculate the standard deviation
                            standardDeviation = math.sqrt(sum([dev * dev for dev in [a - average for a in albedoList]]) / len(albedoList))
                            # store the values to allow the pixel-by-pixel
                            # comparison later on
                            emptyAv[numRows, numCols] = average
                            emptySD[numRows, numCols] = standardDeviation
                            emptyMed[numRows, numCols] = self.median(albedoList)
                        else:
                            emptyAv[numRows, numCols] = 0
                            emptySD[numRows, numCols] = 0
                            emptyMed[numRows, numCols] = 0

                # group[5] is the centre image of the 11-image window
                self.pixelFiltering(group[5], str(group[5].replace('.tif', '_filter.tif')), files, emptyAv, emptySD, emptyMed)
                print group[5] + '\n'
    
    # executable function
    def run(self, initialDirectory):
        self.convertToNumpy(initialDirectory)
     
if __name__ == "__main__":
    obj = Filtering()
    obj.run("//home//student//Desktop//DISSERTATION_DATA")