#!/usr/bin/env python
"""Hadoop-streaming reducer that totals a numeric field per state code.

Each input line is expected to hold four '^'-separated fields:

    <id>^<county>^<state2digit>^<population>

Two kinds of line are distinguished by the second field:

* mapping line (second field is anything but "z"): its third field becomes
  the key under which the following counted lines are accumulated;
* counted line (second field is exactly "z"): its fourth field is parsed as
  an integer and added to the running total of the current key.

The input is assumed to arrive sorted so that a mapping line precedes the
counted lines it applies to (that is what the original script's comments
describe); the reducer itself does not verify the ordering.

Output is one "<key>\t<total>" line per run of consecutive counted lines
that share the same key.
"""
import sys

# Value of the second field that marks a line whose population is counted.
PERSON_MARKER = "z"

# Key used for counted lines seen before any mapping line.
UNKNOWN_STATE = "z"


def reduce_population(lines):
    """Yield ``(state_key, total)`` for each run of counted lines.

    ``lines`` is any iterable of text lines (e.g. ``sys.stdin``).  Lines that
    do not split into exactly four '^'-separated fields, and counted lines
    whose fourth field is not an integer, are skipped without affecting the
    running group.  Nothing is yielded when no line was counted.
    """
    # NOTE(review): the key from the most recent mapping line is applied to
    # every following counted line regardless of the first (id) field.  If a
    # counted line can arrive for an id that has no mapping line of its own,
    # it will be tallied under the previous key -- confirm the mapper
    # guarantees one mapping line per id.
    current_state = UNKNOWN_STATE
    group_key = None  # None means "no counted line seen yet"
    group_total = 0

    for raw in lines:
        try:
            _cid, county, state2digit, population = raw.strip().split('^')
        except ValueError:
            # Wrong number of fields: not a record this reducer understands.
            continue

        if county != PERSON_MARKER:
            # Mapping line: remember its key, but it is never counted itself.
            current_state = state2digit
            continue

        # Validate the amount before touching any group state, so a bad
        # record can neither break a group in two nor create an empty one.
        try:
            amount = int(population)
        except ValueError:
            continue

        if group_key != current_state:
            if group_key is not None:
                yield group_key, group_total
            group_key = current_state
            group_total = 0
        group_total += amount

    # Flush the last group; skipped entirely when nothing was counted so an
    # empty reducer input produces no output record.
    if group_key is not None:
        yield group_key, group_total


def main(stdin=None, stdout=None):
    """Read records from ``stdin`` and write "<key>\\t<total>" lines to ``stdout``.

    Both streams default to the process's standard streams.
    """
    source = sys.stdin if stdin is None else stdin
    sink = sys.stdout if stdout is None else stdout
    for state, total in reduce_population(source):
        sink.write('%s\t%s\n' % (state, total))


if __name__ == "__main__":
    main()