For the hash function, we'll use the cyclic hash code hc_cyclic
with shift value $s = 5$.
This will be followed by MAD compression with $N = 11, a = 7, b = 3$.
def hc_cyclic(s, str):
    """Return the 32-bit cyclic-shift hash code of a string.

    For every character, the running code is rotated left by ``s`` bits
    within a 32-bit word and then XOR-ed with the character's code point.

    s   -- number of bits to rotate by on each step
    str -- the string to hash
    """
    word_mask = 0xffffffff
    code = 0
    for ch in str:
        rotated = (code << s) | (code >> (32 - s))
        # Keep only the low 32 bits so the code never outgrows one word.
        code = (rotated ^ ord(ch)) & word_mask
    return code
def comp_mad(N, a, b, k):
    """Compress hash code ``k`` with the multiply-add-divide (MAD) method.

    Returns ``|a*k + b| mod N``, which for a positive ``N`` is a bucket
    index in the range 0..N-1.
    """
    scaled = a * k + b
    return abs(scaled) % N
def h(str):
    """Hash a string key to a bucket index.

    The key is first turned into a cyclic hash code (5-bit shift) and that
    code is then compressed with MAD using N = 11, a = 7, b = 3.
    """
    # Alternative MAD parameters for a much larger table:
    #   comp_mad(1000037, 761861, 3, code)
    code = hc_cyclic(5, str)
    return comp_mad(11, 7, 3, code)
# Table size (number of buckets). It equals the modulus 11 that h() passes
# to comp_mad(), so every value returned by h() is a valid index.
# Larger alternative that pairs with the commented-out compression in h():
#N = 1000037
N = 11
Initially, the hash table `ht` is an $N$-long list of empty lists.
# Separate-chaining table: one independent, initially empty bucket per index.
ht = [[] for _ in range(N)]
print(ht)
Insert into `ht` by hashing the key to produce an index into `ht` and appending the (key, value) pair to the indexed list.
# Store a (key, value) pair in the bucket selected by hashing the key.
ht[h("marron")].append(("marron","CMSC341-04,CMSC341-07"))
# Look at the bucket that "marron" hashes to.
ht[h("marron")]
print(ht)
# A raw append performs no key check, so this adds a second "marron" pair
# to the same bucket instead of replacing the first one.
ht[h("marron")].append(("marron","CMSC341,CMSC441"))
print(ht)
The `insert()` function prevents duplicate keys from being entered. If a key is already in the hash table, its value is overwritten.
def insert(ht, h, pair):
    """Insert a (key, value) pair into a separate-chaining hash table.

    ht   -- the table: a list of buckets, each a list of (key, value) pairs
    h    -- hash function mapping a key to a bucket index
    pair -- the (key, value) pair to store; pair[0] is the key

    If the key is already present in its bucket, the stored pair is
    replaced in place; otherwise the pair is appended to the bucket.
    Returns None.
    """
    key = pair[0]
    bucket = ht[h(key)]
    for pos, entry in enumerate(bucket):
        if entry[0] == key:
            # Key already stored: overwrite rather than add a duplicate.
            bucket[pos] = pair
            return
    bucket.append(pair)
# First insertion of "johnson": the pair is added to its bucket.
insert(ht, h, ("johnson", "CMSC341,CMSC201"))
ht[h("johnson")]
print(ht)
# Same key again: insert() replaces the stored pair instead of adding one.
insert(ht, h, ("johnson", "CMSC202"))
ht[h("johnson")]
print(ht)
# A further key, "chang".
insert(ht, h, ("chang", "CMSC441,CMSC641"))
print(ht)
The `remove()` function returns `True` if the key was found and the pair removed; it returns `False` if the key was not found.
def remove(ht, h, key):
    """Delete the pair stored under ``key`` from a separate-chaining table.

    ht  -- the table: a list of buckets, each a list of (key, value) pairs
    h   -- hash function mapping a key to a bucket index
    key -- the key to delete

    Returns True if a pair with this key was found and removed from its
    bucket, False if the bucket holds no such key.
    """
    bucket = ht[h(key)]
    for entry in bucket:
        if entry[0] == key:
            # Returning right away makes mutating the bucket here safe.
            bucket.remove(entry)
            return True
    return False
# Delete "johnson", then look at the bucket it hashed to.
remove(ht, h, "johnson")
ht[h("johnson")]
# Delete "chang" as well and show the whole table.
remove(ht, h, "chang")
print(ht)
If the key is found, the `search()` function returns `True` and the pair corresponding to the specified key; otherwise, it returns `False` and an empty tuple.
def search(ht, h, key):
    """Look up ``key`` in a separate-chaining hash table.

    ht  -- the table: a list of buckets, each a list of (key, value) pairs
    h   -- hash function mapping a key to a bucket index
    key -- the key to look for

    Returns ``(True, pair)`` for the first pair in the bucket whose key
    matches, or ``(False, ())`` when the bucket holds no such key.
    """
    for entry in ht[h(key)]:
        if entry[0] == key:
            return True, entry
    return False, ()
# Put "johnson" back (it was removed above) so that one lookup succeeds.
insert(ht, h, ("johnson", "CMSC341,CMSC201"))

res, pair = search(ht, h, "johnson")
print(pair if res else "not found")

res, pair = search(ht, h, "marron")
print(pair if res else "not found")

# "chang" was removed above, so this lookup is expected to report a miss.
res, pair = search(ht, h, "chang")
print(pair if res else "not found")

insert(ht, h, ("chang", "CMSC441,CMSC641"))
print(ht)
# Bucket indices of the two keys.
h("marron")
h("chang")
To do linear probing, we create the table as a list of empty tuples. We also need to define a `vacant` tuple to represent a location in the table where there was data but which has since been removed. Removing can create gaps in the data, but we need to search past these gaps. When we see a `vacant` entry, we keep searching; when we see an empty entry, `()`, we stop searching.
# Linear-probing table: every slot starts out as the empty tuple ().
htl = [() for _ in range(N)]
# Marker for a slot whose pair was removed; it is *different* from an
# uninitialized slot, so probing can continue past it.
vacant = ("", "")
print(htl)
def insertl(htl, h, pair):
    """Insert a (key, value) pair into a linear-probing hash table.

    htl  -- the table: a list whose slots are (), the module-level
            ``vacant`` marker, or a stored (key, value) pair
    h    -- hash function mapping a key to the home slot index
    pair -- the (key, value) pair to store; pair[0] is the key

    Slots are probed one after another (wrapping around) starting at the
    home slot.  If the key is found, its pair is overwritten.  Otherwise
    the pair goes into the first vacant slot seen during the probe or, if
    there was none, into the empty slot that ended the probe.

    The probe visits each slot at most once (the table size is taken from
    ``len(htl)``), so the call terminates even when the table has no empty
    slot.  Returns None.

    Raises RuntimeError if every slot holds a pair with a different key.
    """
    size = len(htl)
    key = pair[0]
    indx = h(key)
    first_vacant = None
    for _ in range(size):
        slot = htl[indx]
        if slot == ():
            # An empty slot ends the probe: the key is not in the table.
            break
        if slot[0] == key:
            htl[indx] = pair
            return
        if first_vacant is None and slot == vacant:
            # Remember the earliest reusable slot but keep looking, because
            # the key may still be stored further along the probe sequence.
            first_vacant = indx
        indx = (indx + 1) % size
    else:
        # Every slot was inspected and none of them was empty.
        if first_vacant is None:
            raise RuntimeError("hash table is full")
    htl[indx if first_vacant is None else first_vacant] = pair
# First pair stored in the linear-probing table.
insertl(htl, h, ("chang", "CMSC441,CMSC641"))
print(htl)
# Contents of the slot at chang's home index.
htl[h("chang")]
# Second key.
insertl(htl, h, ("marron", "CMSC341,CMSC441"))
print(htl)
def removel(htl, h, key):
    """Remove ``key`` from a linear-probing hash table.

    htl -- the table: a list whose slots are (), the module-level
           ``vacant`` marker, or a stored (key, value) pair
    h   -- hash function mapping a key to the home slot index
    key -- the key to remove

    Slots are probed one after another (wrapping around) starting at the
    home slot.  A matching slot is overwritten with ``vacant`` rather than
    () so that later probes keep going past it.

    The probe visits each slot at most once (the table size is taken from
    ``len(htl)``), so the call terminates even when the table has no empty
    slot.

    Returns True if the key was found and removed, False otherwise.
    """
    size = len(htl)
    indx = h(key)
    for _ in range(size):
        slot = htl[indx]
        if slot == ():
            # An empty slot ends the probe: the key is not in the table.
            return False
        if slot[0] == key:
            htl[indx] = vacant
            return True
        indx = (indx + 1) % size
    # Every slot was inspected without finding the key.
    return False
# "marron" is already in the table, so insertl() replaces its pair.
insertl(htl, h, ("marron", "CMSC341") )
# Removing "chang" leaves the vacant marker in the slot it occupied.
removel(htl, h, "chang")
print(htl)
def searchl(htl, h, key):
    """Look up ``key`` in a linear-probing hash table.

    htl -- the table: a list whose slots are (), a vacant marker, or a
           stored (key, value) pair
    h   -- hash function mapping a key to the home slot index
    key -- the key to look for

    Slots are probed one after another (wrapping around) starting at the
    home slot until the key or an empty slot () is found.

    The probe visits each slot at most once (the table size is taken from
    ``len(htl)``), so the call terminates even when the table has no empty
    slot.

    Returns ``(True, pair)`` if the key is found, ``(False, ())`` otherwise.
    """
    size = len(htl)
    indx = h(key)
    for _ in range(size):
        slot = htl[indx]
        if slot == ():
            # An empty slot ends the probe: the key is not in the table.
            break
        if slot[0] == key:
            return True, slot
        indx = (indx + 1) % size
    return False, ()
# "chang" was removed above, so this lookup is expected to miss.
searchl(htl, h, "chang")
# "marron" is still stored.
searchl(htl, h, "marron")
# "johnson" has not been inserted into htl yet.
searchl(htl, h, "johnson")
insertl(htl, h, ("johnson","CMSC341,CMSC201"))
searchl(htl, h, "johnson")
print(htl)
# Re-insert "chang".
insertl(htl, h, ("chang","CMSC441,CMSC641"))
print(htl)
# Remove "marron" (its slot becomes vacant) and then insert it again.
removel(htl,h,"marron")
print(htl)
insertl(htl, h, ("marron", "CMSC341,CMSC441"))
print(htl)
In Quadratic Probing, we probe at positions $A[(h + j^2)\mod N] \mbox{ for } j=0, 1, 2, \ldots$. This has the effect of reducing clustering in the table by spreading out the collisions. However, there are problems with quadratic probing. It is possible for the table to be nearly half empty and yet quadratic probing may not find an open location.
Suppose $N = 11 \mbox{ and } h = 2$.
The values $(h + j^2) \mod N$ do not include 1, 4, 8, 9, or 10. If these were the only open locations in the table, quadratic probing would fail to find one.
This phenomenon is related to the important mathematical concept of quadratic residues.
# Quadratic-probing table: every slot starts out as the empty tuple ().
htq = [() for _ in range(N)]
# Marker for a slot whose pair was removed; it is *different* from an
# uninitialized slot.
vacant = ("", "")
print(htq)
def insertq(htq, h, pair):
    """Insert a (key, value) pair into a quadratic-probing hash table.

    htq  -- the table: a list whose slots are (), the module-level
            ``vacant`` marker, or a stored (key, value) pair
    h    -- hash function mapping a key to the home slot index
    pair -- the (key, value) pair to store; pair[0] is the key

    Probes the home slot and then (home + j*j) % N for j = 1, 2, ...
    (N is the module-level table size).  The pair is written into the first
    probed slot that is empty, already holds the key, or is vacant.

    The loop has no other exit: if the probe sequence never reaches such a
    slot, the call does not return.  Unlike insertl(), probing stops at the
    first vacant slot, so a copy of the key stored further along the
    sequence is not detected.
    """
    key = pair[0]
    home = h(key)
    slot = home
    step = 1
    while True:
        entry = htq[slot]
        if entry == () or entry[0] == key or entry == vacant:
            break
        slot = (home + step * step) % N
        step += 1
    htq[slot] = pair
# Load the quadratic-probing table with nine (greeting, language) pairs.
insertq(htq, h, ("hello", "english"))
insertq(htq, h, ("ciao", "italian"))
insertq(htq, h, ("bonjour", "french"))
insertq(htq, h, ("bienvenidos", "spanish"))
insertq(htq, h, ("konnichiwa", "japanese"))
insertq(htq, h, ("hallo", "icelandic"))
insertq(htq, h, ("szia", "hungarian"))
insertq(htq, h, ("czesc", "polish"))
insertq(htq, h, ("ola", "portugese"))
print(htq)
The following insertion will cause an infinite loop, even though there are still empty buckets.
# Home slot of the key used to demonstrate the quadratic-probing failure.
h("dobry dzien")
# Per the accompanying text, the probe sequence for this key never reaches
# an empty bucket, even though the table still has some.
insertq(htq, h, ("dobry dzien", "belarusian"))
print(htq)
Let $n \mbox{ be the number of elements in a hash table of size } N$.
The load factor is $\lambda = \frac{n}{N}$. It must always be less than one.
Separate Chaining. We should have $\lambda < 0.9$.
The average number of probes for a successful search is $$\frac{\lambda + 1}{2}.$$
def cprobes(lam):
    """Average probes for a successful search with separate chaining.

    lam -- the load factor (number of elements divided by table size)

    Returns (lam + 1) / 2.
    """
    total = lam + 1
    return total / 2
# Separate chaining at load factor 0.9: (0.9 + 1) / 2, about 0.95 probes.
cprobes(0.9)
Linear Probing. For linear probing, we want $\lambda < 0.5$.
The expected number of probes is $$ \frac{1 + \frac{1}{(1-\lambda)^2}}{2}$$ so $\lambda < 0.5$ gives fewer than 2.5 expected probes.
def lprobes(lam):
    """Expected number of probes for linear probing at load factor ``lam``.

    Returns (1 + 1 / (1 - lam)**2) / 2.  Raises ZeroDivisionError when
    ``lam`` equals 1, where the formula is undefined.
    """
    free_fraction = 1 - lam
    inverse_square = 1 / (free_fraction * free_fraction)
    return (1 + inverse_square) / 2
# Expected probes for linear probing at several load factors
# (approximate values from the formula).
lprobes(0.9)   # about 50.5
lprobes(0.5)   # 2.5
lprobes(.2)    # about 1.28
lprobes(.6)    # about 3.6
lprobes(.8)    # about 13
lprobes(.95)   # about 200
Quadratic Probing. $\lambda < 0.5$ is still appropriate. This guarantees we will not have the problem demonstrated above (failure to find an open bucket, even though there are some).