Hashing-to-Disk by Hand

#!/usr/bin/perl -w
#########################  Hashing to Disk #################################

use constant RECS_PER_BUCKET => scalar 4;
use constant RECORD_SIZE => scalar 25;
use constant BUCKETS     => scalar 100;

if (-e "hashfile")  {open(H,"+<hashfile") || die "Cannot open hashfile.\n"}
if (!-e "hashfile") {open(H,"+>hashfile") || die "Cannot open hashfile.\n"}

#  Create an empty hashfile where each record is space-filled, \n-terminated
#  if hashfile does not yet have any size (which means it is new).
if (-z H)
{
print "empty!\n";
     $record = " " x (RECORD_SIZE - 1) . "\n";
     print H $record x (RECS_PER_BUCKET * BUCKETS);
}


#  Account number format is XX-XXXXX-XXX
while(print("Enter account balance: "), ($line = <STDIN>) !~ /^\s*quit\s*$/i)
{
     next if $line =~ /^\s*$/;   #  Re-prompt if no input.

#  Trim leading and trailing spaces from input line.
     $line =~ s/^\s*|\s*$//g;

#  Split at whitespace to get account number and balance off of input line.
     ($acct, $balance, $junk) = split  /\s+/, $line;      

#  Is there an excess field on the input line?
     if (defined $junk)
     {
         print "Bad line format!  Must be two fields!\n";
         next;
     }

#  See if account number and balance are in the correct format.
     $error = error_check($acct, $balance);
     next if $error;

#  Compute hash value by unique account number key.
     $hash = hash_calc($acct);

#  Format record and go to proper hash bucket.
     $record = sprintf("%12s%12.2f\n", $acct, $balance); 
     seek(H, $hash * RECORD_SIZE * RECS_PER_BUCKET, 0);
     
#  Make sure the record isn't already in the bucket.
     $duplicate = check_for_duplicate($acct);
     next if $duplicate;

#  Store the record.  Report error if no room in bucket. 
     store_record($record);
}   #  Finally, the end of the while loop!! 




#  Input loop!  Do a search for records.  This will also tell us if our
#  hash algorithm is correct.
while(print("Enter account: "), ($acct = <STDIN>) !~ /^\s*quit\s*$/i)
{
     next if $acct =~ /^\s*$/;   #  Re-prompt if no input.

#  Remove leading/trailing spaces from account number.
     $acct =~ s/^\s*|\s*$//g;

#  Check for bad account number format.
     if ($acct !~ /^\d{2}-\d{5}-\d{3}$/)
     {
          print "Bad account number format!\n";
          next;
     } 

#  Compute hash value by unique account number key.
     $hash = hash_calc($acct);

#  Go to proper hash bucket.
     seek(H, $hash * RECORD_SIZE * RECS_PER_BUCKET, 0);

#  Search for record.
     find_record($acct);
}   #  End of input while loop.






#  Report errors and reprompt if account number or balance in wrong format.
sub error_check
{
     my ($acct, $balance) = @_;

     if ($acct !~ /^\d{2}-\d{5}-\d{3}$/)
     {
           print "Bad account format!\n";
           return 1;
     }
     if ($balance !~ /^\d+\.\d{2}$/)
     {
           print "Bad balance format!\n";
           return 1;
     } 
     return 0;
}
#  Calculate the hash value of the user's record by summing the squares
#  of the ASCII values of the characters of the account number and then
#  modulo by the number of buckets.
sub hash_calc
{
     my ($acct) = @_;
     my (@sqrs, $hash, $square);

     @sqrs  = map {ord() * ord()}  split //, $acct;
     $hash = 0;
     foreach $square (@sqrs) {$hash += $square}
     $hash %=  BUCKETS;   #  Figure out bucket location.
     return $hash;
}




#  Don't allow duplicate entries to become part of the database.
sub check_for_duplicate
{
     my ($acct) = @_;
     my $searchRec; 

     for (1..RECS_PER_BUCKET)
     {
           $searchRec = <H>;
           if ($acct eq substr($searchRec, 0, 12))
           {
                print "Record already exists!\n";
                return 1;
           }
     }
     seek(H, -(RECS_PER_BUCKET * RECORD_SIZE), 1); # Back to start of bucket 
     return 0;
}





#  Find record. 
sub find_record 
{
     my ($acct) = @_;
     my $searchRec; 

     for (1..RECS_PER_BUCKET)
     {
           $searchRec = <H>;
           if ($acct eq substr($searchRec, 0, 12))
           {
                print substr($searchRec,0,12),"   ", substr($searchRec,13,13);
                return;
           }
     }
     print "Record not found!\n";
}
#  Store record if there's room in the bucket.  Assumes file pointer is
#  positioned at the beginning of the bucket.
sub store_record
{
     my ($record) = @_;

     for (1..RECS_PER_BUCKET)
     {
           $searchRec = <H>;
           if (substr($searchRec, 0, 1) eq " ")  #  Back up and store!
           {
                 seek(H, -(RECORD_SIZE), 1);
                 print H $record;
                 return 1;
           }
     }
     print "Hash bucket overflow!  Could not store record!\n";
     return 0;
}
###############################  Program Session  ############################

$ hash.pl
Enter account balance:    11-11111-111     44.55
Enter account balance:  		22-22222-222    9876.09
Enter account balance: 33-33333-333  54.87
Enter account balance:      98-09876-555    23456.00
Enter account balance: 11-11111-111     55.55
Record already exists!
Enter account balance:     11-1234-111  99.88
Bad account format!
Enter account balance: 		11-6543e-987 88.77
Bad account format!
Enter account balance:   111-11-111
Bad account format!
Enter account balance:      12-12345-678
Bad balance format!
Enter account balance:   333-33333-333   87.987
Bad account format!
Enter account balance: quit
Enter account:   44-44444-444   
Record not found!
Enter account: 11-11111-111
11-11111-111         44.55
Enter account: quit

$ hash.pl
Enter account balance: 33-33333-333
Bad balance format!
Enter account balance: quit
Enter account: 33-33333-333
Record not found!
Enter account: quit