Package ziggy :: Package hdmc :: Module hdmc_tests
[hide private]
[frames] | [no frames]

Source Code for Module ziggy.hdmc.hdmc_tests

  1  ''' 
  2  Created on Jul 29, 2010 
  3   
  4  @author: dwmclary 
  5  ''' 
  6  import unittest 
  7  import os 
  8  import hdmc 
  9  import hdfs.hdfs_access as hdfs 
 10  import hadoop_config as config 
 11  from glob import glob 
 12   
class HDMCTest(unittest.TestCase):
    """Integration tests for the hdmc job-submission helpers.

    These tests exercise hdmc against a live Hadoop/HDFS installation:
    they write frame scripts into the current working directory, create
    and remove HDFS paths, and read fixture files from ``./test``.  They
    are therefore environment-dependent and are not safe to run in
    parallel.

    NOTE(review): several ``def`` header lines were lost when this file
    was extracted from the epydoc HTML rendering.  The reconstructed
    method names below are marked where they are inferred rather than
    certain; ``testSubmitNoSupportingFiles`` is grounded by the explicit
    call in ``runTest``.
    """

    def runTest(self):
        """Default entry point: run only the no-supporting-files job test."""
        self.setUp()
        self.testSubmitNoSupportingFiles()
        self.tearDown()

    def setUp(self):
        """Resolve fixture paths relative to the current working directory."""
        self.wd = os.getcwd()
        self.script = self.wd+"/test/numpy_random_means.py"
        self.reducer = self.wd+"/test/numpy_mean_reduction.py"
        self.output_file = self.wd+"/test/random_means"
        # Python 2 map() yields a list of checkpoint names "1".."19".
        self.checkpoint_names = map(str, range(1, 20))
        # Per-user checkpoint scratch space on the shared filesystem.
        self.checkpoint_dir = config.shared_tmp_space+"/"+os.getlogin()+"/hdmc_checkpoints"

    def tearDown(self):
        # No cleanup: tests inspect their own output files afterwards.
        pass

    def testMakeFrame(self):
        """make_frame writes frame.py into the working directory."""
        hdmc.make_frame(self.script)
        self.assertTrue(os.path.isfile(self.wd+"/frame.py"))

    def testMakeCheckpointFrame(self):
        """make_checkpointing_frame writes checkpoint_frame.py."""
        hdmc.make_checkpointing_frame(self.script, self.checkpoint_names, self.checkpoint_dir)
        self.assertTrue(os.path.isfile(self.wd+"/checkpoint_frame.py"))

    def testCreateDummyData(self):
        """create_dummy_data writes the literal 'dummy data' to HDFS."""
        hdfs.rm("dummy")
        hdmc.create_dummy_data()
        dummy_data = hdfs.cat("dummy")["stdout"]
        self.assertEqual("dummy data", dummy_data.rstrip())

    # NOTE(review): def line lost in extraction — name reconstructed
    # from the body's call to hdmc.set_checkpoint_directory.
    def testSetCheckpointDirectory(self):
        """set_checkpoint_directory recreates the checkpoint directory."""
        os.system('rmdir '+self.checkpoint_dir)
        checkpoint_dir = hdmc.set_checkpoint_directory(self.output_file)
        self.assertTrue(os.path.exists(checkpoint_dir))

    def testDownloadHDFSData(self):
        """download_hdfs_data copies an HDFS file to the local test dir."""
        hdmc.download_hdfs_data(self.wd+"/test/dummy")
        self.assertTrue(os.path.isfile(self.wd+"/test/dummy"))
        os.system('rm '+self.wd+'/test/dummy')
        self.assertFalse(os.path.isfile(self.wd+"/test/dummy"))

    # Name grounded by the call in runTest() above.
    def testSubmitNoSupportingFiles(self):
        """submit_inline runs the script and produces the output file."""
        hdfs.rm("random_means")
        hdmc.submit_inline(self.script, self.output_file, iterations=200)
        self.assertTrue(os.path.exists(self.wd+"/test/random_means"))

    # NOTE(review): def line lost in extraction — name reconstructed
    # from the reduction_script keyword used in the body.
    def testSubmitWithReduction(self):
        """submit_inline with a reduction script still produces output."""
        hdfs.rm("random_means")
        hdmc.submit_inline(self.script, self.output_file, iterations=200, reduction_script=self.reducer)
        self.assertTrue(os.path.exists(self.wd+"/test/random_means"))

    def testSubmitCheckpoints(self):
        """Checkpointed line-count job matches the precomputed wc output."""
        hdfs.rm("line_counts")
        file_list = glob(self.wd+"/test/gutenberg/*")
        self.script = self.wd+"/test/line_counter.py"
        self.output_file = self.wd+"/test/line_counts"
        checkpoints = hdmc.submit_checkpoint_inline(self.script, self.output_file, file_list, [])
        self.assertEqual(len(file_list), len(checkpoints))
        self.assertTrue(os.path.exists(self.wd+"/test/line_counts"))
        hadoop_result_file = self.wd+"/test/line_counts"
        master_result_file = self.wd+"/test/wc_output.dat"
        hadoop_results = {}
        master_results = {}

        # Both files hold "count<ws>name" pairs; index by name.
        for line in open(master_result_file).readlines():
            if len(line.rstrip()) > 0:
                entry = line.split()
                master_results[entry[1]] = int(entry[0])
        for line in open(hadoop_result_file).readlines():
            if len(line.rstrip()) > 0:
                entry = line.split()
                hadoop_results[entry[1]] = int(entry[0])

        # Every master entry must be reproduced exactly by the Hadoop run.
        for key in master_results.keys():
            self.assertEqual(master_results[key], hadoop_results[key])

    # NOTE(review): def line lost in extraction — name reconstructed
    # from the files=False keyword (arguments are URLs, not files).
    def testSubmitCheckpointsNoFiles(self):
        """Checkpointed job over URL arguments (files=False) completes."""
        url_list = ["http://www.gutenberg.org/files/8713/8713-h/8713-h.htm",
                    "http://www.gutenberg.org/files/10554/10554-h/10554-h.htm",
                    "http://www.gutenberg.org/ebooks/8164.html.gen",
                    "http://www.gutenberg.org/files/5200/5200-h/5200-h.htm",
                    "http://www.gutenberg.org/ebooks/100.txt.utf8",
                    "http://www.gutenberg.org/files/25717/25717-h/25717-h.htm",
                    "http://www.gutenberg.org/files/221/221-h/221-h.htm"]
        hdfs.rm("book_contents")
        os.system("rm "+self.wd+"/test/book_contents")
        self.script = self.wd+"/test/fetch_books.py"
        self.output_file = self.wd+"/test/book_contents"
        self.supporting_files = []
        checkpoints = hdmc.submit_checkpoint_inline(self.script, self.output_file, url_list, self.supporting_files, files=False)
        self.assertEqual(len(url_list), len(checkpoints))
        self.assertTrue(os.path.exists(self.wd+"/test/book_contents"))

    # NOTE(review): def line lost in extraction — name reconstructed
    # from the reduction_script keyword used in the body.
    def testSubmitCheckpointsWithReduction(self):
        """Checkpointed job plus reduction matches the precomputed total."""
        hdfs.rm("line_total")
        file_list = glob(self.wd+"/test/gutenberg/*")
        self.script = self.wd+"/test/line_counter.py"
        self.output_file = self.wd+"/test/line_total"
        self.reducer = self.wd+"/test/line_sum.py"
        checkpoints = hdmc.submit_checkpoint_inline(self.script, self.output_file, file_list, reduction_script=self.reducer, arguments="")
        self.assertEqual(len(file_list), len(checkpoints))
        self.assertTrue(os.path.exists(self.wd+"/test/line_total"))
        hadoop_result_file = self.wd+"/test/line_total"
        master_result_file = self.wd+"/test/wc_total.dat"
        hadoop_results = {}
        master_results = {}
        for line in open(master_result_file).readlines():
            if len(line.rstrip()) > 0:
                entry = line.split()
                master_results[entry[1]] = int(entry[0])
        for line in open(hadoop_result_file).readlines():
            if len(line.rstrip()) > 0:
                entry = line.split()
                hadoop_results[entry[1]] = int(entry[0])
        for key in master_results.keys():
            self.assertEqual(master_results[key], hadoop_results[key])
135 136 if __name__ == "__main__": 137 #import sys;sys.argv = ['', 'Test.testName'] 138 unittest.main() 139