Dicts/ParseDict.py

   1 #! /usr/bin/env python
   2 """
   3 Let's write our own python parser to clean up the pdf (after
   4 pdftotext of course).
   5 Instructions: run pdftotext like this:
   6
   7 $ pdftotext -f 9 -l 81 -raw -nopgbrk 04_06PU.PDF 04_06PU-3.txt
   8
   9 then run the python parser like this:
  10
$ python ParseDict.py 04_06PU-3.txt dicomV3.dic
  12 """
  13 import re,os
  14
  15 """
PdfTextParser takes as input a text file (produced by pdftotext)
and creates as output a clean file, ready to be processed by
DicomV3Expander.
  19 Warning: PdfTextParser does not expand:
  20 - (xxxx,xxxx to xxxx) xxxxxxxxxxxx
  21 or
  22 - (12xx, 3456) comment...
  23
  24 """
  25 class PdfTextParser:
  26   # Cstor
  27   def __init__(self):
  28     self._InputFilename = ''
  29     self._OutputFilename = ''
  30     self._Infile = 0
  31     self._OutLines = []
  32     self._PreviousBuffers = []
  33
  34   def SetInputFileName(self,s):
  35     self._InputFilename = s
  36
  37   def SetOutputFileName(self,s):
  38     self._OutputFilename = s
  39
  40   # Function returning if s is a comment for sure
  41   def IsAComment(self,s):
  42     #print s,  len(s)
  43     if s == "Tag Name VR VM":
  44       return True
  45     elif s == "PS 3.6-2003":
  46       return True
  47     elif s == "PS 3.6-2004":
  48       return True
  49     patt = re.compile('^Page [0-9]+$')
  50     if( patt.match(s) ):
  51       return True
  52     return False
  53
  54   def IsAStartingLine(self,s):
  55     patt = re.compile('^\\([0-9a-fA-Fx]+,[0-9a-fA-F]+\\) (.*)$')
  56     if( patt.match(s) ):
  57       return True
  58     return False
  59
  60   def IsAFullLine(self,s):
  61     patt = re.compile('^\\([0-9a-fA-Fx]+,[0-9a-fA-F]+\\) (.*) [A-Z][A-Z] [0-9]$')
  62     if( patt.match(s) ):
  63       return True
  64     return False
  65
  66   # FIXME this function could be avoided...
  67   def IsSuspicious(self,s):
  68     l = len(s)
  69     if l > 80:
  70       return True
  71     return False
  72
  73   def AddOutputLine(self,s):
  74     assert not self.IsAComment(s)
  75     self._OutLines.append(s + '\n')
  76
  77   def Open(self):
  78     self._Infile = file(self._InputFilename, 'r')
  79     for line in self._Infile.readlines():
  80       line = line[:-1] # remove '\n'
  81       if not self.IsAComment( line ):
  82         if self.IsAStartingLine(line):
  83           #print "Previous buffer:",self._PreviousBuffers
  84           previousbuffer = ' '.join(self._PreviousBuffers)
  85           if self.IsAStartingLine(previousbuffer):
  86             if not self.IsSuspicious(previousbuffer):
  87               self.AddOutputLine(previousbuffer)
  88             else:
  89               # this case should not happen if I were to rewrite the
  90               # thing I should be able to clean that
  91               #print "Suspicious:", previousbuffer
  92               #print "List is:", self._PreviousBuffers
  93               s = self._PreviousBuffers[0]
  94               if self.IsAFullLine(s):
  95                 # That means we have a weird line that does not start
  96                 # as usual (xxxx,xxxx) therefore we tried constructing
  97                 # a buffer using a the complete previous line...
  98                 #print "Full line:", s
  99                 self.AddOutputLine(s)
 100                 s2 = ' '.join(self._PreviousBuffers[1:])
 101                 #print "Other Full line:", s2
 102                 self.AddOutputLine(s2)
 103               else:
 104                 # we have a suspicioulsy long line, so what that could
 105                 # happen, let's check:
 106                 if self.IsAFullLine(previousbuffer):
 107                   self.AddOutputLine(previousbuffer)
 108                 else:
 109                   # This is the only case where we do not add
 110                   # previousbuffer to the _OutLines
 111                   print "Suspicious and Not a full line:", s
 112           else:
 113             if previousbuffer:
 114               print "Not a buffer:", previousbuffer
 115           # We can clean buffer, since only the case 'suspicious' +
 116           # 'Not a full line' has not added buffer to the list
 117           self._PreviousBuffers = []
 118           # In all cases save the line for potentially growing this line
 119           assert not self.IsAComment(line)
 120           self._PreviousBuffers.append(line)
 121         else:
 122           #print "Not a line",line
 123           assert not self.IsAComment(line)
 124           self._PreviousBuffers.append(line)
 125       else:
 126         #print "Comment:",line
 127         previousbuffer = ' '.join(self._PreviousBuffers)
 128         if previousbuffer and self.IsAStartingLine(previousbuffer):
 129           #print "This line is added:", previousbuffer
 130           self.AddOutputLine( previousbuffer )
 131         else:
 132           #print "Line is comment:", line
 133           print "Buffer is:", previousbuffer
 134         # Ok this is a comment we can safely clean the buffer:
 135         self._PreviousBuffers = []
 136     self.Write()
 137
 138   def Write(self):
 139     outfile = file(self._OutputFilename, 'w')
 140     outfile.writelines( self._OutLines )
 141     outfile.close()
 142     self._Infile.close()
 143
 144   # Main function to call for parsing
 145   def Parse(self):
 146     self.Open()
 147
 148 """
 149 subclass
 150 """
 151 class UIDParser(PdfTextParser):
 152   def IsAStartingLine(self,s):
 153     patt = re.compile('^1.2.840.10008.[0-9.]+ (.*)$')
 154     if( patt.match(s) ):
 155       return True
 156     #print "Is Not:", s
 157     return False
 158
 159   def IsAFullLine(self,s):
 160     patt = re.compile('^1.2.840.10008.[0-9.]+ (.*) PS ?[0-9].1?[0-9]$')
 161     if( patt.match(s) ):
 162       return True
 163     patt = re.compile('^1.2.840.10008.[0-9.]+ (.*) Well-known frame of reference$')
 164     if( patt.match(s) ):
 165       return True
 166     patt = re.compile('^1.2.840.10008.[0-9.]+ (.*) \\(Retired\\)$')
 167     if( patt.match(s) ):
 168       return True
 169     return False
 170
 171   def IsAComment(self,s):
 172     if PdfTextParser.IsAComment(self,s):
 173       return True
 174     # else let's enhance the super class
 175     patt = re.compile('^SPM2 (.*) http(.*)$')
 176     if( patt.match(s) ):
 177       return True
 178     return False
 179
 180   def AddOutputLine(self,s):
 181     if self.IsAFullLine(s):
 182       return PdfTextParser.AddOutputLine(self,s)
 183     print "Discarding:", s
 184
 185
 186 """
 187 TransferSyntaxParser
 188 """
 189 class TransferSyntaxParser(UIDParser):
 190   def IsAFullLine(self,s):
 191     patt = re.compile('^(.*) Transfer Syntax PS ?[0-9].1?[0-9]$')
 192     if patt.match(s):
 193       return UIDParser.IsAStartingLine(self,s)
 194     print "Not a TS:", s
 195     return False
 196
 197 """
 198 Papyrus parser
 199 pdftotext -f 19 -l 41 -raw -nopgbrk /tmp/Papyrus31Specif.pdf /tmp/Papyrus31Specif.txt
 200
 201 I need to do a second pass for pages:
 202 #29 since I need to find [0-9.]+
 203 #40,41 since it start with number in two columns !!
 204 """
 205 class PapyrusParser(PdfTextParser):
 206   def __init__(self):
 207     self._PreviousPage = 0
 208     self._PreviousNumber = 0
 209     PdfTextParser.__init__(self)
 210
 211   def IsAStartingLine(self,s):
 212     patt = re.compile('^[A-Za-z \'\(\)]+ +\\([0-9A-F]+,[0-9A-F]+\\) +(.*)$')
 213     if( patt.match(s) ):
 214       return True
 215     # After page 39, lines are like:
 216     patt = re.compile('^[0-9x]+ [0-9xA-F]+ .*$')
 217     if( patt.match(s) ):
 218       #print "PAge 39", s
 219       return True
 220     return False
 221
 222   def IsAFullLine(self,s):
 223     patt = re.compile('^[A-Za-z \'\(\)]+ +\\([0-9A-F]+,[0-9A-F]+\\) +(.*)$')
 224     if( patt.match(s) ):
 225       return True
 226     # After page 39, lines are like:
 227     patt = re.compile('^[0-9x]+ [0-9xA-F]+ .* [A-Z][A-Z] [0-9].*$')
 228     if( patt.match(s) ):
 229       #print "PAge 39", s
 230       return True
 231     return False
 232
 233   def IsAComment(self,s):
 234     # dummy case:
 235     if s == 'Attribute Name Tag Type Attribute Description':
 236       #print "Dummy", s
 237       return True
 238     patt = re.compile('^.*ANNEXE.*$')
 239     if patt.match(s):
 240       return True
 241     # Indicate page #, spaces ending with only one number
 242     # Sometime there is a line with only one number, we need to
 243     # make sure that page # is strictly increasing
 244     patt = re.compile('^[1-9][0-9]+$')
 245     if( patt.match(s) ):
 246       p = eval(s)
 247       if( p > self._PreviousPage):
 248         #print "Page #", p
 249         self._PreviousNumber = 0
 250         self._PreviousPage = p
 251         return True
 252 #      else:
 253 #        print "PAGE ERROR:", s
 254     # Now within each page there is a comment that start with a #
 255     # let's do the page approach wich reset at each page
 256     patt = re.compile('^[0-9]+$')
 257     if( patt.match(s) ):
 258       if( eval(s) > self._PreviousNumber):
 259         #print "Number #", eval(s)
 260         self._PreviousNumber = eval(s)
 261         return True
 262       #else:
 263       #  print "ERROR:", s
 264     return False
 265
 266   def AddOutputLine(self,s):
 267     assert not self.IsAComment(s)
 268     s = s.replace('\n','')
 269     #print "REMOVE return:", s
 270     patt = re.compile('^([A-Za-z \'\(\)]+) (\\([0-9A-F]+,[0-9A-F]+\\)) ([0-9C]+) (.*)$')
 271     m = patt.match(s)
 272     ss = 'dummy (0000,0000) 0'
 273     if m:
 274       ss = m.group(2) + ' ' + m.group(3) + ' ' + m.group(1)
 275     else:
 276       patt = re.compile('^([A-Za-z \'\(\)]+) (\\([0-9A-F]+,[0-9A-F]+\\)) (.*)$')
 277       m = patt.match(s)
 278       if m:
 279         ss = m.group(2) + ' 0 ' + m.group(1)
 280       else:
 281         ss = s
 282         # There is two case one that end with all capital letter
 283         # explaining the 'DEFINED TERMS'
 284         patt = re.compile('^[0-9x]+ [0-9xA-F]+ .* [A-Z][A-Z] [0-9] [A-Z, ]$')
 285         #patt = re.compile('^[0-9x]+ [0-9xA-F]+ .* [A-Z][A-Z] [0-9]|1\\-n [A-Z, |3.0]+$')
 286         #patt = re.compile('^[0-9x]+ [0-9xA-F]+ .* [A-Z][A-Z] [01n-] [A-Z, |3.0]+$')
 287         if patt.match(s):
 288           print "Match", s
 289           ss = ''
 290     self._OutLines.append(ss + '\n')
 291
 292   def Open(self):
 293     self._Infile = file(self._InputFilename, 'r')
 294     for line in self._Infile.readlines():
 295       line = line[:-1] # remove '\n'
 296       if not self.IsAComment( line ):
 297         if self.IsAStartingLine(line):
 298           #print "Previous buffer:",self._PreviousBuffers
 299           previousbuffer = ' '.join(self._PreviousBuffers)
 300           if self.IsAFullLine(previousbuffer):
 301             self.AddOutputLine(previousbuffer)
 302           else:
 303             if previousbuffer:
 304               print "Not a buffer:", previousbuffer
 305           # We can clean buffer, since only the case 'suspicious' +
 306           # 'Not a full line' has not added buffer to the list
 307           self._PreviousBuffers = []
 308           # In all cases save the line for potentially growing this line
 309           # just to be safe remove any white space at begining of string
 310           assert not self.IsAComment(line)
 311           self._PreviousBuffers.append(line.strip())
 312         else:
 313           #print "Not a line",line
 314           assert not self.IsAComment(line)
 315           # just to be safe remove any white space at begining of string
 316           self._PreviousBuffers.append(line.strip())
 317       else:
 318         #print "Previous buffer:",self._PreviousBuffers
 319         previousbuffer = ' '.join(self._PreviousBuffers)
 320         if previousbuffer and self.IsAStartingLine(previousbuffer):
 321           #print "This line is added:", previousbuffer
 322           self.AddOutputLine( previousbuffer )
 323 #        else:
 324 #          #print "Line is comment:", line
 325 #          print "Buffer is:", previousbuffer
 326         # Ok this is a comment we can safely clean the buffer:
 327         self._PreviousBuffers = []
 328     self.Write()
 329
 330 """
 331 Parser for:
 332 GE Medical Systems HISPEED ADVANTAGE CT/i CONFORMANCE STATEMENT
 333 pdftotext -f 81 -l 90 -raw -nopgbrk 2162114_100r5.pdf 2162114_100r5.txt
 334 """
 335 class GEMSParser(PdfTextParser):
 336 #  def __init__(self):
 337 #    PdfTextParser.__init__(self)
 338
 339   def IsAStartingLine(self,s):
 340     #patt = re.compile('^[A-Za-z \'\(\)]+ +\\([0-9A-F]+,[0-9A-F]+\\) +(.*)$')
 341     patt = re.compile('^[A-Za-z0-9 .#(),_/-]+ +\\([0-9A-F]+, ?[0-9A-F]+\\) +(.*)$')
 342     if( patt.match(s) ):
 343       return True
 344     return False
 345
 346   def IsAFullLine(self,s):
 347     #patt = re.compile('^[A-Za-z \'\(\)]+ +\\([0-9A-F]+,[0-9A-F]+\\) +(.*)$')
 348     patt = re.compile('^[A-Za-z0-9 .#(),_/-]+ +\\([0-9A-F]+, ?[0-9A-F]+\\) [A-Z][A-Z] [0-9]+$')
 349     if( patt.match(s) ):
 350       return True
 351     print "Not full:", s
 352     return False
 353
 354   def IsAComment(self,s):
 355     if PdfTextParser.IsAComment(self,s):
 356       return True
 357     #patt = re.compile('^.*GE Medical Systems LightSpeed QX/i CONFORMANCE STATEMENT REV 2.2 sm 2288567-100.*$')
 358     #if patt.match(s):
 359     #  return True
 360     patt = re.compile('^.*GE Medical Systems HISPEED ADVANTAGE CT/i CONFORMANCE STATEMENT.*$')
 361     if patt.match(s):
 362       return True
 363     patt = re.compile('^GE Medical Systems LightSpeed QX/i CONFORMANCE STATEMENT.*$')
 364     if patt.match(s):
 365       return True
 366     patt = re.compile('^Attribute Name Tag VR VM$')
 367     if patt.match(s):
 368       return True
 369     patt = re.compile('^B.[1-9].*Private .*$')
 370     if patt.match(s):
 371       return True
 372     patt = re.compile('^Table B.1.? .* Private .*$')
 373     if patt.match(s):
 374       return True
 375     patt = re.compile('^Note :.*$')
 376     if patt.match(s):
 377       return True
 378     patt = re.compile('^7.11.1$')
 379     if patt.match(s):
 380       return True
 381     return False
 382
 383   def AddOutputLine(self,s):
 384     #print s
 385     assert not self.IsAComment(s)
 386     patt = re.compile('^([A-Za-z0-9 .#(),_/-]+) +\\(([0-9A-F]+), ?([0-9A-F]+)\\) ([A-Z][A-Z]) ([0-9]+)$')
 387     m = patt.match(s)
 388     if m:
 389       ss = m.group(2).lower() + ' ' + m.group(3).lower() + ' ' + m.group(4) + ' ' + m.group(5) + ' ' + m.group(1)
 390       self._OutLines.append(ss + '\n')
 391     else:
 392       print 'OOOPs', s
 393
 394
 395 """
 396 This class is meant to expand line like:
 397 - (xxxx,xxxx to xxxx) xxxxxxxxxxxx
 398 or
 399 - (12xx, 3456) comment...
 400
 401 """
 402 class DicomV3Expander:
 403   def __init__(self):
 404     self._InputFilename = ''
 405     self._OutputFilename = ''
 406     self._OutLines = []
 407
 408   def SetInputFileName(self,s):
 409     self._InputFilename = s
 410
 411   def SetOutputFileName(self,s):
 412     self._OutputFilename = s
 413
 414   # Function to turn into lower case a tag:
 415   # ex: (ABCD, EF01) -> (abcd, ef01)
 416   def LowerCaseTag(self,s):
 417     #print "Before:", s[:-1]
 418     patt = re.compile('^(\\([0-9a-fA-F]+,[0-9a-fA-F]+\\))(.*)$')
 419     m = patt.match(s)
 420     if m:
 421       s1 = m.group(1)
 422       s2 = m.group(2)
 423       return s1.lower() + s2
 424     else:
 425       patt = re.compile('^[0-9a-fA-F]+ [0-9a-fA-F]+ [A-Z][A-Z] [0-9n-] .*$')
 426       if patt.match(s):
 427         return s
 428       else:
 429         print "Impossible case:", s
 430         os.sys.exit(1)
 431
 432   def AddOutputLine(self,s):
 433     if s.__class__ == list:
 434       for i in s:
 435         self._OutLines.append(i + '\n')
 436     else:
 437       self._OutLines.append(s + '\n')
 438
 439   # Expand the line approriaetkly and also add it to the
 440   # _OutLines list
 441   def ExpandLine(self, s):
 442     assert s[-1] == '\n'
 443     s = s[:-1]  # remove \n
 444     list = []
 445     if self.NeedToExpansion(s, list):
 446       self.AddOutputLine(list) # list != []
 447     elif self.NeedGroupXXExpansion(s, list):
 448       self.AddOutputLine(list) # list != []
 449     elif self.NeedElemXXExpansion(s, list):
 450       self.AddOutputLine(list) # list != []
 451     else:
 452       self.AddOutputLine(self.LowerCaseTag(s))
 453
 454   # If line is like:
 455   # (0020,3100 to 31FF) Source Image Ids RET
 456   def NeedToExpansion(self,s, list):
 457     patt = re.compile('^\\(([0-9a-fA-F]+),([0-9a-fA-F]+) to ([0-9a-fA-F]+)\\)(.*)$')
 458     m = patt.match(s)
 459     if m:
 460       #print m.groups()
 461       gr = m.group(1)
 462       el_start = '0x'+m.group(2)
 463       el_end = '0x'+m.group(3)
 464       for i in range(eval(el_start), eval(el_end)):
 465         el = hex(i)[2:]
 466         l = '('+gr+','+el+')'+m.group(4)
 467         list.append(l)
 468       return True
 469     return False
 470
 471   # If line is like:
 472   # (50xx,1200) Number of Patient Related Studies IS 1
 473   def NeedGroupXXExpansion(self,s,list):
 474     patt = re.compile('^\\(([0-9a-fA-F]+)xx,([0-9a-fA-F]+)\\)(.*)$')
 475     m = patt.match(s)
 476     if m:
 477       #print m.groups()
 478       gr_start = m.group(1)
 479       el = m.group(2)
 480       #el_start = '0x'+m.group(2)
 481       #el_end = '0x'+m.group(3)
 482       start = '0x'+gr_start+'00'
 483       end   = '0x'+gr_start+'FF'
 484       for i in range(eval(start), eval(end)):
 485         gr = hex(i)[2:]
 486         l = '('+gr+','+el+')'+m.group(3)
 487         #print l
 488         list.append(l)
 489       return True
 490     return False
 491
 492   # If line is like:
 493   # (2001,xx00) Number of Patient Related Studies IS 1
 494   def NeedElemXXExpansion(self,s,list):
 495     patt = re.compile('^([0-9a-fA-F]+) ([0-9a-fA-F]+)xx(.*)$')
 496     m = patt.match(s)
 497     if m:
 498       #print m.groups()
 499       gr = m.group(1)
 500       el_start = m.group(2)
 501       start = '0x00'
 502       end   = '0xFF'
 503       for i in range(eval(start), eval(end)):
 504         el = '%02x'% i
 505         l = '('+gr+','+el_start+el+')'+m.group(3)
 506         print l
 507         list.append(l)
 508       return True
 509     else:
 510       patt = re.compile('^([0-9a-fA-F]+) xx([0-9a-fA-F]+)(.*)$')
 511       m = patt.match(s)
 512       if m:
 513         #print m.groups()
 514         gr = m.group(1)
 515         el_start = m.group(2)
 516         start = '0x00'
 517         end   = '0xFF'
 518         for i in range(eval(start), eval(end)):
 519           el = '%02x'% i
 520           l = '('+gr+','+el+el_start+')'+m.group(3)
 521           print l
 522           list.append(l)
 523         return True
 524     return False
 525
 526   def Write(self):
 527     outfile = file(self._OutputFilename, 'w')
 528     outfile.writelines( self._OutLines )
 529     outfile.close()
 530
 531   def Expand(self):
 532     infile = file(self._InputFilename,'r')
 533     for line in infile.readlines():
 534       # ExpandLine also LowerCase the line
 535       self.ExpandLine(line) # l is [1,n] lines
 536     self.Write()
 537     infile.close()
 538
 539 """
 540 Parse line from a philips document, line are like this:
 541
 542 Syncra Scan Type 2005,10A1 VR = CS, VM = 1
 543 """
 544 class InteraParser:
 545   def __init__(self):
 546     self._InputFilename = ''
 547     self._OutputFilename = ''
 548
 549   def Reformat(self,s):
 550     assert self.IsGood(s)
 551     patt = re.compile("^([A-Za-z0-9 -]+) ([0-9A-Z]+),([0-9A-Z]+) VR = ([A-Z][A-Z]), VM = (.*)$")
 552     m = patt.match(s)
 553     if m:
 554       dicom = m.group(2) + ' ' + m.group(3) + ' ' + m.group(4) + ' ' + m.group(5) + ' ' + m.group(1)
 555       return dicom
 556     else:
 557       print "oops"
 558
 559   def IsGood(self,s):
 560     patt = re.compile("^[A-Za-z0-9 -]+ [0-9A-Z]+,[0-9A-Z]+ VR = [A-Z][A-Z], VM = .*$")
 561     if patt.match(s):
 562       return True
 563     print "Not good:", s
 564     return False
 565
 566   def SetInputFileName(self,s):
 567     self._InputFilename = s
 568
 569   def SetOutputFileName(self,s):
 570     self._OutputFilename = s
 571
 572   def Parse(self):
 573     infile = file(self._InputFilename, 'r')
 574     outLines = []
 575     for line in infile.readlines():
 576       print self.Reformat(line)
 577       outLines.append( self.Reformat(line) + '\n' )
 578     outfile = file(self._OutputFilename, 'w')
 579     outfile.writelines( outLines )
 580     outfile.close()
 581
 582 """
 583 Parse line from a dicom3tools document, line are like this:
 584
 585 (0003,0008) VERS="SSPI" VR="US"   VM="1"        Owner="SIEMENS ISI"             Keyword="ISICommandField"                       Name="ISI Command Field"
 586 """
 587 class Dicom3ToolsParser:
 588   def __init__(self):
 589     self._InputFilename = ''
 590     self._OutputFilename = ''
 591
 592   def Reformat(self,s):
 593     assert self.IsGood(s)
 594     patt = re.compile("^\(([0-9a-f]+),([0-9a-f]+)\)\s+VERS=\".*\"\s+VR=\"([A-Z][A-Z])\"\s+VM=\"(.*)\"\s+Owner=\".*\"\s+Keyword=\".*\"\s+Name=\"(.*)\"$")
 595     m = patt.match(s)
 596     dicom = ''
 597     if m:
 598       # Apparently some have Name == '?', skip those
 599       name = m.group(5)
 600       if name != '?' and name != '? ':
 601         dicom = m.group(1) + ' ' + m.group(2) + ' ' + m.group(3) + ' ' + m.group(4) + ' ' + m.group(5)
 602       else:
 603         print "oops"
 604     else:
 605       print "oops"
 606     return dicom
 607
 608   def IsGood(self,s):
 609     #patt = re.compile("^\([0-9a-f]+,[0-9a-f]+\) VERS=\".*\" VR=\"[A-Z][A-Z]\" VM=\".*\" Owner=\".*\" Keyword=\".*\" Name=\".*\"$")
 610     patt = re.compile("^\([0-9a-f]+,[0-9a-f]+\)\s+VERS=\".*\"\s+VR=\"[A-Z][A-Z]\"\s+VM=\".*\"\s+Owner=\".*\"\s+Keyword=\".*\"\s+Name=\".*\".*$")
 611     if patt.match(s):
 612       return True
 613     print "Not good:", s
 614     return False
 615
 616   def SetInputFileName(self,s):
 617     self._InputFilename = s
 618
 619   def SetOutputFileName(self,s):
 620     self._OutputFilename = s
 621
 622   def Parse(self):
 623     infile = file(self._InputFilename, 'r')
 624     outLines = []
 625     for line in infile.readlines():
 626       newline = self.Reformat(line)
 627       print newline
 628       if newline:
 629         outLines.append( newline + '\n' )
 630     outfile = file(self._OutputFilename, 'w')
 631     outfile.writelines( outLines )
 632     outfile.close()
 633
 634 """
Parse lines from a GE Advance document; lines look like this:
 636
 637 GE Advance Implementation Version Name (0009,1001) 3 LO 2 n/a
 638 """
 639 class GEAdvanceParser:
 640   def __init__(self):
 641     self._InputFilename = ''
 642     self._OutputFilename = ''
 643
 644   def Reformat(self,s):
 645     assert self.IsGood(s)
 646     #patt = re.compile("^\(([0-9a-f]+),([0-9a-f]+)\)\s+VERS=\".*\"\s+VR=\"([A-Z][A-Z])\"\s+VM=\"(.*)\"\s+Owner=\".*\"\s+Keyword=\".*\"\s+Name=\"(.*)\"$")
 647     patt = re.compile("^([A-Za-z0-9 ._>]+) \\(([0-9A-F]+),([0-9A-F]+)\\) [0-9] ([A-Z][A-Z]) ([0-9]) .*$")
 648     m = patt.match(s)
 649     dicom = ''
 650     if m:
 651       dicom = m.group(2) + ' ' + m.group(3).lower() + ' ' + m.group(4) + ' ' + m.group(5) + ' ' + m.group(1)
 652     else:
 653       print "oops"
 654     return dicom
 655
 656   def IsGood(self,s):
 657     #patt = re.compile("^\([0-9a-f]+,[0-9a-f]+\)\s+VERS=\".*\"\s+VR=\"[A-Z][A-Z]\"\s+VM=\".*\"\s+Owner=\".*\"\s+Keyword=\".*\"\s+Name=\".*\".*$")
 658     patt = re.compile("^[A-Za-z0-9 ._>]+ \\([0-9A-F]+,[0-9A-F]+\\) [0-9] [A-Z][A-Z] [0-9] .*$")
 659     if patt.match(s):
 660       return True
 661     print "Not good:", s
 662     return False
 663
 664   def SetInputFileName(self,s):
 665     self._InputFilename = s
 666
 667   def SetOutputFileName(self,s):
 668     self._OutputFilename = s
 669
 670   def Parse(self):
 671     infile = file(self._InputFilename, 'r')
 672     outLines = []
 673     for line in infile.readlines():
 674       newline = self.Reformat(line)
 675       #print newline
 676       if newline:
 677         outLines.append( newline + '\n' )
 678     outfile = file(self._OutputFilename, 'w')
 679     outfile.writelines( outLines )
 680     outfile.close()
 681
 682 if __name__ == "__main__":
 683   argc = len(os.sys.argv )
 684   if ( argc < 3 ):
 685     print "Sorry, wrong list of args"
 686     os.sys.exit(1) #error
 687
 688   inputfilename = os.sys.argv[1]
 689   outputfilename = os.sys.argv[2]
 690   tempfile = "/tmp/mytemp"
 691   """
 692   dp = PdfTextParser()
 693   dp.SetInputFileName( inputfilename )
 694   #dp.SetOutputFileName( outputfilename )
 695   dp.SetOutputFileName( tempfile )
 696   dp.Parse()
 697   exp = DicomV3Expander()
 698   #exp.SetInputFileName( tempfile )
 699   exp.SetInputFileName( inputfilename )
 700   exp.SetOutputFileName( outputfilename )
 701   exp.Expand()
 702
 703   dp = TransferSyntaxParser()
 704   dp.SetInputFileName( inputfilename )
 705   dp.SetOutputFileName( outputfilename )
 706   dp.Parse()
 707
 708   dp = PapyrusParser()
 709   dp.SetInputFileName( inputfilename )
 710   dp.SetOutputFileName( outputfilename )
 711   dp.Parse()
 712
 713   dp = InteraParser()
 714   dp.SetInputFileName( inputfilename )
 715   dp.SetOutputFileName( outputfilename )
 716   dp.Parse()
 717   dp = GEMSParser()
 718   dp.SetInputFileName( inputfilename )
 719   dp.SetOutputFileName( outputfilename )
 720   dp.Parse()
 721
 722   """
 723   dp = Dicom3ToolsParser()
 724   dp.SetInputFileName( inputfilename )
 725   dp.SetOutputFileName( outputfilename )
 726   dp.Parse()
 727
 728   """
 729   dp = GEAdvanceParser()
 730   dp.SetInputFileName( inputfilename )
 731   dp.SetOutputFileName( outputfilename )
 732   dp.Parse()
 733   """
 734
 735   #print dp.IsAStartingLine( "(0004,1212) File-set Consistency Flag US 1\n" )