Fix mistypings

[gdcm.git] / Dicts / ParseDict.py
diff --git a/Dicts/ParseDict.py b/Dicts/ParseDict.py

index 76025bcf2735fe6cb49a27021f2ef502af75a191..2b3924245ab53897026a32b625fb2b949786f117 100644 (file)
--- a/Dicts/ParseDict.py
+++ b/Dicts/ParseDict.py
@@ -212,36 +212,55 @@ class PapyrusParser(PdfTextParser):
      patt = re.compile('^[A-Za-z \'\(\)]+ +\\([0-9A-F]+,[0-9A-F]+\\) +(.*)$') 
      if( patt.match(s) ):
        return True
+    # After page 39, lines are like:
+    patt = re.compile('^[0-9x]+ [0-9xA-F]+ .*$') 
+    if( patt.match(s) ):
+      #print "PAge 39", s
+      return True
      return False
  
    def IsAFullLine(self,s):
+    # Return True when s has one of the two complete-line layouts tested
+    # below, False otherwise.
+    # Layout 1: text made of letters, spaces, quotes and parentheses, then a
+    # '(hex,hex)' tag, then the rest of the line.
      patt = re.compile('^[A-Za-z \'\(\)]+ +\\([0-9A-F]+,[0-9A-F]+\\) +(.*)$') 
      if( patt.match(s) ):
        return True
+    # After page 39, lines are like:
+    # (a digits/'x' token, a hex/'x' token, free text, a two-capital-letter
+    # token, then a digit followed by anything)
+    patt = re.compile('^[0-9x]+ [0-9xA-F]+ .* [A-Z][A-Z] [0-9].*$') 
+    if( patt.match(s) ):
+      #print "PAge 39", s
+      return True
      return False
  
    def IsAComment(self,s):
+    # Return True when s is a line to be treated as a comment: the table
+    # header, a line containing 'ANNEXE', a page number or an in-page
+    # running number. Return False for everything else.
+    # Side effects: an accepted page number is stored in self._PreviousPage
+    # and resets self._PreviousNumber to 0; an accepted running number is
+    # stored in self._PreviousNumber.
      # dummy case:
      if s == 'Attribute Name Tag Type Attribute Description':
-      print "Dummy", s
+      #print "Dummy", s
+      return True
+    patt = re.compile('^.*ANNEXE.*$')
+    if patt.match(s):
        return True
      # Indicate page #, spaces ending with only one number
      # Sometime there is a line with only one number, we need to
      # make sure that page # is strictly increasing
      patt = re.compile('^[1-9][0-9]+$') 
      if( patt.match(s) ):
-      if( eval(s) > self._PreviousPage):
-        print "Page #", eval(s)
+      # s is digits only at this point (guaranteed by the pattern above), so
+      # eval() merely converts it to an integer.
+      p = eval(s)
+      if( p > self._PreviousPage):
+        #print "Page #", p
          self._PreviousNumber = 0
-        self._PreviousPage = eval(s)
+        self._PreviousPage = p
          return True
+#      else:
+#        print "PAGE ERROR:", s
      # Now within each page there is a comment that start with a #
      # let's do the page approach wich reset at each page
      patt = re.compile('^[0-9]+$') 
      if( patt.match(s) ):
-      print "Number #", eval(s)
-      self._PreviousNumber = eval(s)
-      return True
+      # NOTE(review): this pattern also accepts a leading zero; eval() reads
+      # such a string as an octal literal under Python 2 ('010' gives 8, '08'
+      # raises SyntaxError) -- confirm the input never contains one.
+      if( eval(s) > self._PreviousNumber):
+        #print "Number #", eval(s)
+        self._PreviousNumber = eval(s)
+        return True
+      #else:
+      #  print "ERROR:", s
      return False
  
    def AddOutputLine(self,s):
@@ -258,6 +277,16 @@ class PapyrusParser(PdfTextParser):
        m = patt.match(s)
        if m:
          ss = m.group(2) + ' 0 ' + m.group(1)
+      else:
+        ss = s
+        # There is two case one that end with all capital letter
+        # explaining the 'DEFINED TERMS'
+        patt = re.compile('^[0-9x]+ [0-9xA-F]+ .* [A-Z][A-Z] [0-9] [A-Z, ]$') 
+        #patt = re.compile('^[0-9x]+ [0-9xA-F]+ .* [A-Z][A-Z] [0-9]|1\\-n [A-Z, |3.0]+$') 
+        #patt = re.compile('^[0-9x]+ [0-9xA-F]+ .* [A-Z][A-Z] [01n-] [A-Z, |3.0]+$') 
+        if patt.match(s):
+          print "Match", s
+          ss = ''
      self._OutLines.append(ss + '\n')
  
    def Open(self):
@@ -298,6 +327,71 @@ class PapyrusParser(PdfTextParser):
          self._PreviousBuffers = []
      self.Write()
  
+"""
+Parser for:
+GE Medical Systems HISPEED ADVANTAGE CT/i CONFORMANCE STATEMENT
+pdftotext -f 81 -l 90 -raw -nopgbrk 2162114_100r5.pdf 2162114_100r5.txt
+"""
+class GEMSParser(PdfTextParser):
+  # PdfTextParser specialisation for table rows of the form
+  # 'Attribute Name (GGGG,EEEE) VR VM' (an optional space is accepted after
+  # the comma of the tag).
+
+  def IsAStartingLine(self,s):
+    # True when s begins with a name followed by a '(hex,hex)' tag.
+    startRe = re.compile('^[A-Za-z0-9 .#(),_/-]+ +\\([0-9A-F]+, ?[0-9A-F]+\\) +(.*)$')
+    return startRe.match(s) is not None
+
+  def IsAFullLine(self,s):
+    # True when s is a whole row: name, tag, two capital letters and a
+    # number. Anything else is reported on stdout and rejected.
+    fullRe = re.compile('^[A-Za-z0-9 .#(),_/-]+ +\\([0-9A-F]+, ?[0-9A-F]+\\) [A-Z][A-Z] [0-9]+$')
+    if fullRe.match(s) is None:
+      print "Not full:", s
+      return False
+    return True
+
+  def IsAComment(self,s):
+    # True when the base class already considers s a comment, or when s is
+    # one of the page headers, table headers, section titles or notes
+    # listed below.
+    if PdfTextParser.IsAComment(self,s):
+      return True
+    boilerplate = (
+      '^.*GE Medical Systems HISPEED ADVANTAGE CT/i CONFORMANCE STATEMENT.*$',
+      '^GE Medical Systems LightSpeed QX/i CONFORMANCE STATEMENT.*$',
+      '^Attribute Name Tag VR VM$',
+      '^B.[1-9].*Private .*$',
+      '^Table B.1.? .* Private .*$',
+      '^Note :.*$',
+      '^7.11.1$',
+      )
+    for expr in boilerplate:
+      if re.match(expr, s):
+        return True
+    return False
+
+  def AddOutputLine(self,s):
+    # Append s to self._OutLines rewritten as 'gggg eeee VR VM Name' (tag in
+    # lower case); a line that is not a full row is reported on stdout and
+    # dropped.
+    assert not self.IsAComment(s)
+    rowRe = re.compile('^([A-Za-z0-9 .#(),_/-]+) +\\(([0-9A-F]+), ?([0-9A-F]+)\\) ([A-Z][A-Z]) ([0-9]+)$')
+    row = rowRe.match(s)
+    if not row:
+      print 'OOOPs', s
+      return
+    name, group, element, vr, vm = row.groups()
+    fields = [group.lower(), element.lower(), vr, vm, name]
+    self._OutLines.append(' '.join(fields) + '\n')
+
+
  """
  This class is meant to expand line like:
  - (xxxx,xxxx to xxxx) xxxxxxxxxxxx
@@ -328,8 +422,12 @@ class DicomV3Expander:
        s2 = m.group(2)
        return s1.lower() + s2
      else:
-      print "Impossible case:", s
-      os.sys.exit(1)
+      patt = re.compile('^[0-9a-fA-F]+ [0-9a-fA-F]+ [A-Z][A-Z] [0-9n-] .*$')
+      if patt.match(s):
+        return s
+      else:
+        print "Impossible case:", s
+        os.sys.exit(1)
  
    def AddOutputLine(self,s):
      if s.__class__ == list:
@@ -346,7 +444,9 @@ class DicomV3Expander:
      list = []
      if self.NeedToExpansion(s, list):
        self.AddOutputLine(list) # list != []
-    elif self.NeedXXExpansion(s, list):
+    elif self.NeedGroupXXExpansion(s, list):
+      self.AddOutputLine(list) # list != []
+    elif self.NeedElemXXExpansion(s, list):
        self.AddOutputLine(list) # list != []
      else:
        self.AddOutputLine(self.LowerCaseTag(s))
@@ -370,7 +470,7 @@ class DicomV3Expander:
  
    # If line is like:
    # (50xx,1200) Number of Patient Related Studies IS 1
-  def NeedXXExpansion(self,s,list):
+  def NeedGroupXXExpansion(self,s,list):
      patt = re.compile('^\\(([0-9a-fA-F]+)xx,([0-9a-fA-F]+)\\)(.*)$')
      m = patt.match(s)
      if m:
@@ -389,6 +489,40 @@ class DicomV3Expander:
        return True
      return False
  
+  # If line is like:
+  # (2001,xx00) Number of Patient Related Studies IS 1
+  def NeedElemXXExpansion(self,s,list):
+    # Expand a line whose element number contains the placeholder 'xx' into
+    # one line per value of xx (00 to ff), each appended to `list` as
+    # '(group,element)' followed by the rest of the input line.
+    # Two space-separated input layouts are recognised:
+    #   'gggg <digits>xx<rest>'  -- xx follows the fixed element digits
+    #   'gggg xx<digits><rest>'  -- xx precedes the fixed element digits
+    # Returns True when s was expanded, False when it has neither layout
+    # (`list` is then left untouched).
+    trailing = re.compile('^([0-9a-fA-F]+) ([0-9a-fA-F]+)xx(.*)$').match(s)
+    leading = None
+    if not trailing:
+      leading = re.compile('^([0-9a-fA-F]+) xx([0-9a-fA-F]+)(.*)$').match(s)
+    m = trailing or leading
+    if not m:
+      return False
+    gr = m.group(1)
+    fixed = m.group(2)
+    # The upper bound of range() is exclusive: it must be 0x100 (not 0xFF)
+    # for the last value 'ff' to be generated as well.
+    for i in range(0x00, 0x100):
+      el = '%02x'% i
+      if trailing:
+        l = '('+gr+','+fixed+el+')'+m.group(3)
+      else:
+        l = '('+gr+','+el+fixed+')'+m.group(3)
+      print l
+      list.append(l)
+    return True
+
    def Write(self):
      outfile = file(self._OutputFilename, 'w')
      outfile.writelines( self._OutLines )
@@ -402,7 +536,149 @@ class DicomV3Expander:
      self.Write()
      infile.close()
  
+"""
+Parse lines from a Philips document; lines look like this:
+
+Syncra Scan Type 2005,10A1 VR = CS, VM = 1
+"""
+class InteraParser:
+  # Converts a text file whose lines look like
+  #   'Name GGGG,EEEE VR = XX, VM = n'
+  # into a file whose lines look like
+  #   'GGGG EEEE XX n Name'
+  def __init__(self):
+    self._InputFilename = ''
+    self._OutputFilename = ''
+
+  def Reformat(self,s):
+    # Return s rewritten as 'GGGG EEEE VR VM Name'.
+    # s must satisfy IsGood(); otherwise the assert fires (or, when asserts
+    # are disabled, "oops" is printed and None is returned).
+    assert self.IsGood(s)
+    patt = re.compile("^([A-Za-z0-9 -]+) ([0-9A-Z]+),([0-9A-Z]+) VR = ([A-Z][A-Z]), VM = (.*)$")
+    m = patt.match(s)
+    if m:
+      dicom = m.group(2) + ' ' + m.group(3) + ' ' + m.group(4) + ' ' + m.group(5) + ' ' + m.group(1)
+      return dicom
+    else:
+      print "oops"
+
+  def IsGood(self,s):
+    # True when s has the expected layout; otherwise report it on stdout
+    # and return False.
+    patt = re.compile("^[A-Za-z0-9 -]+ [0-9A-Z]+,[0-9A-Z]+ VR = [A-Z][A-Z], VM = .*$")
+    if patt.match(s):
+      return True
+    print "Not good:", s
+    return False
+
+  def SetInputFileName(self,s):
+    self._InputFilename = s
  
+  def SetOutputFileName(self,s):
+    self._OutputFilename = s
+  
+  def Parse(self):
+    # Read every input line, echo its reformatted version on stdout and
+    # write all reformatted lines to the output file.
+    infile = file(self._InputFilename, 'r')
+    try:
+      lines = infile.readlines()
+    finally:
+      # the input handle used to be left open
+      infile.close()
+    outLines = []
+    for line in lines:
+      # Reformat once per line; the result is both echoed and stored
+      dicom = self.Reformat(line)
+      print dicom
+      # Reformat yields None only when asserts are disabled and the line is
+      # malformed; skip it instead of failing on None + '\n'
+      if dicom:
+        outLines.append( dicom + '\n' )
+    outfile = file(self._OutputFilename, 'w')
+    try:
+      outfile.writelines( outLines )
+    finally:
+      outfile.close()
+ 
+"""
+Parse lines from a dicom3tools document; lines look like this:
+
+(0003,0008) VERS="SSPI" VR="US"   VM="1"        Owner="SIEMENS ISI"             Keyword="ISICommandField"                       Name="ISI Command Field"
+"""
+class Dicom3ToolsParser:
+  # Converts a text file whose lines look like
+  #   (gggg,eeee) VERS="..." VR="XX" VM="n" Owner="..." Keyword="..." Name="..."
+  # into a file whose lines look like
+  #   'gggg eeee XX n Name'
+  def __init__(self):
+    self._InputFilename = ''
+    self._OutputFilename = ''
+
+  def Reformat(self,s):
+    # Return s rewritten as 'gggg eeee VR VM Name', or '' (after printing
+    # "oops") when the line cannot be converted or its Name is '?'.
+    # s must satisfy IsGood(), which is enforced with an assert.
+    assert self.IsGood(s)
+    patt = re.compile("^\(([0-9a-f]+),([0-9a-f]+)\)\s+VERS=\".*\"\s+VR=\"([A-Z][A-Z])\"\s+VM=\"(.*)\"\s+Owner=\".*\"\s+Keyword=\".*\"\s+Name=\"(.*)\"$")
+    m = patt.match(s)
+    dicom = ''
+    if m:
+      # Apparently some have Name == '?', skip those
+      name = m.group(5)
+      if name != '?' and name != '? ':
+        dicom = m.group(1) + ' ' + m.group(2) + ' ' + m.group(3) + ' ' + m.group(4) + ' ' + m.group(5)
+      else:
+        print "oops"
+    else:
+      print "oops"
+    return dicom
+
+  def IsGood(self,s):
+    # True when s has the expected layout (arbitrary whitespace between the
+    # fields, anything allowed after the Name field); otherwise report it on
+    # stdout and return False.
+    patt = re.compile("^\([0-9a-f]+,[0-9a-f]+\)\s+VERS=\".*\"\s+VR=\"[A-Z][A-Z]\"\s+VM=\".*\"\s+Owner=\".*\"\s+Keyword=\".*\"\s+Name=\".*\".*$")
+    if patt.match(s):
+      return True
+    print "Not good:", s
+    return False
+
+  def SetInputFileName(self,s):
+    self._InputFilename = s
+
+  def SetOutputFileName(self,s):
+    self._OutputFilename = s
+  
+  def Parse(self):
+    # Read every input line, echo its reformatted version on stdout and
+    # write the non-empty results to the output file.
+    infile = file(self._InputFilename, 'r')
+    try:
+      lines = infile.readlines()
+    finally:
+      # the input handle used to be left open
+      infile.close()
+    outLines = []
+    for line in lines:
+      newline = self.Reformat(line)
+      print newline
+      if newline:
+        outLines.append( newline + '\n' )
+    outfile = file(self._OutputFilename, 'w')
+    try:
+      outfile.writelines( outLines )
+    finally:
+      outfile.close()
+ 
+"""
+Parse lines from a GE Advance document; lines look like this:
+
+GE Advance Implementation Version Name (0009,1001) 3 LO 2 n/a
+"""
+class GEAdvanceParser:
+  # Converts a text file whose lines look like
+  #   'Name (GGGG,EEEE) d VR n rest'
+  # (d and n being single digits) into a file whose lines look like
+  #   'GGGG eeee VR n Name'
+  def __init__(self):
+    self._InputFilename = ''
+    self._OutputFilename = ''
+
+  def Reformat(self,s):
+    # Return s rewritten as 'GGGG eeee VR VM Name', or '' (after printing
+    # "oops") when the line cannot be converted.
+    # s must satisfy IsGood(), which is enforced with an assert.
+    assert self.IsGood(s)
+    patt = re.compile("^([A-Za-z0-9 ._>]+) \\(([0-9A-F]+),([0-9A-F]+)\\) [0-9] ([A-Z][A-Z]) ([0-9]) .*$")
+    m = patt.match(s)
+    dicom = ''
+    if m:
+      # NOTE(review): only the element number is lower-cased, the group
+      # number is emitted as read -- confirm whether that is intended.
+      dicom = m.group(2) + ' ' + m.group(3).lower() + ' ' + m.group(4) + ' ' + m.group(5) + ' ' + m.group(1)
+    else:
+      print "oops"
+    return dicom
+
+  def IsGood(self,s):
+    # True when s has the expected layout; otherwise report it on stdout
+    # and return False.
+    patt = re.compile("^[A-Za-z0-9 ._>]+ \\([0-9A-F]+,[0-9A-F]+\\) [0-9] [A-Z][A-Z] [0-9] .*$")
+    if patt.match(s):
+      return True
+    print "Not good:", s
+    return False
+
+  def SetInputFileName(self,s):
+    self._InputFilename = s
+
+  def SetOutputFileName(self,s):
+    self._OutputFilename = s
+  
+  def Parse(self):
+    # Read every input line and write the non-empty reformatted results to
+    # the output file.
+    infile = file(self._InputFilename, 'r')
+    try:
+      lines = infile.readlines()
+    finally:
+      # the input handle used to be left open
+      infile.close()
+    outLines = []
+    for line in lines:
+      newline = self.Reformat(line)
+      #print newline
+      if newline:
+        outLines.append( newline + '\n' )
+    outfile = file(self._OutputFilename, 'w')
+    try:
+      outfile.writelines( outLines )
+    finally:
+      outfile.close()
+ 
  if __name__ == "__main__":
    argc = len(os.sys.argv )
    if ( argc < 3 ):
@@ -418,9 +694,9 @@ if __name__ == "__main__":
    #dp.SetOutputFileName( outputfilename )
    dp.SetOutputFileName( tempfile )
    dp.Parse()
-
    exp = DicomV3Expander()
-  exp.SetInputFileName( tempfile )
+  #exp.SetInputFileName( tempfile )
+  exp.SetInputFileName( inputfilename )
    exp.SetOutputFileName( outputfilename )
    exp.Expand()
  
@@ -428,10 +704,32 @@ if __name__ == "__main__":
    dp.SetInputFileName( inputfilename )
    dp.SetOutputFileName( outputfilename )
    dp.Parse()
-  """
+
    dp = PapyrusParser()
    dp.SetInputFileName( inputfilename )
    dp.SetOutputFileName( outputfilename )
    dp.Parse()
  
+  dp = InteraParser()
+  dp.SetInputFileName( inputfilename )
+  dp.SetOutputFileName( outputfilename )
+  dp.Parse()
+  dp = GEMSParser()
+  dp.SetInputFileName( inputfilename )
+  dp.SetOutputFileName( outputfilename )
+  dp.Parse()
+
+  """
+  dp = Dicom3ToolsParser()
+  dp.SetInputFileName( inputfilename )
+  dp.SetOutputFileName( outputfilename )
+  dp.Parse()
+
+  """
+  dp = GEAdvanceParser()
+  dp.SetInputFileName( inputfilename )
+  dp.SetOutputFileName( outputfilename )
+  dp.Parse()
+  """
+
    #print dp.IsAStartingLine( "(0004,1212) File-set Consistency Flag US 1\n" )