* Builder/ : add something to facilitate the RPM creation of gdcm

[gdcm.git] / src / gdcmDocument.cxx
diff --git a/src/gdcmDocument.cxx b/src/gdcmDocument.cxx

index 9c12f0ce8a8aa4f97e5210cd1c9dfec6311580ac..f87337747ee26e66f5ff2c46f3f66208e3463044 100644 (file)
--- a/src/gdcmDocument.cxx
+++ b/src/gdcmDocument.cxx
@@ -3,8 +3,8 @@
    Program:   gdcm
    Module:    $RCSfile: gdcmDocument.cxx,v $
    Language:  C++
-  Date:      $Date: 2005/02/07 09:51:03 $
-  Version:   $Revision: 1.225 $
+  Date:      $Date: 2005/04/05 10:20:53 $
+  Version:   $Revision: 1.234 $
                                                                                  
    Copyright (c) CREATIS (Centre de Recherche et d'Applications en Traitement de
    l'Image). All rights reserved. See Doc/License.txt or
@@ -32,15 +32,6 @@
  #include <iomanip>
  #include <fstream>
  
-// For nthos:
-#if defined(_MSC_VER) || defined(__BORLANDC__) || defined(__MINGW32__) 
-   #include <winsock.h>
-#endif
-
-#ifdef CMAKE_HAVE_NETINET_IN_H
-   #include <netinet/in.h>
-#endif
-
  namespace gdcm 
  {
  //-----------------------------------------------------------------------------
@@ -49,21 +40,68 @@ namespace gdcm
  
  // Refer to Document::SetMaxSizeLoadEntry()
  const unsigned int Document::MAX_SIZE_LOAD_ELEMENT_VALUE = 0xfff; // 4096
-const unsigned int Document::MAX_SIZE_PRINT_ELEMENT_VALUE = 0x7fffffff;
  
  //-----------------------------------------------------------------------------
  // Constructor / Destructor
  // Constructors and destructors are protected to avoid user to invoke directly
+
  /**
- * \brief   constructor  
- * @param   filename 'Document' (File or DicomDir) to be opened for parsing
+ * \brief This default constructor neither loads nor parses the file. 
+ *        You should then invoke \ref Document::Load.
+ *         
   */
-Document::Document( std::string const &filename ) 
+Document::Document() 
           :ElementSet(-1)
  {
-   SetMaxSizeLoadEntry(MAX_SIZE_LOAD_ELEMENT_VALUE); 
-   Filename = filename;
+   Fp = 0;
+
+   SetMaxSizeLoadEntry(MAX_SIZE_LOAD_ELEMENT_VALUE);
     Initialize();
+   SwapCode = 1234;
+   Filetype = ExplicitVR;
+   Group0002Parsed = false;
+   LoadMode = 0x00000000; // default : load everything, later
+}
+
+/**
+ * \brief   Constructor (not to break the API) 
+ * @param   filename 'Document' (File or DicomDir) to be opened for parsing
+ */
+Document::Document( std::string const &filename )
+         :ElementSet(-1) 
+{
+   Fp = 0;
+
+   SetMaxSizeLoadEntry(MAX_SIZE_LOAD_ELEMENT_VALUE);
+   Initialize();
+   SwapCode = 1234;
+   Filetype = ExplicitVR;
+   Group0002Parsed = false;
+   LoadMode = 0x00000000; // Load everything
+
+   Load(filename); 
+}
+/**
+ * \brief   Canonical destructor.
+ */
+Document::~Document ()
+{
+   RefPubDict = NULL;
+   RefShaDict = NULL;
+}
+
+//-----------------------------------------------------------------------------
+// Public
+
+/**
+ * \brief   Loader  
+ * @param   filename 'Document' (File or DicomDir) to be opened for parsing
+ */
+void Document::Load( std::string const &filename ) 
+{
+   Filename = filename;
+
+   // We should clean out anything that already exists.
  
     Fp = 0;
     if ( !OpenFile() )
@@ -74,20 +112,20 @@ Document::Document( std::string const &filename )
     Group0002Parsed = false;
  
     gdcmWarningMacro( "Starting parsing of file: " << Filename.c_str());
-  // Fp->seekg( 0,  std::ios::beg);
-   
+
     Fp->seekg(0, std::ios::end);
-   long lgt = Fp->tellg();
-           
+   long lgt = Fp->tellg();       // total length of the file
+
     Fp->seekg(0, std::ios::beg);
  
     CheckSwap();
-   long beg = Fp->tellg();
-   lgt -= beg;
-   
+   long beg = Fp->tellg();      // just after DICOM preamble (if any)
+
+   lgt -= beg;                  // remaining length to parse    
+
     ParseDES( this, beg, lgt, false); // Loading is done during parsing
  
-   Fp->seekg( 0,  std::ios::beg);
+   Fp->seekg( 0, std::ios::beg);
     
     // Load 'non string' values
        
@@ -130,8 +168,8 @@ Document::Document( std::string const &filename )
     // Specific code to allow gdcm to read ACR-LibIDO formated images
     // Note: ACR-LibIDO is an extension of the ACR standard that was
     //       used at CREATIS. For the time being (say a couple years)
-   //       we keep this kludge to allow a smooth move to gdcm for
-   //       CREATIS developpers (sorry folks).
+   //       we keep this kludge to let CREATIS users
+   //       read their old images.
     //
     // if recognition code tells us we deal with a LibIDO image
     // we switch lineNumber and columnNumber
@@ -151,33 +189,6 @@ Document::Document( std::string const &filename )
     // --- End of ACR-LibIDO kludge --- 
  }
  
-/**
- * \brief This default constructor doesn't parse the file. You should
- *        then invoke \ref Document::SetFileName and then the parsing.
- */
-Document::Document() 
-         :ElementSet(-1)
-{
-   Fp = 0;
-
-   SetMaxSizeLoadEntry(MAX_SIZE_LOAD_ELEMENT_VALUE);
-   Initialize();
-   SwapCode = 1234;
-   Filetype = ExplicitVR;
-   Group0002Parsed = false;
-}
-
-/**
- * \brief   Canonical destructor.
- */
-Document::~Document ()
-{
-   RefPubDict = NULL;
-   RefShaDict = NULL;
-}
-
-//-----------------------------------------------------------------------------
-// Public
  /**
   * \brief   Get the public dictionary used
   */
@@ -353,7 +364,9 @@ uint16_t Document::SwapShort(uint16_t a)
  {
     if ( SwapCode == 4321 || SwapCode == 2143 )
     {
-      a = ((( a << 8 ) & 0xff00 ) | (( a >> 8 ) & 0x00ff ) );
+      //a = ((( a << 8 ) & 0xff00 ) | (( a >> 8 ) & 0x00ff ) );
+      // Save CPU time
+      a = ( a << 8 ) | ( a >> 8 );
     }
     return a;
  }
@@ -370,11 +383,15 @@ uint32_t Document::SwapLong(uint32_t a)
        case 1234 :
           break;
        case 4321 :
-         a=( ((a<<24) & 0xff000000) | ((a<<8)  & 0x00ff0000) | 
-             ((a>>8)  & 0x0000ff00) | ((a>>24) & 0x000000ff) );
+//         a=( ((a<<24) & 0xff000000) | ((a<<8)  & 0x00ff0000) | 
+//             ((a>>8)  & 0x0000ff00) | ((a>>24) & 0x000000ff) );
+// save CPU time
+         a=( ( a<<24)               | ((a<<8)  & 0x00ff0000) | 
+             ((a>>8)  & 0x0000ff00) |  (a>>24)                );
           break;   
        case 3412 :
-         a=( ((a<<16) & 0xffff0000) | ((a>>16) & 0x0000ffff) );
+//       a=( ((a<<16) & 0xffff0000) | ((a>>16) & 0x0000ffff) );
+         a=( (a<<16)                | (a>>16)  );
           break;  
        case 2143 :
           a=( ((a<< 8) & 0xff00ff00) | ((a>>8) & 0x00ff00ff)  );
@@ -470,7 +487,7 @@ bool Document::CloseFile()
        delete Fp;
        Fp = 0;
     }
-   return true; //FIXME how do we detect a non-closed ifstream ?
+   return true;
  }
  
  /**
@@ -559,7 +576,6 @@ void Document::LoadEntryBinArea(BinEntry *elem)
        return;
     }
  
-   /// \todo check the result 
     Fp->read((char*)a, l);
     if( Fp->fail() || Fp->eof())
     {
@@ -771,17 +787,20 @@ void Document::Initialize()
  
  /**
   * \brief   Parses a DocEntrySet (Zero-level DocEntries or SQ Item DocEntries)
- * @return  length of the parsed set. 
+ * @param set DocEntrySet we are going to parse ('zero level'   or a SQItem)
+ * @param offset start of parsing
+ * @param l_max  length to parse
+ * @param delim_mode : whether we are in 'delimitor mode' (l=0xffffffff) or not
   */ 
  void Document::ParseDES(DocEntrySet *set, long offset, 
                          long l_max, bool delim_mode)
  {
-   DocEntry *newDocEntry = 0;
+   DocEntry *newDocEntry;
     ValEntry *newValEntry;
     BinEntry *newBinEntry;
     SeqEntry *newSeqEntry;
     VRKey vr;
-   bool used = false;
+   bool used;
  
     while (true)
     {
@@ -844,6 +863,7 @@ void Document::ParseDES(DocEntrySet *set, long offset,
           else
           {
           /////////////////////// ValEntry
+
              // When "set" is a Document, then we are at the top of the
              // hierarchy and the Key is simply of the form ( group, elem )...
              if ( dynamic_cast< Document* > ( set ) )
@@ -861,10 +881,32 @@ void Document::ParseDES(DocEntrySet *set, long offset,
               
              LoadDocEntry( newValEntry );
              bool delimitor=newValEntry->IsItemDelimitor();
+
+            if ( LoadMode & NO_SHADOW ) // User asked to skip, if possible, 
+                                        // shadow groups ( if possible :
+                                        // whether element 0x0000 exists)
+            {
+               if ( newValEntry->GetGroup()%2 != 0 )
+               {
+                  if ( newValEntry->GetElement() == 0x0000 )
+                  {
+                     std::string strLgrGroup = newValEntry->GetValue();
+                     int lgrGroup;
+                     if ( strLgrGroup != GDCM_UNFOUND)
+                     {
+                        lgrGroup = atoi(strLgrGroup.c_str());
+                        Fp->seekg(lgrGroup , std::ios::cur);
+                        used = false;
+                        continue;
+                     }
+                  }
+               }
+             }
+
              if( !set->AddEntry( newValEntry ) )
              {
                // If here expect big troubles
-              //delete newValEntry; //otherwise mem leak
+              // delete newValEntry; //otherwise mem leak
                used=false;
              }
  
@@ -888,7 +930,7 @@ void Document::ParseDES(DocEntrySet *set, long offset,
        else
        {
           // VR = "SQ"
-         unsigned long l = newDocEntry->GetReadLength();            
+         unsigned long l = newDocEntry->GetReadLength();          
           if ( l != 0 ) // don't mess the delim_mode for zero-length sequence
           {
              if ( l == 0xffffffff )
@@ -900,6 +942,14 @@ void Document::ParseDES(DocEntrySet *set, long offset,
                delim_mode = false;
              }
           }
+ 
+         if ( (LoadMode & NO_SEQ) && ! delim_mode ) // User asked to skip SQ
+         {
+            Fp->seekg( l, std::ios::cur);
+            used = false;
+            continue;
+          }
+         
           // no other way to create it ...
           newSeqEntry->SetDelimitorMode( delim_mode );
  
@@ -907,9 +957,8 @@ void Document::ParseDES(DocEntrySet *set, long offset,
           // is a Document, then we are building the first depth level.
           // Hence the SeqEntry we are building simply has a depth
           // level of one:
-         if (/*Document *dummy =*/ dynamic_cast< Document* > ( set ) )
+         if ( dynamic_cast< Document* > ( set ) )
           {
-            //(void)dummy;
              newSeqEntry->SetDepthLevel( 1 );
              newSeqEntry->SetKey( newSeqEntry->GetKey() );
           }
@@ -933,6 +982,7 @@ void Document::ParseDES(DocEntrySet *set, long offset,
           {
              used = false;
           }
+
           if ( !delim_mode && ((long)(Fp->tellg())-offset) >= l_max)
           {
              if( !used )
@@ -998,12 +1048,6 @@ void Document::ParseSQ( SeqEntry *seqEntry,
        {
           dlm_mod = false;
        }
-      // FIXME, TODO
-      // when we're here, element fffe,e000 is already passed.
-      // it's lost for the SQItem we're going to process !!
-
-      //ParseDES(itemSQ, newDocEntry->GetOffset(), l, dlm_mod);
-      //delete newDocEntry; // FIXME well ... it's too late to use it !
  
        // Let's try :------------
        // remove fff0,e000, created out of the SQItem
@@ -1101,7 +1145,6 @@ void Document::LoadDocEntry(DocEntry *entry)
        return;
     }
  
-   /// \todo Any compacter code suggested (?)
     if ( IsDocEntryAnInteger(entry) )
     {   
        uint32_t NewInt;
@@ -1249,72 +1292,6 @@ void Document::FindDocEntryLength( DocEntry *entry )
  
        // Length is encoded on 2 bytes.
        length16 = ReadInt16();
-
-      // FIXME : This heuristic supposes that the first group following
-      //         group 0002 *has* and element 0000.
-      // BUT ... Element 0000 is optionnal :-(
-
-
-   // Fixed using : HandleOutOfGroup0002()
-   //              (first hereafter strategy ...)
-      
-      // We can tell the current file is encoded in big endian (like
-      // Data/US-RGB-8-epicard) when we find the "Transfer Syntax" tag
-      // and it's value is the one of the encoding of a big endian file.
-      // In order to deal with such big endian encoded files, we have
-      // (at least) two strategies:
-      // * when we load the "Transfer Syntax" tag with value of big endian
-      //   encoding, we raise the proper flags. Then we wait for the end
-      //   of the META group (0x0002) among which is "Transfer Syntax",
-      //   before switching the swap code to big endian. We have to postpone
-      //   the switching of the swap code since the META group is fully encoded
-      //   in little endian, and big endian coding only starts at the next
-      //   group. The corresponding code can be hard to analyse and adds
-      //   many additional unnecessary tests for regular tags.
-      // * the second strategy consists in waiting for trouble, that shall
-      //   appear when we find the first group with big endian encoding. This
-      //   is easy to detect since the length of a "Group Length" tag (the
-      //   ones with zero as element number) has to be of 4 (0x0004). When we
-      //   encounter 1024 (0x0400) chances are the encoding changed and we
-      //   found a group with big endian encoding.
-      //---> Unfortunately, element 0000 is optional.
-      //---> This will not work when missing!
-      // We shall use this second strategy. In order to make sure that we
-      // can interpret the presence of an apparently big endian encoded
-      // length of a "Group Length" without committing a big mistake, we
-      // add an additional check: we look in the already parsed elements
-      // for the presence of a "Transfer Syntax" whose value has to be "big
-      // endian encoding". When this is the case, chances are we have got our
-      // hands on a big endian encoded file: we switch the swap code to
-      // big endian and proceed...
-
-//      if ( element  == 0x0000 && length16 == 0x0400 ) 
-//      {
-//         std::string ts = GetTransferSyntax();
-//         if ( Global::GetTS()->GetSpecialTransferSyntax(ts) 
-//                != TS::ExplicitVRBigEndian ) 
-//         {
-//            throw FormatError( "Document::FindDocEntryLength()",
-//                               " not explicit VR." );
-//           return;
-//        }
-//        length16 = 4;
-//        SwitchByteSwapCode();
-//
-//         // Restore the unproperly loaded values i.e. the group, the element
-//         // and the dictionary entry depending on them.
-//         uint16_t correctGroup = SwapShort( entry->GetGroup() );
-//         uint16_t correctElem  = SwapShort( entry->GetElement() );
-//         DictEntry *newTag = GetDictEntry( correctGroup, correctElem );
-//         if ( !newTag )
-//         {
-//            // This correct tag is not in the dictionary. Create a new one.
-//            newTag = NewVirtualDictEntry(correctGroup, correctElem);
-//         }
-//         // FIXME this can create a memory leaks on the old entry that be
-//         // left unreferenced.
-//         entry->SetDictEntry( newTag );
-//      }
    
        // 0xffff means that we deal with 'No Length' Sequence 
        //        or 'No Length' SQItem
@@ -1374,12 +1351,14 @@ uint32_t Document::FindDocEntryLengthOBOrOW()
        if ( group != 0xfffe || ( ( elem != 0xe0dd ) && ( elem != 0xe000 ) ) )
        {
           long filePosition = Fp->tellg();
-         gdcmWarningMacro( "Neither an Item tag nor a Sequence delimiter tag on :" 
+         gdcmWarningMacro( 
+              "Neither an Item tag nor a Sequence delimiter tag on :" 
             << std::hex << group << " , " << elem 
             << ") -before- position x(" << filePosition << ")" );
    
           Fp->seekg(positionOnEntry, std::ios::beg);
-         throw FormatUnexpected( "Neither an Item tag nor a Sequence delimiter tag.");
+         throw FormatUnexpected( 
+               "Neither an Item tag nor a Sequence delimiter tag.");
        }
        if ( elem == 0xe0dd )
        {
@@ -1634,7 +1613,7 @@ void Document::FixDocEntryFoundLength(DocEntry *entry,
        if ( gr != 0x0008 || ( elem != 0x0070 && elem != 0x0080 ) )
        {
           foundLength = 10;
-         entry->SetReadLength(10); /// \todo a bug is to be fixed !?
+         entry->SetReadLength(10); // a bug is to be fixed !?
        }
     }
  
@@ -1645,7 +1624,7 @@ void Document::FixDocEntryFoundLength(DocEntry *entry,
     else if ( gr   == 0x0009 && ( elem == 0x1113 || elem == 0x1114 ) )
     {
        foundLength = 4;
-      entry->SetReadLength(4); /// \todo a bug is to be fixed !?
+      entry->SetReadLength(4); // a bug is to be fixed !?
     } 
   
     else if ( entry->GetVR() == "SQ" )
@@ -1677,10 +1656,10 @@ void Document::FixDocEntryFoundLength(DocEntry *entry,
   */
  bool Document::IsDocEntryAnInteger(DocEntry *entry)
  {
-   uint16_t elem          = entry->GetElement();
-   uint16_t group         = entry->GetGroup();
-   const std::string &vr  = entry->GetVR();
-   uint32_t length        = entry->GetLength();
+   uint16_t elem         = entry->GetElement();
+   uint16_t group        = entry->GetGroup();
+   const std::string &vr = entry->GetVR();
+   uint32_t length       = entry->GetLength();
  
     // When we have some semantics on the element we just read, and if we
     // a priori know we are dealing with an integer, then we shall be
@@ -1730,8 +1709,6 @@ bool Document::CheckSwap()
     // 0x00000004. Finding the swap code in then straigthforward. Trouble
     // occurs when we can't find such group...
     
-   uint32_t  x = 4;  // x : for ntohs
-   bool net2host; // true when HostByteOrder is the same as NetworkByteOrder
     uint32_t  s32;
     uint16_t  s16;
         
@@ -1739,14 +1716,7 @@ bool Document::CheckSwap()
      
     // First, compare HostByteOrder and NetworkByteOrder in order to
     // determine if we shall need to swap bytes (i.e. the Endian type).
-   if ( x == ntohs(x) )
-   {
-      net2host = true;
-   }
-   else
-   {
-      net2host = false;
-   }
+   bool net2host = Util::IsCurrentProcessorBigEndian();
           
     // The easiest case is the one of a 'true' DICOM header, we just have
     // to look for the string "DICM" inside the file preamble.
@@ -1925,7 +1895,7 @@ void Document::SwitchByteSwapCode()
  
  /**
   * \brief  during parsing, Header Elements too long are not loaded in memory 
- * @param newSize
+ * @param newSize new size limit; Header Elements too long are not loaded
   */
  void Document::SetMaxSizeLoadEntry(long newSize) 
  {
@@ -1941,26 +1911,6 @@ void Document::SetMaxSizeLoadEntry(long newSize)
     MaxSizeLoadEntry = newSize;
  }
  
-/**
- * \brief Header Elements too long will not be printed
- * \todo  See comments of \ref Document::MAX_SIZE_PRINT_ELEMENT_VALUE 
- * @param newSize
- */
-void Document::SetMaxSizePrintEntry(long newSize) 
-{
-   if ( newSize < 0 )
-   {
-      return;
-   }
-   if ((uint32_t)newSize >= (uint32_t)0xffffffff )
-   {
-      MaxSizePrintEntry = 0xffffffff;
-      return;
-   }
-   MaxSizePrintEntry = newSize;
-}
-
-
  /**
   * \brief   Read the next tag but WITHOUT loading it's value
   *          (read the 'Group Number', the 'Element Number',
@@ -1997,9 +1947,14 @@ DocEntry *Document::ReadNextDocEntry()
  
     if( vr == GDCM_UNKNOWN)
     {
-      DictEntry *dictEntry = GetDictEntry(group,elem);
-      if( dictEntry )
-         realVR = dictEntry->GetVR();
+      if ( elem == 0x0000 ) // Group Length
+         realVR = "UL";     // must be UL
+      else
+      {
+         DictEntry *dictEntry = GetDictEntry(group,elem);
+         if( dictEntry )
+            realVR = dictEntry->GetVR();
+      }
     }
  
     DocEntry *newEntry;
@@ -2034,7 +1989,6 @@ DocEntry *Document::ReadNextDocEntry()
     catch ( FormatError e )
     {
        // Call it quits
-      //std::cout << e;
        delete newEntry;
        return 0;
     }