Home > mrg > MRG_MIKE > mrg_ptxml_read.m

mrg_ptxml_read

PURPOSE ^

Reads a MIKE-formatted PT xml file

SYNOPSIS ^

function ptstruc = mrg_ptxml_read()

DESCRIPTION ^

 Reads a MIKE-formatted PT xml file

 OUTPUT
   ptstruc  A MATLAB structure with items:

 AUTHORS
   Daniel Pritchard

 LICENCE
   Code distributed as part of the MRG toolbox from the Marine Research
   Group at Queen's University Belfast (QUB) School of Planning
   Architecture and Civil Engineering (SPACE). Distributed under a
   creative commons CC BY-SA licence, retaining full copyright of the
   original authors.

   http://creativecommons.org/licenses/by-sa/3.0/
   http://www.qub.ac.uk/space/
   http://www.qub.ac.uk/research-centres/eerc/

 DEVELOPMENT
   v 1.0   2013-11-06
           First version. DP
   v 1.1   2013-11-26
           Multiple classes + Compressed data

 TODO
   Check the order of items in compressed particle classes

% Function Begin!

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

function ptstruc = mrg_ptxml_read(filename)
% Reads a MIKE-formatted PT xml file
%
% INPUT
%   filename  (Optional) Path of the XML file to read. When omitted (or
%             empty) a file-selection dialog is opened and the current
%             directory is changed to the folder of the selected file.
%
% OUTPUT
%   ptstruc  A MATLAB structure with items:
%     <classname>_ID<classID>
%              One field per particle class listed in the <DataAttributes>
%              header (characters that are illegal in a field name are
%              replaced by '_'). Each of these is a structure holding:
%                classID    Numeric class ID
%                classname  Class name as written in the file
%                codes      1 x n cell array of item codes for the class
%                <code>     One [particle x timestep] matrix per item
%                           code, pre-filled with NaN
%     dtstr    1 x timestep cell array of date-time strings
%
% ERRORS
%   mrg:NoFileSelected     The file dialog was cancelled
%   mrg:FileNotFound       The file could not be opened
%   mrg:NoParticleData     No <Particle Nr=".."> / <TimeStep nr=".."> tags
%                          could be found or parsed
%   mrg:UnknownClass       A <ParticleClass id=".."> is not in the header
%   mrg:ItemCountMismatch  A CDATA record does not hold one value per item
%
% AUTHORS
%   Daniel Pritchard
%
% LICENCE
%   Code distributed as part of the MRG toolbox from the Marine Research
%   Group at Queen's University Belfast (QUB) School of Planning
%   Architecture and Civil Engineering (SPACE). Distributed under a
%   creative commons CC BY-SA licence, retaining full copyright of the
%   original authors.
%
%   http://creativecommons.org/licenses/by-sa/3.0/
%   http://www.qub.ac.uk/space/
%   http://www.qub.ac.uk/research-centres/eerc/
%
% DEVELOPMENT
%   v 1.0   2013-11-06
%           First version. DP
%   v 1.1   2013-11-26
%           Multiple classes + Compressed data
%
% TODO
%   Check the order of items in compressed particle classes
%
%% Function Begin!
if nargin == 0 || isempty(filename)
    [fname, pathname] = uigetfile('*.xml','Select an XML file to read');
    if isequal(fname, 0)
        % uigetfile returns 0 when the user cancels the dialog
        error('mrg:NoFileSelected', 'No file was selected.');
    end
    filename = [pathname, fname];
    % Kept from the original implementation: interactive use moves the
    % working directory to the folder of the chosen file.
    cd(pathname);
end

fid = fopen(filename,'rt');
if fid == -1
    % Pass the name as an argument so backslashes in Windows paths are not
    % interpreted as escape sequences.
    error('mrg:FileNotFound', 'Could not find file: %s', filename);
end
% Guarantee the file is closed on every exit path, including errors.
closefile = onCleanup(@() fclose(fid)); %#ok<NASGU>

%% Get Items
disp('Looking for particle classes and item types...')
ptstruc = struct();
currclass = ''; % field name of the class whose <code> lines are being read
txt = '';
while ~strcmp(txt, '</DataAttributes>')
    txt = fgetl(fid);
    % Check if it is a line (and not EOF)
    if ~ischar(txt)
        break
    end
    % Trim first so that indented files are handled for every tag
    txt = strtrim(txt);
    % Match Class ID; the class name is expected on the following line
    if strncmpi(txt,'<ClassID>',9)
        tok = regexpi(txt,'<ClassID>\s?(\d+)','tokens','once');
        txt = fgetl(fid);
        if ~ischar(txt)
            break
        end
        txt = strtrim(txt);
        if ~isempty(tok) && strncmpi(txt,'<Name>',6)
            nametok = regexpi(txt,'<Name>([\s\w\d]+)','tokens','once');
            if ~isempty(nametok)
                classnonum = str2double(tok{1});
                classname = nametok{1};
                currclass = ptxml_fieldname([classname, '_ID', num2str(classnonum)]);
                ptstruc.(currclass).classID = classnonum;
                ptstruc.(currclass).classname = classname;
                ptstruc.(currclass).codes = cell(1,0);
            end
        end
    end
    % Match 'code' (an item type belonging to the most recent class)
    if strncmpi(txt,'<code>',6) && ~isempty(currclass)
        tok = regexpi(txt,'<code>(\w+)</code>','tokens','once');
        if ~isempty(tok)
            ptstruc.(currclass).codes{end+1} = tok{1};
        end
    end
end

% Names of the particle-class fields (taken before 'dtstr' is added)
ptitemnames = fieldnames(ptstruc);
for a = 1:length(ptitemnames)
    cls = ptstruc.(ptitemnames{a});
    disp(['Id: ', num2str(cls.classID), ...
        '; Name: ', cls.classname, ...
        '; Items (', num2str(length(cls.codes)),'): ', ...
        ptxml_join(cls.codes)])
end
% and rewind...
frewind(fid);

%% Estimate particles and TS's
disp('Estimating number of timesteps and particles')

s = dir(filename);
filebits = [0.01, 0.1, 0.2, 0.5, 1]; % Read the last 1%, 10%, 20%, 50%, 100% of the file
lastpt = '';
lastts = '';
iscompressed = false;
for bit = 1:length(filebits)
    disp(['Trying last ', num2str(filebits(bit)*100), '% of file...'])
    fseek(fid,-floor(s.bytes*filebits(bit)),'eof'); % Seek to the last n percent of the file
    C = textscan(fid, '%s', 'Delimiter', '\n');
    C = C{1};
    if isempty(C)
        continue
    end
    ptidx = find(strncmpi(C,'<Particle Nr',12), 1, 'last');
    tsidx = find(strncmpi(C,'<TimeStep nr',12), 1, 'last');
    iscompressed = any(strncmpi(C,'<![CDATA[',9));
    if ~isempty(ptidx) && ~isempty(tsidx)
        lastpt = C{ptidx};
        lastts = C{tsidx};
        break
    end
end
frewind(fid);

if isempty(lastpt) || isempty(lastts)
    error('mrg:NoParticleData', ...
        'No <Particle Nr=".."> and <TimeStep nr=".."> tags found in file: %s', filename);
end

pttok = regexpi(lastpt,'<Particle Nr="(\d+)">','tokens','once');
tstok = regexpi(lastts,'<TimeStep nr="(\d+)">','tokens','once');
if isempty(pttok) || isempty(tstok)
    error('mrg:NoParticleData', ...
        'Could not parse the last particle / timestep tags in file: %s', filename);
end
% NOTE: this is the number of the LAST particle in the file, which is only
% an estimate of the largest particle number. Matrices grow automatically
% if a larger number is met while reading (grown cells are 0, not NaN).
pstr = pttok{1};
pnum = str2double(pstr);
% TS are zero-indexed, correct that here
tsnum = str2double(tstok{1}) + 1;
tsstr = num2str(tsnum);

disp(['Est. no. timesteps: ', tsstr])
disp(['Est. no. particles per class: ', pstr])

%% Pre allocate space and setup
classids = zeros(length(ptitemnames),1); % class ID -> row in ptitemnames
allitems = cell(1,0);                    % every item code of every class
% Allocate some space for DTs
ptstruc.dtstr = repmat({''},1,tsnum);

for a = 1:length(ptitemnames)
    clscodes = ptstruc.(ptitemnames{a}).codes;
    for b = 1:length(clscodes)
        ptstruc.(ptitemnames{a}).(clscodes{b}) = NaN(pnum,tsnum);
    end
    classids(a) = ptstruc.(ptitemnames{a}).classID;
    allitems = [allitems, clscodes]; %#ok<AGROW>
end
if ~isempty(allitems)
    allitems = unique(allitems);
end

% Matcher for uncompressed items, built once instead of once per line:
% token 1 is the tag name, token 2 the text up to the closing tag.
if isempty(allitems)
    itempattern = '';
else
    itempattern = ['<(', ptxml_join(allitems, '|'), ')>([^<]*)</'];
end

% The progress bar is optional: carry on without it when no display is
% available (e.g. batch runs).
try
    h = waitbar(0,'Please wait...');
catch
    h = [];
end
closebar = onCleanup(@() ptxml_closebar(h)); %#ok<NASGU>

%% Read data
if iscompressed
    disp('Reading data (Compressed format)')
else
    disp('Reading data (Uncompressed format)')
end

currtsnum = 0;  % 1-based index of the current timestep
currptnum = 0;  % number of the current particle
currfname = ''; % field name of the current particle class

% Loop through reading line by line (potentially slow, but hard to think how else to do this!)
while true
    txt = fgetl(fid);
    % Check if it is a line (and not EOF)
    if ~ischar(txt)
        break
    end
    % Trim leading and trailing spaces
    txt = strtrim(txt);

    if strncmpi(txt,'<TimeStep nr',12)
        % Match the timestep (zero-indexed in the file)
        tok = regexpi(txt,'<TimeStep nr="(\d+)">','tokens','once');
        if ~isempty(tok)
            currtsnum = str2double(tok{1}) + 1;
        end
    elseif strncmpi(txt,'<DateTime>',10)
        % Match the DT
        tok = regexpi(txt,'<DateTime>([^<]*)</DateTime>','tokens','once');
        if ~isempty(tok) && currtsnum >= 1
            currdtstr = strtrim(tok{1});
            ptstruc.dtstr(1,currtsnum) = {currdtstr};
            if ~isempty(h) && ishandle(h)
                waitbar(min(currtsnum/tsnum, 1), h, ...
                    sprintf('Timestep %d of %d\n%s', currtsnum, tsnum, currdtstr))
            end
        end
    elseif strncmpi(txt,'<ParticleClass id',17)
        % Match the class and look up its field name
        tok = regexpi(txt,'<ParticleClass id="(\d+)">','tokens','once');
        if ~isempty(tok)
            idx = find(classids == str2double(tok{1}), 1);
            if isempty(idx)
                error('mrg:UnknownClass', ...
                    'Particle class id %s is not declared in the file header.', tok{1});
            end
            currfname = ptitemnames{idx};
        end
    elseif strncmpi(txt,'<Particle Nr',12)
        % Match the particle number
        tok = regexpi(txt,'<Particle Nr="(\d+)">','tokens','once');
        if ~isempty(tok)
            currptnum = str2double(tok{1});
        end
    else
        % Anything else may be data. Data can only be stored once a
        % timestep, class and particle are known; lines seen before that
        % (e.g. the header) are skipped.
        if isempty(currfname) || currptnum < 1 || currtsnum < 1
            continue
        end
        if iscompressed
            % e.g. <![CDATA[1020.000000,55.00000000,0.4968643188E-01]]>
            if strncmpi(txt,'<![CDATA[',9)
                tok = regexp(txt,'<!\[CDATA\[(.*?)\]\]>','tokens','once');
                if ~isempty(tok)
                    payload = regexprep(strtrim(tok{1}), ',\s*$', '');
                    vals = str2double(regexp(payload, ',', 'split'));
                    % Use the item list of the CURRENT class
                    codetemp = ptstruc.(currfname).codes;
                    if length(vals) ~= length(codetemp)
                        error('mrg:ItemCountMismatch', ...
                            'Expected %d values for class %s but found %d (particle %d, timestep %d).', ...
                            length(codetemp), currfname, length(vals), currptnum, currtsnum);
                    end
                    for a = 1:length(codetemp)
                        % TODO: This assumes that they are written in the same order!
                        ptstruc.(currfname).(codetemp{a})(currptnum,currtsnum) = vals(a);
                    end
                end
            end
        elseif ~isempty(itempattern)
            % Match items, e.g. <x>12.5</x>
            tok = regexpi(txt, itempattern, 'tokens', 'once');
            if ~isempty(tok)
                idx = find(strcmpi(allitems, tok{1}), 1);
                ptstruc.(currfname).(allitems{idx})(currptnum,currtsnum) = str2double(tok{2});
            end
        end
    end
end

%% Finished!
disp('Done!')
% The waitbar and the file are released by the onCleanup objects.

end

function name = ptxml_fieldname(raw)
% Turn RAW into a legal struct field name; legal names are returned unchanged.
name = regexprep(raw, '\W', '_');
if isempty(regexp(name, '^[A-Za-z]', 'once'))
    name = ['C_', name];
end
end

function str = ptxml_join(items, sep)
% Join the strings in cell array ITEMS with SEP (default ', ').
if nargin < 2
    sep = ', ';
end
str = '';
for k = 1:length(items)
    if k == 1
        str = items{k};
    else
        str = [str, sep, items{k}]; %#ok<AGROW>
    end
end
end

function ptxml_closebar(h)
% Close the waitbar H if it was created and still exists.
if ~isempty(h) && ishandle(h)
    close(h);
end
end

Generated on Thu 29-May-2014 21:29:53 by m2html © 2005