function ptstruc = mrg_ptxml_read(filename)
% Reads a MIKE-formatted PT (particle tracking) xml file
%
% INPUT
%   filename  (Optional) Path of the XML file to read. If omitted or
%             empty, a file-selection dialog is shown and the working
%             directory is changed to the folder of the chosen file.
%
% OUTPUT
%   ptstruc  A MATLAB structure with items:
%              <name>_ID<n>  One field per particle class found in the
%                            file header, where <name> is the class name
%                            (non-word characters replaced by '_') and
%                            <n> the class ID. Each is a structure with:
%                  classID    Numeric class ID
%                  classname  Class name as written in the file
%                  codes      Cell array of item codes for the class
%                  <code>     One matrix per item code, indexed as
%                             (particle number, timestep), NaN where no
%                             value was read
%              dtstr         Cell array (1 x timesteps) of the DateTime
%                            strings read for each timestep
%
% ERRORS
%   mrg:NoFileSelected  The file dialog was cancelled
%   mrg:FileNotFound    The file could not be opened
%   mrg:BadData         The file content could not be interpreted
%
% AUTHORS
%   Daniel Pritchard
%
% LICENCE
%   Code distributed as part of the MRG toolbox from the Marine Research
%   Group at Queens University Belfast (QUB) School of Planning
%   Architecture and Civil Engineering (SPACE). Distributed under a
%   creative commons CC BY-SA licence, retaining full copyright of the
%   original authors.
%
%   http://creativecommons.org/licenses/by-sa/3.0/
%   http://www.qub.ac.uk/space/
%   http://www.qub.ac.uk/research-centres/eerc/
%
% CHANGELOG
%   v 1.0   2013-11-06
%           First version. DP
%   v 1.1   2013-11-26
%           Multiple classes + Compressed data
%
% TODO
%   Check the order of items in compressed particle classes

%% Resolve the input file
if nargin == 0 || isempty(filename)
    [fname, folder] = uigetfile('*.xml','Select an XML file to read');
    if isequal(fname, 0)
        % uigetfile returns 0 when the dialog is cancelled
        error('mrg:NoFileSelected', 'No file was selected.');
    end
    filename = fullfile(folder, fname);
    % Interactive use has always switched to the folder of the file
    cd(folder);
end

fid = fopen(filename,'rt');
if fid == -1
    % Pass the name as an argument so backslashes are not read as escapes
    error('mrg:FileNotFound', 'Could not find file: %s', filename);
end
% Close the file however the function exits (normal return or error)
closeFile = onCleanup(@() fclose(fid)); %#ok<NASGU>

%% Get particle classes and their item codes from the header
disp('Looking for particle classes and item types...')
ptstruc = struct();
classidname = '';
while true
    rawline = fgetl(fid);
    % fgetl returns -1 (not char) at end of file
    if ~ischar(rawline)
        break
    end
    % Trim first so that indented XML is matched as well
    txt = strtrim(rawline);

    % Match Class ID; the class name is expected on the following line
    if strncmpi(txt,'<ClassID>',9)
        classnonum = parse_tag_int(txt, '<ClassID>\s?(\d+){1}');
        rawline = fgetl(fid);
        if ~ischar(rawline)
            break
        end
        txt = strtrim(rawline);
        if strncmpi(txt,'<Name>',6)
            nametok = regexpi(txt,'<Name>([\s\w\d]+){1}','tokens','once');
            if isempty(nametok)
                error('mrg:BadData', 'Could not parse class name from: %s', txt);
            end
            classname = strtrim(nametok{1});
            classidname = make_class_key(classname, classnonum);
            ptstruc.(classidname).classID = classnonum;
            ptstruc.(classidname).classname = classname;
            ptstruc.(classidname).codes = {};
        end
    end

    % Match an item code and attach it to the most recent class
    if strncmpi(txt,'<code>',6)
        codetok = regexpi(txt,'<code>(\w+){1}</code>','tokens','once');
        if ~isempty(codetok)
            if isempty(classidname)
                error('mrg:BadData', ...
                    'Item code "%s" found before any particle class.', codetok{1});
            end
            ptstruc.(classidname).codes{end+1} = codetok{1};
        end
    end

    % The header ends with the closing DataAttributes tag
    if strcmp(txt, '</DataAttributes>')
        break
    end
end

ptitemnames = fieldnames(ptstruc);
if isempty(ptitemnames)
    error('mrg:BadData', 'No particle classes found in file: %s', filename);
end

% Report what was found
for a = 1:numel(ptitemnames)
    cls = ptstruc.(ptitemnames{a});
    itemlist = sprintf('%s, ', cls.codes{:});
    itemlist = itemlist(1:max(end-2,0)); % drop the trailing ', '
    disp(['Id: ', num2str(cls.classID), ...
        '; Name: ', cls.classname, ...
        '; Items (', num2str(numel(cls.codes)),'): ', itemlist])
end

%% Estimate particles and timesteps
% The last '<Particle Nr' and '<TimeStep nr' tags in the file give the
% array sizes to preallocate. Progressively larger tails of the file are
% scanned until both have been seen.
disp('Estimating number of timesteps and particles')

fseek(fid, 0, 'eof');
nbytes = ftell(fid);
filebits = [0.01, 0.1, 0.2, 0.5, 1]; % Fractions of the file (from the end) to try
lastpt = '';
lastts = '';
iscompressed = false;
for bit = 1:numel(filebits)
    disp(['Trying last ', num2str(filebits(bit)*100), '% of file...'])
    fseek(fid, -floor(nbytes*filebits(bit)), 'eof');
    C = textscan(fid, '%s', 'Delimiter', '\n');
    C = C{1};
    lastpt = '';
    lastts = '';
    iscompressed = false;
    for a = 1:numel(C)
        txt = strtrim(C{a});
        if strncmpi(txt,'<Particle Nr',12)
            lastpt = txt;
        elseif strncmpi(txt,'<TimeStep nr',12)
            lastts = txt;
        elseif strncmpi(txt,'<![CDATA[',9)
            % Presence of CDATA blocks marks the compressed layout
            iscompressed = true;
        end
    end
    if ~isempty(lastpt) && ~isempty(lastts)
        break
    end
end
if isempty(lastpt) || isempty(lastts)
    error('mrg:BadData', ...
        'No "<Particle Nr" and/or "<TimeStep nr" tags found in file: %s', filename);
end
frewind(fid);

pnum = parse_tag_int(lastpt, '<Particle Nr="(\d+){1}">');
% Timesteps are zero-indexed in the file, correct that here
tsnum = parse_tag_int(lastts, '<TimeStep nr="(\d+){1}">') + 1;

disp(['Est. no. timesteps: ', num2str(tsnum)])
disp(['Est. no. particles per class: ', num2str(pnum)])

%% Pre-allocate space and set up lookups
classids = zeros(numel(ptitemnames), 1); % class ID -> row in ptitemnames
allitems = {};                           % all item codes over all classes
ptstruc.dtstr = repmat({''},1,tsnum);    % DateTime string per timestep
for a = 1:numel(ptitemnames)
    clscodes = ptstruc.(ptitemnames{a}).codes;
    for b = 1:numel(clscodes)
        ptstruc.(ptitemnames{a}).(clscodes{b}) = NaN(pnum,tsnum);
    end
    classids(a) = ptstruc.(ptitemnames{a}).classID;
    allitems = [allitems, clscodes]; %#ok<AGROW>
end
if ~isempty(allitems)
    allitems = unique(allitems);
end

% Numeric value as written in the file (plain or exponent notation)
numpat = '-?\d+\.*\d*[eE]?[\+\-]?\d*';
% One pattern matching '<item>value</' for any known item; built once
if isempty(allitems)
    itempattern = '';
else
    alternatives = sprintf('%s|', allitems{:});
    itempattern = ['<(', alternatives(1:end-1), ')>(', numpat, ')</'];
end

% The progress bar is best effort: reading continues without it if no
% display is available or the user closes the window
h = [];
try
    h = waitbar(0,'Please wait...');
catch
    h = [];
end
closeBar = onCleanup(@() close_waitbar(h)); %#ok<NASGU>

%% Read the data
% Read line by line (potentially slow, but simple and memory-friendly)
if iscompressed
    disp('Reading data (Compressed format)')
else
    disp('Reading data (Uncompressed format)')
end

currtsnum = 0;  % current timestep (1-based), 0 until the first is seen
currptnum = 0;  % current particle number, 0 until the first is seen
currfname = ''; % ptstruc field of the current particle class
while true
    rawline = fgetl(fid);
    if ~ischar(rawline)
        break
    end
    txt = strtrim(rawline);

    if strncmpi(txt,'<TimeStep nr',12)
        % Timesteps are zero-indexed in the file
        currtsnum = parse_tag_int(txt, '<TimeStep nr="(\d+){1}">') + 1;
    elseif strncmpi(txt,'<DateTime>',10)
        dttok = regexpi(txt,'<DateTime>([\d\s:\-]+)</DateTime>','tokens','once');
        if ~isempty(dttok) && currtsnum >= 1
            ptstruc.dtstr(1,currtsnum) = dttok(1);
            if ~isempty(h) && ishandle(h)
                waitbar(min(currtsnum/tsnum, 1), h, ...
                    sprintf('Timestep %d of %d\n%s', currtsnum, tsnum, dttok{1}))
            end
        end
    elseif strncmpi(txt,'<ParticleClass id',17)
        currpcidnum = parse_tag_int(txt, '<ParticleClass id="(\d+){1}">');
        indx = find(classids == currpcidnum, 1);
        if isempty(indx)
            error('mrg:BadData', ...
                'Particle class id %d is not declared in the file header.', currpcidnum);
        end
        currfname = ptitemnames{indx};
    elseif strncmpi(txt,'<Particle Nr',12)
        currptnum = parse_tag_int(txt, '<Particle Nr="(\d+){1}">');
    end

    % Values can only be stored once class, particle and timestep are known
    hascontext = ~isempty(currfname) && currptnum >= 1 && currtsnum >= 1;
    if ~hascontext
        continue
    end

    if iscompressed
        % e.g. <![CDATA[1020.000000,55.00000000,0.4968643188E-01]]>
        if strncmpi(txt,'<![CDATA[',9)
            cdtok = regexp(txt,'<!\[CDATA\[(.*?)\]\]>','tokens','once');
            if isempty(cdtok)
                error('mrg:BadData', 'Could not parse CDATA line: %s', txt);
            end
            % Tolerate a trailing comma, then split into one string per value
            payload = regexprep(strtrim(cdtok{1}), ',\s*$', '');
            vals = str2double(regexp(payload, '\s*,\s*', 'split'));
            codetemp = ptstruc.(currfname).codes;
            if numel(vals) ~= numel(codetemp)
                error('mrg:BadData', ...
                    'Expected %d values for class "%s" but found %d in: %s', ...
                    numel(codetemp), currfname, numel(vals), txt);
            end
            for a = 1:numel(codetemp)
                % TODO: This assumes values are written in the same order
                % as the codes in the header!
                ptstruc.(currfname).(codetemp{a})(currptnum,currtsnum) = vals(a);
            end
        end
    elseif ~isempty(itempattern)
        % Each hit is {tag name, value string}; tags are matched without
        % regard to case and mapped back to the stored item code
        hits = regexpi(txt, itempattern, 'tokens');
        for k = 1:numel(hits)
            itemno = find(strcmpi(hits{k}{1}, allitems), 1);
            ptstruc.(currfname).(allitems{itemno})(currptnum,currtsnum) = ...
                str2double(hits{k}{2});
        end
    end
end

%% Finished!
disp('Done!')

end

function val = parse_tag_int(txt, pattern)
% Returns the first token of PATTERN in TXT as a number; errors if no match
tok = regexpi(txt, pattern, 'tokens', 'once');
if isempty(tok)
    error('mrg:BadData', 'Could not parse line: %s', txt);
end
val = str2double(tok{1});
end

function key = make_class_key(classname, classid)
% Builds a valid struct field name of the form <classname>_ID<classid>
key = [regexprep(classname, '\W', '_'), '_ID', num2str(classid)];
if ~isvarname(key)
    % e.g. a class name starting with a digit
    key = ['Class_', key];
end
end

function close_waitbar(h)
% Closes the progress bar if it was created and still exists
if ~isempty(h) && ishandle(h)
    close(h);
end
end

