-
Notifications
You must be signed in to change notification settings - Fork 0
/
strsplit.m
110 lines (99 loc) · 3.01 KB
/
strsplit.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
function terms = strsplit(s, delimiter)
%STRSPLIT Splits a string into multiple terms
%
%   terms = strsplit(s)
%       splits the char row vector s into terms separated by runs of
%       white space (as classified by isspace, so tabs and newlines
%       count as well).
%
%       The extracted terms are returned as a 1-by-n cell array of
%       strings.
%
%   terms = strsplit(s, delimiter)
%       splits s into terms separated by the specified delimiter.
%       Leading and trailing white space of the delimiter is ignored;
%       if the delimiter consists of white space only, the call is
%       equivalent to strsplit(s).
%
%   Input arguments
%   ---------------
%   - s:          a char row vector (may be empty).
%   - delimiter:  a non-empty char row vector (optional).
%
%   Errors
%   ------
%   - strsplit:invalidarg is raised when s or delimiter is not of the
%     form described above.
%
%   Remarks
%   -------
%   - White space surrounding the delimiter is considered part of the
%     delimiter, and is thus removed from the extracted terms.
%
%   - If there are two consecutive non-whitespace delimiters, it is
%     regarded that there is an empty-string term between them.
%
%   - Occurrences of a multi-character delimiter are matched from left
%     to right without overlap, e.g. 'a,,,b' split by ',,' yields
%     {'a', ',b'}.
%
%   - NOTE: recent MATLAB releases ship a built-in function that is also
%     named strsplit and has different semantics; which one is called
%     depends on the search path.
%
%   Examples
%   --------
%       % extract the words delimited by white spaces
%       ts = strsplit('I am using MATLAB');
%       ts <- {'I', 'am', 'using', 'MATLAB'}
%
%       % split operands delimited by '+'
%       ts = strsplit('1+2+3+4', '+');
%       ts <- {'1', '2', '3', '4'}
%
%       % It still works if there are spaces surrounding the delimiter,
%       % either in the string or in the delimiter itself
%       ts = strsplit('1 + 2 + 3 + 4', '+');
%       ts <- {'1', '2', '3', '4'}
%       ts = strsplit('1 + 2 + 3 + 4', ' + ');
%       ts <- {'1', '2', '3', '4'}
%
%       % Consecutive delimiters result in empty terms
%       ts = strsplit('C,Java, C++ ,, Python, MATLAB', ',');
%       ts <- {'C', 'Java', 'C++', '', 'Python', 'MATLAB'}
%
%       % When no delimiter is present, the entire string is considered
%       % as a single term
%       ts = strsplit('YouAndMe');
%       ts <- {'YouAndMe'}
%
%   History
%   -------
%       - Created by Dahua Lin, on Oct 9, 2008
%

%% parse and verify input arguments

assert(ischar(s) && ndims(s) == 2 && size(s,1) <= 1, ...
    'strsplit:invalidarg', ...
    'The first input argument should be a char string.');

if nargin < 2
    by_space = true;
else
    d = delimiter;
    assert(ischar(d) && ndims(d) == 2 && size(d,1) == 1 && ~isempty(d), ...
        'strsplit:invalidarg', ...
        'The delimiter should be a non-empty char string.');

    % White space around the delimiter is not significant; a delimiter
    % that is nothing but white space means "split on white space".
    d = strtrim(d);
    by_space = isempty(d);
end

%% main

s = strtrim(s);

if by_space
    w = isspace(s);

    if any(w)
        % s is trimmed, so it starts and ends with a non-space character:
        % every 1 -> 0 transition of w starts a term, and every 0 -> 1
        % transition ends one.
        dw = diff(w);
        sp = [1, find(dw == -1) + 1];     % start positions of terms
        ep = [find(dw == 1), length(s)];  % end positions of terms

        % extract the terms
        nt = numel(sp);
        terms = cell(1, nt);
        for i = 1 : nt
            terms{i} = s(sp(i):ep(i));
        end
    else
        terms = {s};
    end

else
    % The length must be that of the trimmed delimiter actually searched
    % for; using the untrimmed length would skip over term characters.
    dl = length(d);
    p = strfind(s, d);

    % strfind reports overlapping occurrences; keep only the matches that
    % do not overlap a previously accepted one (left-to-right scan).
    if dl > 1 && numel(p) > 1
        keep = true(size(p));
        last = p(1);
        for k = 2 : numel(p)
            if p(k) < last + dl
                keep(k) = false;
            else
                last = p(k);
            end
        end
        p = p(keep);
    end

    if ~isempty(p)
        % extract the terms
        nt = numel(p) + 1;
        terms = cell(1, nt);
        sp = 1;
        for i = 1 : nt-1
            terms{i} = strtrim(s(sp:p(i)-1));
            sp = p(i) + dl;
        end
        terms{nt} = strtrim(s(sp:end));
    else
        terms = {s};
    end
end