source: icGREP/icgrep-devel/QA/greptest.xml @ 4402

Last change on this file since 4402 was 4402, checked in by cameron, 5 years ago

Parsing support for word boundary assertions \b, \B

File size: 13.8 KB
Line 
1
2<greptest>
3<datafile id="simple1">
4A few lines of input
5in this simple test file
6provide fodder for some simple
7regexp tests.
8</datafile>
9
10<datafile id="bounded_charclass">
11=a;
12=bb;
13=ccc;
14=dddd;
15=eeeee;
16=ffffff;
17=ggggggg;
18=hhhhhhhh;
19=iiiiiiiii;
20=jjjjjjjjjj;
21=kkkkkkkkkkk;
22=llllllllllll;
23=mmmmmmmmmmmmm;
24=nnnnnnnnnnnnnn;
25=ooooooooooooooo;
26=pppppppppppppppp;
27=qqqqqqqqqqqqqqqqq;
28=rrrrrrrrrrrrrrrrrr;
29=sssssssssssssssssss;
30=tttttttttttttttttttt;
31=uuuuuuuuuuuuuuuuuuuuu;
32=vvvvvvvvvvvvvvvvvvvvvv;
33=wwwwwwwwwwwwwwwwwwwwwww;
34=xxxxxxxxxxxxxxxxxxxxxxxx;
35=yyyyyyyyyyyyyyyyyyyyyyyyy;
36=zzzzzzzzzzzzzzzzzzzzzzzzzz;
37</datafile>
38
39<datafile id="RangeAltSeqMatchStarKplusWhileNotOptAny">
40Dogbe hat ,/R Cat dt bt bt bt bt bat MzzzzzzzzT MaT MT McT MdT MeT M0T M1T M2T M3T M4T
41Dogbe hit foffasm zza " Dog Cat 1, 4= Dog ['zxcvbnm,./R Dog MT
42Dogbe hot foffasm czzb " MazazazTDogogogogog Cat 1, 4= Dog [;'zxcvbnm,./R Dogtp
43Dogbe foffasm dooooc MazT" Dog Cat 1, 4= Dog [Sqwertyuiopasdfghjkl;'zxcvbnm,./R Dog Cat
44Dogbe foffasm ezzzzzzzzzzzzzzt "tp Dog Cat 12, ktp 4= Dog [jkl;'zxcvbnm,./R Dogtp
45Dogbe foffasm zze " Dog CatMjT , = Dog [;'zxcvbzzznm,./R Dog MazazT cat
46zzcztpDogbe fofasm zazazz4z Doggg Cat 6, azzzzz= Dog [;'zxcvbonm,.R Dog TUT Dog
47Natatatats Nats T M0T ed bazbzczdzt et
48Dfg dc fog Nt ezt
49MazazazazazazazT
50</datafile>
51
52
53<datafile id="StartEndAlt">
54The ever-growing social networks and social media provide invaluable
55sources of information for modeling the behavior of users. High-quality
56user models enable superior services and functions for end users. In this
57talk, I will present several examples of user modeling based on social
58networks and social media. I will first describe our research in modeling
59users' information preferences on Microblogs using a novel user message
60model. I will then discuss our work on extracting users' daily activities,
61such as dining and shopping, that inherently reflect their habits, intents and preferences.
62I explain our novel transfer learning solution via a collaborative boosting
63framework comprising a text-to-activity classifier for socially connected users.
64I will also describe our research on user modeling in multiple, overlapping
65social networks in a 'composite social network' setting. I will show the benefits of
66modeling the dynamics of composite networks, where the evolution processes
67of different networks are jointly considered. Finally, I will explain our
68research on finding social spammers in large social networks.
69</datafile>
70
71<datafile id="special_characters">
72The ] character may appear as the first character inside character class
73expressions such as []>)].
74In this case, the ] character does not terminate the character class, but
75stands for itself.
76Similarly, the - character may appear as the first or last character
77in a character class expression, such as [-] or []-].  Occurring as the
78first or last character in a class means that it is a member of the
79class, instead of being interpreted as a range metacharacter.
80For both ] and -, occurrence as the first character could mean after
81an opening [^ mark for negated character class.   That is [^]] is the
82class that matches everything but ], while [^-] is the class that matches
83anything but -.
84----------
85The above line does not match [^-].
86----------
87]]]]]]]]]]
88^^^^^^^^^^
89</datafile>
90
91<datafile id="ips"> 
92201.250.180.213
93236.4.20.176
94137.96.194.126
95245.16.96.112
96245.19.58.43
97131.176.131.248
98248.160.22.214
99156.179.88.103
100174.13.62.156
101256.122.123.5
10216.81.78.152
103177.17.24.167
10432.120.25.23
105138.82.66.15
1064.196.8.251
107101.30.211.3
108209.44.105.129
10956.166.31.72
110247.108.224.170
111124.248.83.156
112113.107.178.250
113189.243.10.192
114184.18.189.31
11548.145.33.2
116188.137.131.244
11749.161.61.42
11814.31.211.138
11924.39.39.136
120146.217.131.80
121205.141.18.135
122159.207.166.206
12396.211.62.20
12423.148.44.140
125109.159.129.161
126183.230.172.129
12748.178.63.192
128224.41.190.207
129144.114.56.31
130151.205.132.247
131161.194.12.184
13287.55.69.195
133214.198.102.143
134173.19.17.220
135197.80.158.167
136121.94.119.11
137208.174.42.104
138124.173.96.31
139112.107.215.199
140162.30.140.121
141227.241.9.145
1426.26.111.203
143106.14.115.226
144107.233.237.60
145153.24.163.23
146197.4.54.55
147111.14.253.18
14843.138.139.15
149125.148.160.131
150173.16.80.24
15130.194.250.136
152173.233.196.71
153</datafile>
154
155<datafile id="emails">
156danielsmithinvestment01@yahoo.com
157vivian.johnp24@gmail.com
158drjohnsonadamscompany@mail.com
159fb43@kurtz.onmicrosoft.com
160delphinehakizimana11@zipmail.com.br
161mrs.swp@outlook.com
162engr.saidsalem@workmail@co.za
163suleadams342003@gmail.com
164info.soopercredit@qq.com
165aliceisdale@yahoo.com
166elizabethjohnson134@hotmail.com
167anikaebertus@yahoo.se
168bayford_A@qq.com
169hijabfarid@hotmail.com
170zaringwarkipkalya@aol.fr
171monahmeddd2014@gmail.com
172hijab.farid@hotmail.cam
173dennis.melcher01@gmail.com
174publicitycbn@gmail.com
175michaelkruegerloancompany@gmail.com
176ben525387@gmail.com
177dgill_pwc@mynet.com
178dgill_pwc1@terra.com
179tuthpala12@gmail.com
180johanthony1956@e-mail.ua
181christopher.white01@live.co.uk
182anitaloanfirm@live.com
183aliadamssolicitors@gmail.com
184jonathanevans000@yahoo.com
185jwatson494@yahoo.com
186ec21buyer@gmail.com
187sussanbien2012@gmail.com
188info@pavochenkofinance.tk
189honbarrijzdende@gmail.com
190ernestebi699@e-mail.ua
191siwei4489@yahoo.com.hk
192peterkoffi.info@gmail.com
193zenithbankplc106@yahoo.com
194fidelitybankplc505@aim.com
195kymcrox03@gmail.com
196esqharsmith2015@gmail.com
197facebooklottdepartment936@gmail.com
198lt_industries@outlook.com
199cpfi.ltd@live.nope
200changying33@yahoo.com
201abdoul0000hamid@gmail.com
202foreign_exchange@live.co.uk
203hdcliveuk@live.com
204fatimahhassan1@fengv.com
205mikejosephloanfirm202@gmail.com
206skyebanktg@rediffmail.com
207mrsbellafirm001@gmail.com
208financtreasury.uk@email.com
209admin@senagua.gob.ec
210m2424m@live.com
211stevewilliam197@gmail.com
212mrmathew.martins@yahoo.com
213benjaminwilliam917@gmail.com
214abe.shelton1@lenta.ru
215owengah@live.com
216dlserv01@aol.com
217ee.apala@gmail.com
218bbcpaydpt@live.com
219undpfn20114@gmail.com
220janievitek@gmail.com
221creditservice@careceo.com
222cying011@yahoo.com
223christophe_gbeffa@hotmail.fr
224maracasinter@yahoo.com
225iquad94@yahoo.com
226emil.jacobs@mail.com
227emil.jacob@mail.ru
228mgremittance.info@yahoo.co.uk
229raymondmorgan02@hotmail.com
230mrs_sabahibrahim@ymail.com
231drthomascole7@gmail.com
232barrp.agbo@outlook.fr
233mrsmorganhenlenloanfirm@gmail.com
234barr.njdmdcggroup@yahoo.com
235hknbddhb@gmail.com
236michelfoucault@outlook.fr
237goldsupply@rediffmail.com
238dvdmumbai2000@gmail.com
239mikefinance02@gmail.com
240moonstoneking@gmail.com
241peterstone586@gmail.com
242denis_andre_phillipe@aol.com
243roberto.greco@aol.fr
244mark_grant112@hotmail.com
245nokiaxprizefoundationclaims@coolsite.net
246claims14_88@libero.it
247hon.leo.price@gmail.com
248info_unicef@consultant.com
249u_deliverycompany@yahoo.com
250eldhabiblamah152@gmail.com
251governorsanusi.lamido@yahoo.com.ph
252emyjean18@zipmail.com.br
253winningemail@luckymail.com
254barristervictor_odo@yahoo.com.ph
255nokia.global_promo@consultant.com
256headoffice_cv20448bd@libero.it
257ab.issah@yahoo.com
258ab_issah@yahoo.com.tw
259rifaatassad552@yahoo.com.hk
260barrsandilekhumalo@gmail.com
261gkiir@qq.nope
262ibrahimahmed3@aol.fr
263efccin@e-mail.ua
264dheerajrelan@gmail.com
265al-fardan@al-fardan-export.com
266mellissa000@hotmail.com
267verakones01@hotmail.com
268kivaloanfinance999@gmail.com
269atm.paydept00@outlook.com
270claudiokristiansen@yahoo.co.za
271info.kmf@gmx.com
272mambojames689@yahoo.co.uk
273a.salam2014bf@terra.com
274vanessappillip99@yahoo.com
275vanessaphillip@live.com
276alshat@emirates.net.ae
277</datafile>
278
279<datafile id="floats">
2809.7
28116.07
28227.675
28386.162
284189.36792
285859.073357
2861377.9901658
2871514.73870948
2882096.400730002
2892551.2050637982
2904615.26633110512
2918438.114838435104
29232036.61593959936
29336346.00047312989
294144826.22607192554
295+3.1eE5
296+4.992
297+2.425E+10
2989.5808eE10
2999.5808e10
300+0.416968e+0
301-0.3162108-0
302+0.03069882+0
303+0.132378721eE+-0
3040.43416726670
305+-0.43416726669e+0
306+-0.01976811464eE0
307-0.0197681146402e+-0
3080.02241943884633+0
309+-0.004803458640268eE-0
310+0.0008164744337844E+-0
3110.00266694045551024E+0
312+-0.0112132498185713980
3130.0003485919632198585e+-0
314-0.002599516682231249E+0
3150.02315181236174286E+0
316+0.0116575240311669+0
317+-0.06536499789006515eE+-0
318+20.914506804599366eE+-21
319+-20.062034167562416eE+20
32035.90964837611389E-1
321+-2.5508584172940916E-0
3220.6532888027107796eE0
323+0.02530509823216493E0
324-0.016818871414735502eE+-0
3250.01041535031385609E+0
326-0.017042043493346013eE0
327-0.015882934560610525eE0
328+-0.016271711916486607E+0
329-1.1521320712689072e-1
3300.5796638373356339+2
331-6.78321804536429e+-8
332+-18.6367662944200621
333+20.63224902663965eE21
334+-16.78193317331960417
33510.049610186973338-21
33664.51055985925869eE+-65
337+71.7394478831031eE+115
338+114.85412411903206eE-53
339+150.50431315365464e116
340-388.86846448777743eE+-334
341+-75.50343657758405E-76
342-75.50343657758405eE-151
343-216.9511816984773E176
344-175.798740561957eE-178
345+13.25998057047805113
346+3.745360060000819eE+27
347-27.329937066467846E23
34813.34390770072532E+35
349+34.68092648862783eE+-36
350+-35.6389454910375E-160
351+493.90278138088945eE+-1037
3521037.4462608675137+356
353-356.17279137431007E+983
354</datafile>
355
356<datafile id = "CRLF">line with CRLF &#13;&#10;two lines with LFCR &#10;&#13;final line
357</datafile>
358 <grepcase regexp="^$" datafile="CRLF" grepcount="1"/>
359 <grepcase regexp="^.*$" datafile="CRLF" grepcount="4"/>
360
361 <datafile id = "LU_test">
362The following line has LATIN CAPITAL LETTER G WITH MACRON in single quotes.
363'&#x1E20;'
364</datafile>
365
366<grepcase regexp="ab" datafile="StartEndAlt" grepcount="4"/>
367<grepcase regexp="a*b" datafile="StartEndAlt" grepcount="10"/>
368<grepcase regexp="ab*" datafile="StartEndAlt" grepcount="15"/>
369<grepcase regexp="^user|^I|our$" datafile="StartEndAlt" grepcount="5"/>
370
371<grepcase regexp="fe|si" datafile="simple1" grepcount="3"/>
372<grepcase regexp="in" datafile="simple1" grepcount="2"/>
373<grepcase regexp="[A-Z]" datafile="simple1" grepcount="1"/>
374<grepcase regexp="fodder|simple" datafile="simple1" grepcount="2"/>
375
376<grepcase regexp="[cde]{3}" datafile="bounded_charclass" grepcount="3"/>
377<grepcase regexp="[f-h]{5}" datafile="bounded_charclass" grepcount="3"/>
378<grepcase regexp="[a-z]{5}" datafile="bounded_charclass" grepcount="22"/>
379<grepcase regexp="[a-z]{5,15}" datafile="bounded_charclass" grepcount="22"/>
380<grepcase regexp="=[a-z]{7,}" datafile="bounded_charclass" grepcount="20"/>
381<grepcase regexp="=[a-z]{5,15};" datafile="bounded_charclass" grepcount="11"/>
382<grepcase regexp="(([wxy]{2}){3}){2}" datafile="bounded_charclass" grepcount="3"/>
383<grepcase regexp="(([wxy]{2}?){3}?){2}?" datafile="bounded_charclass" grepcount="3"/>
384<grepcase regexp="=([a-z][c-z])*;" datafile="bounded_charclass" grepcount="12"/>
385<grepcase regexp="[\u0061-\u007A]{6}" datafile="bounded_charclass" grepcount="21"/>
386<grepcase regexp="[\o{142}-d]{2}" datafile="bounded_charclass" grepcount="3"/>
387<grepcase regexp="[\x61-\U0000007A]{6}" datafile="bounded_charclass" grepcount="21"/>
388<grepcase regexp="(?i)[A-T]{6}" datafile="bounded_charclass" grepcount="15"/>
389<grepcase regexp="(?i)=S[A-T]S*;" datafile="bounded_charclass" grepcount="1"/>
390
391<grepcase regexp="^D[zabcdefoy]g" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="7"/>
392<grepcase regexp="do*c|ez*t" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="4"/>
393<grepcase regexp="M(az)*T" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="6"/>         
394<grepcase regexp="ez+t" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="2" />
395<grepcase regexp="b([a-d]z)*t" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="2"/>
396<grepcase regexp="[^D]og" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="2"/>
397<grepcase regexp="Na?t" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="2"/>
398<grepcase regexp="h.t" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="3" />
399<grepcase regexp="do*?c|ez*?t" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="4"/>
400<grepcase regexp="^.....\b" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="6"/>
401
402<grepcase regexp="[]]" datafile="special_characters" grepcount="9"/>
403<grepcase regexp="[-]" datafile="special_characters" grepcount="8"/>
404<grepcase regexp="[]^-]" datafile="special_characters" grepcount="14"/>
405<grepcase regexp="[\-\]\^]" datafile="special_characters" grepcount="14"/>
406<grepcase regexp="[^]]" datafile="special_characters" grepcount="16"/>
407<grepcase regexp="[^-]" datafile="special_characters" grepcount="15"/>
408<grepcase regexp="[^^]" datafile="special_characters" grepcount="16"/>
409<grepcase regexp="[^]-]" datafile="special_characters" grepcount="14"/>
410<grepcase regexp="[.]" datafile="special_characters" grepcount="7"/>
411
412<grepcase regexp="^((([2][5][0-5]|([2][0-4]|[1][0-9]|[0-9])?[0-9])[.]){3})([2][5][0-5]|([2][0-4]|[1][0-9]|[0-9])?[0-9])$" datafile="ips" grepcount="60"/>
413<grepcase regexp="^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.([a-zA-Z]{2}|com|org|net|edu|gov|mil|biz|info|mobi|name|aero|asia|jobs|museum)$" datafile="emails" grepcount="116"/>
414<grepcase regexp="^[-+]?([1-9]0?)+\.?((0*[1-9])+|0)([eE][-+]?([0-9]+)+)?$" datafile="floats" grepcount="26"/>
415
416<!-- . should match a single character, even if it is encoded as 3 bytes. -->
417<grepcase regexp="'.'" datafile="LU_test" grepcount="1"/>
418<grepcase regexp="'...'" datafile="LU_test" grepcount="0"/>
419<grepcase regexp="\u{1e20}" datafile="LU_test" grepcount="1"/>
420<grepcase regexp="\u1e20" datafile="LU_test" grepcount="1"/>
421<grepcase regexp="\U00001e20" datafile="LU_test" grepcount="1"/>
422<grepcase regexp="\o{17040}" datafile="LU_test" grepcount="1"/>
423<grepcase regexp="\u{1e21}" datafile="LU_test" grepcount="0"/>
424<grepcase regexp="\u1e21" datafile="LU_test" grepcount="0"/>
425<grepcase regexp="\U00001e21" datafile="LU_test" grepcount="0"/>
426<grepcase regexp="\o{17041}" datafile="LU_test" grepcount="0"/>
427<grepcase regexp="\p{Lu}" datafile="LU_test" grepcount="2"/>
428<grepcase regexp="'\p{Lu}'" datafile="LU_test" grepcount="1"/>
429<grepcase regexp="\p{Ll}" datafile="LU_test" grepcount="1"/>
430
431
432 <datafile id="codepoints">
433 A line with 0x89 &#x89;
434 A line with 0x1234 &#x1234;
435 A line with 0x1245 &#x1245;
436 你
437 好
438 A plain line.
439</datafile>
440 <grepcase regexp="[\u{1234}-\u{1245}]" datafile="codepoints" grepcount="2"/>
441 <grepcase regexp="[\u{086}-\u{9A}]" datafile="codepoints" grepcount="1"/>
442 <grepcase regexp="[你好]" datafile="codepoints" grepcount="2"/>
443 <grepcase regexp="\u{4F60}" datafile="codepoints" grepcount="1"/>
444</greptest>
Note: See TracBrowser for help on using the repository browser.