source: icGREP/icgrep-devel/QA/greptest.xml @ 4421

Last change on this file since 4421 was 4421, checked in by cameron, 5 years ago

Support for multiblock shift; wrapped_print_register

File size: 15.0 KB
Line 
1
2<greptest>
3<datafile id="simple1">
4A few lines of input
5in this simple test file
6provide fodder for some simple
7regexp tests.
8</datafile>
9
10<datafile id="bounded_charclass">
11=a;
12=bb;
13=ccc;
14=dddd;
15=eeeee;
16=ffffff;
17=ggggggg;
18=hhhhhhhh;
19=iiiiiiiii;
20=jjjjjjjjjj;
21=kkkkkkkkkkk;
22=llllllllllll;
23=mmmmmmmmmmmmm;
24=nnnnnnnnnnnnnn;
25=ooooooooooooooo;
26=pppppppppppppppp;
27=qqqqqqqqqqqqqqqqq;
28=rrrrrrrrrrrrrrrrrr;
29=sssssssssssssssssss;
30=tttttttttttttttttttt;
31=uuuuuuuuuuuuuuuuuuuuu;
32=vvvvvvvvvvvvvvvvvvvvvv;
33=wwwwwwwwwwwwwwwwwwwwwww;
34=xxxxxxxxxxxxxxxxxxxxxxxx;
35=yyyyyyyyyyyyyyyyyyyyyyyyy;
36=zzzzzzzzzzzzzzzzzzzzzzzzzz;
37=0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789;
38=01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789;
39=012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789;
40=0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789;
41=01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789;
42=012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789;
43=0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789;
44</datafile>
45
46<datafile id="RangeAltSeqMatchStarKplusWhileNotOptAny">
47Dogbe hat ,/R Cat dt bt bt bt bt bat MzzzzzzzzT MaT MT McT MdT MeT M0T M1T M2T M3T M4T
48Dogbe hit foffasm zza " Dog Cat 1, 4= Dog ['zxcvbnm,./R Dog MT
49Dogbe hot foffasm czzb " MazazazTDogogogogog Cat 1, 4= Dog [;'zxcvbnm,./R Dogtp
50Dogbe foffasm dooooc MazT" Dog Cat 1, 4= Dog [Sqwertyuiopasdfghjkl;'zxcvbnm,./R Dog Cat
51Dogbe foffasm ezzzzzzzzzzzzzzt "tp Dog Cat 12, ktp 4= Dog [jkl;'zxcvbnm,./R Dogtp
52Dogbe foffasm zze " Dog CatMjT , = Dog [;'zxcvbzzznm,./R Dog MazazT cat
53zzcztpDogbe fofasm zazazz4z Doggg Cat 6, azzzzz= Dog [;'zxcvbonm,.R Dog TUT Dog
54Natatatats Nats T M0T ed bazbzczdzt et
55Dfg dc fog Nt ezt
56MazazazazazazazT
57</datafile>
58
59
60<datafile id="StartEndAlt">
61The ever-growing social networks and social media provide invaluable
62sources of information for modeling the behavior of users. High-quality
63user models enable superior services and functions for end users. In this
64talk, I will present several examples of user modeling based on social
65networks and social media. I will first describe our research in modeling
66users' information preferences on Microblogs using a novel user message
67model. I will then discuss our work on extracting users' daily activities,
68such as dining and shopping, that inherently reflect their habits, intents and preferences.
69I explain our novel transfer learning solution via a collaborative boosting
70framework comprising a text-to-activity classifier for socially connected users.
71I will also describe our research on user modeling in multiple, overlapping
72social networks in a 'composite social network' setting. I will show the benefits of
73modeling the dynamics of composite networks, where the evolution processes
74of different networks are jointly considered. Finally, I will explain our
75research on finding social spammers in large social networks.
76</datafile>
77
78<datafile id="special_characters">
79The ] character may appear as the first character inside character class
80expressions such as []>)].
81In this case, the ] character does not terminate the character class, but
82stands for itself.
83Similarly, the - character may appear as the first or last character
84in a character class expression, such as [-] or []-].  Occurring as the
85first or last character in a class means that it is a member of the
86class, instead of being interpreted as a range metacharacter.
87For both ] and -, occurrence as the first character could mean after
88an opening [^ mark for negated character class.   That is [^]] is the
89class that matches everything but ], while [^-] is the class that matches
90anything but -.
91----------
92The above line does not match [^-].
93----------
94]]]]]]]]]]
95^^^^^^^^^^
96</datafile>
97
98<datafile id="ips"> 
99201.250.180.213
100236.4.20.176
101137.96.194.126
102245.16.96.112
103245.19.58.43
104131.176.131.248
105248.160.22.214
106156.179.88.103
107174.13.62.156
108256.122.123.5
10916.81.78.152
110177.17.24.167
11132.120.25.23
112138.82.66.15
1134.196.8.251
114101.30.211.3
115209.44.105.129
11656.166.31.72
117247.108.224.170
118124.248.83.156
119113.107.178.250
120189.243.10.192
121184.18.189.31
12248.145.33.2
123188.137.131.244
12449.161.61.42
12514.31.211.138
12624.39.39.136
127146.217.131.80
128205.141.18.135
129159.207.166.206
13096.211.62.20
13123.148.44.140
132109.159.129.161
133183.230.172.129
13448.178.63.192
135224.41.190.207
136144.114.56.31
137151.205.132.247
138161.194.12.184
13987.55.69.195
140214.198.102.143
141173.19.17.220
142197.80.158.167
143121.94.119.11
144208.174.42.104
145124.173.96.31
146112.107.215.199
147162.30.140.121
148227.241.9.145
1496.26.111.203
150106.14.115.226
151107.233.237.60
152153.24.163.23
153197.4.54.55
154111.14.253.18
15543.138.139.15
156125.148.160.131
157173.16.80.24
15830.194.250.136
159173.233.196.71
160</datafile>
161
162<datafile id="emails">
163danielsmithinvestment01@yahoo.com
164vivian.johnp24@gmail.com
165drjohnsonadamscompany@mail.com
166fb43@kurtz.onmicrosoft.com
167delphinehakizimana11@zipmail.com.br
168mrs.swp@outlook.com
169engr.saidsalem@workmail@co.za
170suleadams342003@gmail.com
171info.soopercredit@qq.com
172aliceisdale@yahoo.com
173elizabethjohnson134@hotmail.com
174anikaebertus@yahoo.se
175bayford_A@qq.com
176hijabfarid@hotmail.com
177zaringwarkipkalya@aol.fr
178monahmeddd2014@gmail.com
179hijab.farid@hotmail.cam
180dennis.melcher01@gmail.com
181publicitycbn@gmail.com
182michaelkruegerloancompany@gmail.com
183ben525387@gmail.com
184dgill_pwc@mynet.com
185dgill_pwc1@terra.com
186tuthpala12@gmail.com
187johanthony1956@e-mail.ua
188christopher.white01@live.co.uk
189anitaloanfirm@live.com
190aliadamssolicitors@gmail.com
191jonathanevans000@yahoo.com
192jwatson494@yahoo.com
193ec21buyer@gmail.com
194sussanbien2012@gmail.com
195info@pavochenkofinance.tk
196honbarrijzdende@gmail.com
197ernestebi699@e-mail.ua
198siwei4489@yahoo.com.hk
199peterkoffi.info@gmail.com
200zenithbankplc106@yahoo.com
201fidelitybankplc505@aim.com
202kymcrox03@gmail.com
203esqharsmith2015@gmail.com
204facebooklottdepartment936@gmail.com
205lt_industries@outlook.com
206cpfi.ltd@live.nope
207changying33@yahoo.com
208abdoul0000hamid@gmail.com
209foreign_exchange@live.co.uk
210hdcliveuk@live.com
211fatimahhassan1@fengv.com
212mikejosephloanfirm202@gmail.com
213skyebanktg@rediffmail.com
214mrsbellafirm001@gmail.com
215financtreasury.uk@email.com
216admin@senagua.gob.ec
217m2424m@live.com
218stevewilliam197@gmail.com
219mrmathew.martins@yahoo.com
220benjaminwilliam917@gmail.com
221abe.shelton1@lenta.ru
222owengah@live.com
223dlserv01@aol.com
224ee.apala@gmail.com
225bbcpaydpt@live.com
226undpfn20114@gmail.com
227janievitek@gmail.com
228creditservice@careceo.com
229cying011@yahoo.com
230christophe_gbeffa@hotmail.fr
231maracasinter@yahoo.com
232iquad94@yahoo.com
233emil.jacobs@mail.com
234emil.jacob@mail.ru
235mgremittance.info@yahoo.co.uk
236raymondmorgan02@hotmail.com
237mrs_sabahibrahim@ymail.com
238drthomascole7@gmail.com
239barrp.agbo@outlook.fr
240mrsmorganhenlenloanfirm@gmail.com
241barr.njdmdcggroup@yahoo.com
242hknbddhb@gmail.com
243michelfoucault@outlook.fr
244goldsupply@rediffmail.com
245dvdmumbai2000@gmail.com
246mikefinance02@gmail.com
247moonstoneking@gmail.com
248peterstone586@gmail.com
249denis_andre_phillipe@aol.com
250roberto.greco@aol.fr
251mark_grant112@hotmail.com
252nokiaxprizefoundationclaims@coolsite.net
253claims14_88@libero.it
254hon.leo.price@gmail.com
255info_unicef@consultant.com
256u_deliverycompany@yahoo.com
257eldhabiblamah152@gmail.com
258governorsanusi.lamido@yahoo.com.ph
259emyjean18@zipmail.com.br
260winningemail@luckymail.com
261barristervictor_odo@yahoo.com.ph
262nokia.global_promo@consultant.com
263headoffice_cv20448bd@libero.it
264ab.issah@yahoo.com
265ab_issah@yahoo.com.tw
266rifaatassad552@yahoo.com.hk
267barrsandilekhumalo@gmail.com
268gkiir@qq.nope
269ibrahimahmed3@aol.fr
270efccin@e-mail.ua
271dheerajrelan@gmail.com
272al-fardan@al-fardan-export.com
273mellissa000@hotmail.com
274verakones01@hotmail.com
275kivaloanfinance999@gmail.com
276atm.paydept00@outlook.com
277claudiokristiansen@yahoo.co.za
278info.kmf@gmx.com
279mambojames689@yahoo.co.uk
280a.salam2014bf@terra.com
281vanessappillip99@yahoo.com
282vanessaphillip@live.com
283alshat@emirates.net.ae
284</datafile>
285
286<datafile id="floats">
2879.7
28816.07
28927.675
29086.162
291189.36792
292859.073357
2931377.9901658
2941514.73870948
2952096.400730002
2962551.2050637982
2974615.26633110512
2988438.114838435104
29932036.61593959936
30036346.00047312989
301144826.22607192554
302+3.1eE5
303+4.992
304+2.425E+10
3059.5808eE10
3069.5808e10
307+0.416968e+0
308-0.3162108-0
309+0.03069882+0
310+0.132378721eE+-0
3110.43416726670
312+-0.43416726669e+0
313+-0.01976811464eE0
314-0.0197681146402e+-0
3150.02241943884633+0
316+-0.004803458640268eE-0
317+0.0008164744337844E+-0
3180.00266694045551024E+0
319+-0.0112132498185713980
3200.0003485919632198585e+-0
321-0.002599516682231249E+0
3220.02315181236174286E+0
323+0.0116575240311669+0
324+-0.06536499789006515eE+-0
325+20.914506804599366eE+-21
326+-20.062034167562416eE+20
32735.90964837611389E-1
328+-2.5508584172940916E-0
3290.6532888027107796eE0
330+0.02530509823216493E0
331-0.016818871414735502eE+-0
3320.01041535031385609E+0
333-0.017042043493346013eE0
334-0.015882934560610525eE0
335+-0.016271711916486607E+0
336-1.1521320712689072e-1
3370.5796638373356339+2
338-6.78321804536429e+-8
339+-18.6367662944200621
340+20.63224902663965eE21
341+-16.78193317331960417
34210.049610186973338-21
34364.51055985925869eE+-65
344+71.7394478831031eE+115
345+114.85412411903206eE-53
346+150.50431315365464e116
347-388.86846448777743eE+-334
348+-75.50343657758405E-76
349-75.50343657758405eE-151
350-216.9511816984773E176
351-175.798740561957eE-178
352+13.25998057047805113
353+3.745360060000819eE+27
354-27.329937066467846E23
35513.34390770072532E+35
356+34.68092648862783eE+-36
357+-35.6389454910375E-160
358+493.90278138088945eE+-1037
3591037.4462608675137+356
360-356.17279137431007E+983
361</datafile>
362
363<datafile id = "CRLF">line with CRLF &#13;&#10;two lines with LFCR &#10;&#13;final line
364</datafile>
365 <grepcase regexp="^$" datafile="CRLF" grepcount="1"/>
366 <grepcase regexp="^.*$" datafile="CRLF" grepcount="4"/>
367
368 <datafile id = "LU_test">
369The following line has LATIN CAPITAL LETTER G WITH MACRON in single quotes.
370'&#x1E20;'
371</datafile>
372
373<grepcase regexp="ab" datafile="StartEndAlt" grepcount="4"/>
374<grepcase regexp="a*b" datafile="StartEndAlt" grepcount="10"/>
375<grepcase regexp="ab*" datafile="StartEndAlt" grepcount="15"/>
376<grepcase regexp="^user|^I|our$" datafile="StartEndAlt" grepcount="5"/>
377
378<grepcase regexp="fe|si" datafile="simple1" grepcount="3"/>
379<grepcase regexp="in" datafile="simple1" grepcount="2"/>
380<grepcase regexp="[A-Z]" datafile="simple1" grepcount="1"/>
381<grepcase regexp="fodder|simple" datafile="simple1" grepcount="2"/>
382
383<grepcase regexp="[cde]{3}" datafile="bounded_charclass" grepcount="3"/>
384<grepcase regexp="[f-h]{5}" datafile="bounded_charclass" grepcount="3"/>
385<grepcase regexp="[a-z]{5}" datafile="bounded_charclass" grepcount="22"/>
386<grepcase regexp="[a-z]{5,15}" datafile="bounded_charclass" grepcount="22"/>
387<grepcase regexp="=[a-z]{7,}" datafile="bounded_charclass" grepcount="20"/>
388<grepcase regexp="=[a-z]{5,15};" datafile="bounded_charclass" grepcount="11"/>
389<grepcase regexp="(([wxy]{2}){3}){2}" datafile="bounded_charclass" grepcount="3"/>
390<grepcase regexp="(([wxy]{2}?){3}?){2}?" datafile="bounded_charclass" grepcount="3"/>
391<grepcase regexp="=([a-z][c-z])*;" datafile="bounded_charclass" grepcount="12"/>
392<grepcase regexp="[\u0061-\u007A]{6}" datafile="bounded_charclass" grepcount="21"/>
393<grepcase regexp="[\o{142}-d]{2}" datafile="bounded_charclass" grepcount="3"/>
394<grepcase regexp="[\x61-\U0000007A]{6}" datafile="bounded_charclass" grepcount="21"/>
395<grepcase regexp="(?i)[A-T]{6}" datafile="bounded_charclass" grepcount="15"/>
396<grepcase regexp="(?i)=S[A-T]S*;" datafile="bounded_charclass" grepcount="1"/>
397<grepcase regexp="=[0-9]{100};" datafile="bounded_charclass" grepcount="1"/>
398<grepcase regexp="=[0-9]{50,};" datafile="bounded_charclass" grepcount="7"/>
399<grepcase regexp="=[0-9]{140};" datafile="bounded_charclass" grepcount="1"/>
400<grepcase regexp="=[0-9a-z]{12,200};" datafile="bounded_charclass" grepcount="22"/>
401
402
403<grepcase regexp="^D[zabcdefoy]g" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="7"/>
404<grepcase regexp="do*c|ez*t" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="4"/>
405<grepcase regexp="M(az)*T" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="6"/>         
406<grepcase regexp="ez+t" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="2" />
407<grepcase regexp="b([a-d]z)*t" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="2"/>
408<grepcase regexp="[^D]og" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="2"/>
409<grepcase regexp="Na?t" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="2"/>
410<grepcase regexp="h.t" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="3" />
411<grepcase regexp="do*?c|ez*?t" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="4"/>
412<grepcase regexp="^.....\b" datafile="RangeAltSeqMatchStarKplusWhileNotOptAny" grepcount="6"/>
413
414<grepcase regexp="[]]" datafile="special_characters" grepcount="9"/>
415<grepcase regexp="[-]" datafile="special_characters" grepcount="8"/>
416<grepcase regexp="[]^-]" datafile="special_characters" grepcount="14"/>
417<grepcase regexp="[\-\]\^]" datafile="special_characters" grepcount="14"/>
418<grepcase regexp="[^]]" datafile="special_characters" grepcount="16"/>
419<grepcase regexp="[^-]" datafile="special_characters" grepcount="15"/>
420<grepcase regexp="[^^]" datafile="special_characters" grepcount="16"/>
421<grepcase regexp="[^]-]" datafile="special_characters" grepcount="14"/>
422<grepcase regexp="[.]" datafile="special_characters" grepcount="7"/>
423
424<grepcase regexp="^((([2][5][0-5]|([2][0-4]|[1][0-9]|[0-9])?[0-9])[.]){3})([2][5][0-5]|([2][0-4]|[1][0-9]|[0-9])?[0-9])$" datafile="ips" grepcount="60"/>
425<grepcase regexp="^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.([a-zA-Z]{2}|com|org|net|edu|gov|mil|biz|info|mobi|name|aero|asia|jobs|museum)$" datafile="emails" grepcount="116"/>
426<grepcase regexp="^[-+]?([1-9]0?)+\.?((0*[1-9])+|0)([eE][-+]?([0-9]+)+)?$" datafile="floats" grepcount="26"/>
427
428<!-- . should match a single character, even if that character is encoded as 3 bytes. -->
429<grepcase regexp="'.'" datafile="LU_test" grepcount="1"/>
430<grepcase regexp="'...'" datafile="LU_test" grepcount="0"/>
431<grepcase regexp="\u{1e20}" datafile="LU_test" grepcount="1"/>
432<grepcase regexp="\u1e20" datafile="LU_test" grepcount="1"/>
433<grepcase regexp="\U00001e20" datafile="LU_test" grepcount="1"/>
434<grepcase regexp="\o{17040}" datafile="LU_test" grepcount="1"/>
435<grepcase regexp="\u{1e21}" datafile="LU_test" grepcount="0"/>
436<grepcase regexp="\u1e21" datafile="LU_test" grepcount="0"/>
437<grepcase regexp="\U00001e21" datafile="LU_test" grepcount="0"/>
438<grepcase regexp="\o{17041}" datafile="LU_test" grepcount="0"/>
439<grepcase regexp="\p{Lu}" datafile="LU_test" grepcount="2"/>
440<grepcase regexp="'\p{Lu}'" datafile="LU_test" grepcount="1"/>
441<grepcase regexp="\p{Ll}" datafile="LU_test" grepcount="1"/>
442
443
444 <datafile id="codepoints">
445 A line with 0x89 &#x89;
446 A line with 0x1234 &#x1234;
447 A line with 0x1245 &#x1245;
448 你
449 好
450 A plain line.
451</datafile>
452 <grepcase regexp="[\u{1234}-\u{1245}]" datafile="codepoints" grepcount="2"/>
453 <grepcase regexp="[\u{086}-\u{9A}]" datafile="codepoints" grepcount="1"/>
454 <grepcase regexp="[你好]" datafile="codepoints" grepcount="2"/>
455 <grepcase regexp="\u{4F60}" datafile="codepoints" grepcount="1"/>
456</greptest>
Note: See TracBrowser for help on using the repository browser.