<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>Jon Schutz Technical Notes and Recommendations &#187; Unicode</title>
	<atom:link href="http://notes.jschutz.net/topics/unicode/feed/" rel="self" type="application/rss+xml" />
	<link>http://notes.jschutz.net</link>
	<description>Useful snippets, technical info and recommendations</description>
	<lastBuildDate>Thu, 24 Jun 2010 07:07:40 +0000</lastBuildDate>
	<generator>http://wordpress.org/?v=2.9.2</generator>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
			<item>
		<title>Unicode Character Classes</title>
		<link>http://notes.jschutz.net/2007/11/unicode-character-classes/</link>
		<comments>http://notes.jschutz.net/2007/11/unicode-character-classes/#comments</comments>
		<pubDate>Tue, 13 Nov 2007 23:30:43 +0000</pubDate>
		<dc:creator>jon</dc:creator>
				<category><![CDATA[Perl]]></category>
		<category><![CDATA[Unicode]]></category>
		<category><![CDATA[character classes]]></category>
		<category><![CDATA[regular expressions]]></category>

		<guid isPermaLink="false">http://notes.jschutz.net/8/unicode/unicode-character-classes</guid>
		<description><![CDATA[These are the Unicode &#8220;General Category&#8221; character class names used in regular expression matching, e.g. in Perl, \pP  or \p{Punctuation} to match all Unicode characters having the &#8220;punctuation&#8221; property.


Expression
Syntax
Long Name
Description


Letter
:L
Letter
Matches any letter, Ll &#124; Lm &#124; Lo &#124; Lt &#124; Lu


Uppercase letter
:Lu
Uppercase_Letter
Matches any one capital letter. For example, :Luhe matches &#8220;The&#8221; but not &#8220;the&#8221;.


Lowercase [...]]]></description>
			<content:encoded><![CDATA[<p>These are the Unicode &#8220;General Category&#8221; character class names used in regular expression matching, e.g. in Perl, \pP  or \p{Punctuation} to match all Unicode characters having the &#8220;punctuation&#8221; property.</p>
<table cellspacing="0">
<tr>
<th>Expression</th>
<th>Syntax</th>
<th>Long Name</th>
<th>Description</th>
</tr>
<tr>
<td>Letter</td>
<td>:L</td>
<td>Letter</td>
<td>Matches any letter, Ll | Lm | Lo | Lt | Lu</td>
</tr>
<tr>
<td>Uppercase letter</td>
<td>:Lu</td>
<td>Uppercase_Letter</td>
<td>Matches any one capital letter. For example, <code class="ce">:Luhe</code> matches &#8220;The&#8221; but not &#8220;the&#8221;.</td>
</tr>
<tr>
<td>Lowercase letter</td>
<td>:Ll</td>
<td>Lowercase_Letter</td>
<td>Matches any one lower case letter. For example, <code class="ce">:Llhe</code> matches &#8220;the&#8221; but not &#8220;The&#8221;.</td>
</tr>
<tr>
<td>Title case letter</td>
<td>:Lt</td>
<td>Titlecase_Letter</td>
<td>Matches characters that combine an uppercase letter with a lowercase letter, such as Nj and Dz.</td>
</tr>
<tr>
<td>Modifier letter</td>
<td>:Lm</td>
<td>Modifier_Letter</td>
<td>Matches letters or punctuation, such as commas, cross accents, and double prime, used to indicate modifications to the preceding letter.</td>
</tr>
<tr>
<td>Other letter</td>
<td>:Lo</td>
<td>Other_Letter</td>
<td>Matches other letters, such as gothic letter ahsa.</td>
</tr>
<tr>
<td>Cased letter</td>
<td>:LC</td>
<td>Cased_Letter</td>
<td>Matches any letter with case, Ll | Lt | Lu</td>
</tr>
<tr>
<td>Mark</td>
<td>:M</td>
<td>Mark</td>
<td>Matches any mark, Mc | Me | Mn</td>
</tr>
<tr>
<td>Non-spacing mark</td>
<td>:Mn</td>
<td>Nonspacing_Mark</td>
<td>Matches non-spacing marks.</td>
</tr>
<tr>
<td>Combining mark</td>
<td>:Mc</td>
<td>Spacing_Mark</td>
<td>Matches spacing combining marks.</td>
</tr>
<tr>
<td>Enclosing mark</td>
<td>:Me</td>
<td>Enclosing_Mark</td>
<td>Matches enclosing marks.</td>
</tr>
<tr>
<td>Number</td>
<td>:N</td>
<td>Number</td>
<td>Matches any number, Nd | Nl | No</td>
</tr>
<tr>
<td>Decimal digit</td>
<td>:Nd</td>
<td>Decimal_Number</td>
<td>Matches decimal digits such as 0-9 and their full-width equivalents.</td>
</tr>
<tr>
<td>Letter digit</td>
<td>:Nl</td>
<td>Letter_Number</td>
<td>Matches letter digits such as roman numerals and ideographic number zero.</td>
</tr>
<tr>
<td>Other digit</td>
<td>:No</td>
<td>Other_Number</td>
<td>Matches other digits such as old italic number one.</td>
</tr>
<tr>
<td>Punctuation</td>
<td>:<span>P</span></td>
<td>Punctuation</td>
<td>Matches any punctuation, Pc | Pd | Pe | Pf | Pi | Po | Ps</td>
</tr>
<tr>
<td>Connector punctuation</td>
<td>:<span>P</span>c</td>
<td>Connector_Punctuation</td>
<td>Matches the underscore or underline mark.</td>
</tr>
<tr>
<td>Dash punctuation</td>
<td>:<span>P</span>d</td>
<td>Dash_Punctuation</td>
<td>Matches the dash mark.</td>
</tr>
<tr>
<td>Open punctuation</td>
<td>:<span>P</span>s</td>
<td>Open_Punctuation</td>
<td>Matches opening punctuation such as open brackets and braces.</td>
</tr>
<tr>
<td>Close punctuation</td>
<td>:<span>P</span>e</td>
<td>Close_Punctuation</td>
<td>Matches closing punctuation such as closing brackets and braces.</td>
</tr>
<tr>
<td>Initial quote punctuation</td>
<td>:<span>P</span>i</td>
<td>Initial_Punctuation</td>
<td>Matches initial double quotation marks.</td>
</tr>
<tr>
<td>Final quote punctuation</td>
<td>:<span>P</span>f</td>
<td>Final_Punctuation</td>
<td>Matches single quotation marks and ending double quotation marks.</td>
</tr>
<tr>
<td>Other punctuation</td>
<td>:<span>P</span>o</td>
<td>Other_Punctuation</td>
<td>Matches commas (,), ?, &quot;, !, @, #, %, &amp;, *, \, colons (:), semi-colons (;), &#39;, and /.</td>
</tr>
<tr>
<td>Symbol</td>
<td>:S</td>
<td>Symbol</td>
<td>Matches any symbol, Sc | Sk | Sm | So</td>
</tr>
<tr>
<td>Math symbol</td>
<td>:Sm</td>
<td>Math_Symbol</td>
<td>Matches +, =, ~, |, &lt;, and &gt;.</td>
</tr>
<tr>
<td>Currency symbol</td>
<td>:Sc</td>
<td>Currency_Symbol</td>
<td>Matches $ and other currency symbols.</td>
</tr>
<tr>
<td>Modifier symbol</td>
<td>:Sk</td>
<td>Modifier_Symbol</td>
<td>Matches modifier symbols such as circumflex accent, grave accent, and macron.</td>
</tr>
<tr>
<td>Other symbol</td>
<td>:So</td>
<td>Other_Symbol</td>
<td>Matches other symbols, such as the copyright sign, pilcrow sign, and the degree sign.</td>
</tr>
<tr>
<td>Separator</td>
<td>:Z</td>
<td>Separator</td>
<td>Matches any separator, Zl | Zp | Zs</td>
</tr>
<tr>
<td>Paragraph separator</td>
<td>:Zp</td>
<td>Paragraph_Separator</td>
<td>Matches the Unicode character U+2029.</td>
</tr>
<tr>
<td>Space separator</td>
<td>:Zs</td>
<td>Space_Separator</td>
<td>Matches blanks.</td>
</tr>
<tr>
<td>Line separator</td>
<td>:Zl</td>
<td>Line_Separator</td>
<td>Matches the Unicode character U+2028.</td>
</tr>
<tr>
<td>Other control</td>
<td>:Cc</td>
<td>Control</td>
<td>Matches control characters, such as end of line.</td>
</tr>
<tr>
<td>Other format</td>
<td>:Cf</td>
<td>Format</td>
<td>Formatting control character such as the bidirectional control characters.</td>
</tr>
<tr>
<td>Surrogate</td>
<td>:Cs</td>
<td>Surrogate</td>
<td>Matches one half of a surrogate pair.</td>
</tr>
<tr>
<td>Other private-use</td>
<td>:Co</td>
<td>Private_Use</td>
<td>Matches any character from the private-use area.</td>
</tr>
<tr>
<td>Other not assigned</td>
<td>:Cn</td>
<td>Unassigned</td>
<td>Matches characters that do not map to a Unicode character.</td>
</tr>
</table>
<p><strong>References:</strong></p>
<p><a href="http://www.unicode.org">unicode.org</a></p>
<p><a href="http://www.unicode.org/Public/UNIDATA/UCD.html#Properties">Unicode Character Properties</a></p>
<p><a href="http://www.unicode.org/reports/tr18/">Unicode Regular Expressions</a></p>
<p><a href="http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt" target="_blank">Unicode Property Aliases </a></p>
<p><a href="http://search.cpan.org/~nwclark/perl-5.8.8/pod/perlre.pod" target="_blank">Perl Regular Expressions</a></p>
<p><a href="http://www.pcre.org/" target="_blank">PCRE</a></p>
]]></content:encoded>
			<wfw:commentRss>http://notes.jschutz.net/2007/11/unicode-character-classes/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
	</channel>
</rss>
