Multext - Document MSG 1. MtSeg/tbl.punct.fr. Last modified




logo

MtSeg: tbl.punct.fr





# -----------------------------------------------------------
#   FORMAT  :
#
#  <symbolic name>TAB<regular expression>TAB<class name>
#
#  <keywords>TAB<regular expression>TAB<comment>
#     with mandatory keywords/directives:
#
#       MT_COMPOSED_PUNCT
#       MT_BREAKING_PUNCT
#       MT_INTERNAL_PUNCT
#       MT_NON_BREAKING_LEFT_PUNCT
#       MT_NON_BREAKING_RIGHT_PUNCT
#
# -----------------------------------------------------------


# Symbolic punctuation definitions.
# Each line gives a symbolic name, the regular expression it stands for,
# and the class name assigned to it. The symbolic names are referenced as
# {NAME} by the MT_* directives further down, so they must not be renamed
# without updating those directives.
#
# NOTE(review): some classes carry a trailing '?' (TERM_PUNCT?) and others
# do not (TERM_PUNCT); presumably the '?' marks an ambiguous terminator --
# TODO confirm against the segmenter documentation.

# Period forms: single period, three periods, two periods.
PERIOD        \.            TERM_PUNCT?   
SUSP          \.\.\.        TERM_PUNCT?   
I_SUSP        \.\.          TERM_PUNCT?   

# ASCII substitutes for angle quotes: two or more '<' / two or more '>'.
OINF          <<+           OPEN_PUNCT    
CSUP          >>+           CLOSE_PUNCT   

# Parentheses.
OPAR          \(            OPEN_PUNCT    
CPAR          \)            CLOSE_PUNCT   

# Single hyphen.
HYPHEN        -             PUNCTUATION   

# Despite the HYPHEN in their names, these two match the guillemets.
OHYPHEN1      «             OPEN_PUNCT    
CHYPHEN1      »             CLOSE_PUNCT   

# Dash written as two or more consecutive hyphens.
OCHYPHEN2     --+           PUNCTUATION   

# Double quote; the same character both opens and closes.
OCQUOTE       "             OPENCLOSE_PUNCT

# Square brackets.
OCROCHE       \[            OPEN_PUNCT    
CCROCHE       \]            CLOSE_PUNCT   

# Semicolon, colon, comma.
SCOLON        ;             PUNCTUATION   
COLON         :             PUNCTUATION   
COMMA         ,             PUNCTUATION   

# Exclamation mark and inverted exclamation mark.
# NOTE(review): EXCLAM is TERM_PUNCT? while QUEST below is TERM_PUNCT --
# confirm that the difference is intended.
EXCLAM        !             TERM_PUNCT?   
IEXCLAM       ¡             OPEN_PUNCT    

# Question mark and inverted question mark.
# NOTE(review): IQUEST is PUNCTUATION while IEXCLAM above is OPEN_PUNCT --
# confirm that the difference is intended.
QUEST         \?            TERM_PUNCT    
IQUEST        ¿             PUNCTUATION   

# Backquote (left) and apostrophe (right) used as single quotes.
RSQUOTE_L     `             OPEN_PUNCT    
RSQUOTE_R     '             CLOSE_PUNCT   

# Slash and the SGML entity &mdash;.
# NOTE(review): SLASH is not referenced by any MT_* directive in this file.
SLASH         \/            PUNCTUATION   
SGML_DASH     (\&mdash\;)   PUNCTUATION   

# Exclamation or question mark immediately followed by three periods.
S_EXCLAM      \!\.\.\.      TERM_PUNCT    
Q_EXCLAM      \?\.\.\.      TERM_PUNCT    

# -----------------------------------------------------------
# WARNING: after this line, keywords must not be changed;
#          modify only the regular expression associated with each keyword
# -----------------------------------------------------------

# Punctuation composed of more than one character
# -----------------------------------------------
# Lists every multi-character symbol defined above.
# NOTE(review): {I_SUSP} (two periods) is listed before {SUSP} (three
# periods). This is harmless if the matcher picks the longest match, but
# would shadow {SUSP} under leftmost-first alternation -- TODO confirm.

MT_COMPOSED_PUNCT           {I_SUSP}|{SUSP}|{OINF}|{CSUP}|{OCHYPHEN2}|{SGML_DASH}|{S_EXCLAM}|{Q_EXCLAM}

# Punctuation always used as a separator,
# never as part of a clitic or an abbreviation.
# ---------------------------------------------
# The single-character symbols are grouped in one bracket expression; the
# parentheses and square brackets are listed as separate alternatives.
# NOTE(review): the bracket expression contains the non-ASCII characters
# of OHYPHEN1, CHYPHEN1, IEXCLAM and IQUEST; this presumably assumes a
# single-byte encoding such as ISO-8859-1 -- TODO confirm.

MT_BREAKING_PUNCT           [{SCOLON}{COLON}{EXCLAM}{IEXCLAM}{QUEST}{IQUEST}{RSQUOTE_L}{OCQUOTE}{OHYPHEN1}{CHYPHEN1}]|{OPAR}|{OCROCHE}|{CCROCHE}|{CPAR}


# Punctuation which can occur inside a compound or a clitic;
# a token is therefore not cut at these characters.
# -----------------------------------------------------------
# {HYPHEN} is the last item of the bracket expression, where '-' is
# presumably read as a literal hyphen rather than as a range operator.

MT_INTERNAL_PUNCT           [{COMMA}{RSQUOTE_R}{HYPHEN}]|{PERIOD}|{SUSP}|{I_SUSP}|{SGML_DASH}

# Non-breaking punctuation on the left side of a token.
# <NONE> is presumably the placeholder for "no such punctuation" --
# TODO confirm.
MT_NON_BREAKING_LEFT_PUNCT  <NONE>

# Non-breaking punctuation on the right side of a token: the period forms.
MT_NON_BREAKING_RIGHT_PUNCT {PERIOD}|{SUSP}|{I_SUSP}


HTML 3.2 Checked! | Top | Back | MtSeg home page | LPL/CNRS | MULTEXT |

Copyright © Centre National de la Recherche Scientifique, 1996.