summaryrefslogtreecommitdiff
path: root/Functions/Calendar/calendar_scandate
diff options
context:
space:
mode:
Diffstat (limited to 'Functions/Calendar/calendar_scandate')
-rw-r--r-- Functions/Calendar/calendar_scandate | 124
1 files changed, 86 insertions, 38 deletions
diff --git a/Functions/Calendar/calendar_scandate b/Functions/Calendar/calendar_scandate
index 4ae2ae606..b3a583705 100644
--- a/Functions/Calendar/calendar_scandate
+++ b/Functions/Calendar/calendar_scandate
@@ -23,6 +23,19 @@
# from 1900 to 2099 inclusive are matched.
# - Although timezones are parsed (complicated formats may not be recognized),
# they are then ignored; no time adjustment is made.
+# - Embedding of times within dates (e.g. "Wed Jun 16 09:30:00 BST 2010")
+# causes horrific problems because of the combination of the many
+# possible date and time formats to match. The approach taken
+# here is to match the time, remove it, and see if the nearby text
+# looks like a date. The problem is that the time matched may not
+# be the one associated with the date, in which case the time will be
+# ignored. To minimise this, when the argument "-a" is given to
+# anchor the date/time to the start of the line, we never look
+# beyond a newline. So if any date/time strings in the text
+# are on separate lines the problem is avoided.
+# - If you feel sophisticated enough and wish to avoid any ambiguity,
+# you can use RFC 2445 date/time strings, for example 20100601T170000.
+# These are parsed in one go.
#
# The following give some obvious examples; users finding here
# a format they like and not subject to vagaries of style may skip
@@ -136,7 +149,7 @@
# In this case absolute dates are ignored.
emulate -L zsh
-setopt extendedglob
+setopt extendedglob # xtrace
zmodload -i zsh/datetime || return 1
@@ -145,7 +158,7 @@ zmodload -i zsh/datetime || return 1
# relatively logical dates like 2006/09/19:14:27
# don't allow / before time ! the above
# is not 19 hours 14 mins and 27 seconds after anything.
-local tschars="[-,:[:space:]]"
+local tschars="[-,:[:blank:]]"
# start pattern for time when anchored
local tspat_anchor="(${tschars}#)"
# ... when not anchored
@@ -175,9 +188,10 @@ local repat="(|s)(|${schars}*)"
# We may need some completely different heuristic.
local monthpat="(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[a-z]#"
integer daysecs=$(( 24 * 60 * 60 ))
+local d="[[:digit:]]"
integer year year2 month month2 day day2 hour minute second then nth wday wday2
-local opt line orig_line mname MATCH MBEGIN MEND tz test
+local opt line orig_line mname MATCH MBEGIN MEND tz test rest_line
local -a match mbegin mend
# Flags that we found a date or a time (maybe a relative time)
integer date_found time_found
@@ -237,7 +251,7 @@ while getopts "aAdmrR:st" opt; do
done
shift $(( OPTIND - 1 ))
-line=$1 orig_line=$1
+line=$1
local dspat dspat_noday tspat
if (( anchor )); then
@@ -250,11 +264,20 @@ if (( anchor )); then
# We'll test later if the time is associated with the date.
tspat=$tspat_noanchor
fi
+ # We can save a huge amount of grief (I've discovered) if, when
+ # we're anchored to the start, we ignore anything after a newline.
+ # However, don't do this if we're anchored to the end. The
+ # match should fail if there are extra lines in that case.
+ if [[ anchor_end -eq 0 && $line = (#b)([^$'\n']##)($'\n'*) ]]; then
+ line=$match[1]
+ rest_line=$match[2]
+ fi
else
dspat=$dspat_noanchor
dspat_noday=$dspat_noanchor
tspat=$tspat_noanchor
fi
+orig_line=$line
# Look for a time separately; we need colons for this.
# We want to look for the first time to ensure it's associated
@@ -268,6 +291,7 @@ fi
# To use a case statement we'd need to be able to request non-greedy
# matching for a pattern.
local rest
+# HH:MM:SECONDS am/pm with optional decimal seconds
rest=${line#(#ibm)${~tspat}(<0-12>):(<0-59>)[.:]((<0-59>)(.<->|))[[:space:]]#([ap])(|.)[[:space:]]#m(.|[[:space:]]|(#e))}
if [[ $rest != $line ]]; then
hour=$match[2]
@@ -275,7 +299,8 @@ if [[ $rest != $line ]]; then
second=$match[5]
[[ $match[7] = (#i)p ]] && (( hour <= 12 )) && (( hour += 12 ))
time_found=1
-else
+fi
+if (( time_found == 0 )); then
# no seconds, am/pm
rest=${line#(#ibm)${~tspat}(<0-12>):(<0-59>)[[:space:]]#([ap])(|.)[[:space:]]#m(.|[[:space:]]|(#e))}
if [[ $rest != $line ]]; then
@@ -283,37 +308,60 @@ else
minute=$match[3]
[[ $match[4] = (#i)p ]] && (( hour <= 12 )) && (( hour += 12 ))
time_found=1
- else
- # no colon, even, but a.m./p.m. indicator
- rest=${line#(#ibm)${~tspat}(<0-12>)[[:space:]]#([ap])(|.)[[:space:]]#m(.|[[:space:]]|(#e))}
- if [[ $rest != $line ]]; then
- hour=$match[2]
- minute=0
- [[ $match[3] = (#i)p ]] && (( hour <= 12 )) && (( hour += 12 ))
- time_found=1
- else
- # 24 hour clock, with seconds
- rest=${line#(#ibm)${~tspat}(<0-24>):(<0-59>)[.:]((<0-59>)(.<->|))(.|[[:space:]]|(#e))}
- if [[ $rest != $line ]]; then
- hour=$match[2]
- minute=$match[3]
- second=$match[5]
- time_found=1
- else
- rest=${line#(#ibm)${~tspat}(<0-24>):(<0-59>)(.|[[:space:]]|(#e))}
- if [[ $rest != $line ]]; then
- hour=$match[2]
- minute=$match[3]
- time_found=1
- fi
- fi
- fi
+ fi
+fi
+if (( time_found == 0 )); then
+ # no colon, even, but a.m./p.m. indicator
+ rest=${line#(#ibm)${~tspat}(<0-12>)[[:space:]]#([ap])(|.)[[:space:]]#m(.|[[:space:]]|(#e))}
+ if [[ $rest != $line ]]; then
+ hour=$match[2]
+ minute=0
+ [[ $match[3] = (#i)p ]] && (( hour <= 12 )) && (( hour += 12 ))
+ time_found=1
+ fi
+fi
+if (( time_found == 0 )); then
+ # 24 hour clock, with seconds
+ rest=${line#(#ibm)${~tspat}(<0-24>):(<0-59>)[.:]((<0-59>)(.<->|))(.|[[:space:]]|(#e))}
+ if [[ $rest != $line ]]; then
+ hour=$match[2]
+ minute=$match[3]
+ second=$match[5]
+ time_found=1
+ fi
+fi
+if (( time_found == 0 )); then
+ rest=${line#(#ibm)${~tspat}(<0-24>):(<0-59>)(.|[[:space:]]|(#e))}
+ if [[ $rest != $line ]]; then
+ hour=$match[2]
+ minute=$match[3]
+ time_found=1
+ fi
+fi
+if (( time_found == 0 )); then
+ # Combined date and time formats: here we can use an anchor because
+ # we know the complete format.
+ (( anchor )) && tspat=$tspat_anchor
+ # RFC 2445
+ rest=${line#(#ibm)${~tspat}(|\"[^\"]##\":)($~d$~d$~d$~d)($~d$~d)($~d$~d)T($~d$~d)($~d$~d)($~d$~d)([[:space:]]#|(#e))}
+ if [[ $rest != $line ]]; then
+ year=$match[3]
+ month=$match[4]
+ day=$match[5]
+ hour=$match[6]
+ minute=$match[7]
+ second=$match[8]
+ # Signal that the time was parsed with the date, so it needn't be matched up with it later.
+ time_found=2
+ date_found=1
+ date_start=$mbegin[3]
+ date_end=$mend[-1]
fi
fi
(( hour == 24 )) && hour=0
-if (( time_found )); then
- # time was found
+if (( time_found && ! date_found )); then
+ # time was found; if date also found already, process below.
time_start=$mbegin[2]
time_end=$mend[-1]
# Remove the timespec because it may be in the middle of
@@ -331,7 +379,7 @@ if (( time_found )); then
(( debug )) && print "line after time: $line"
fi
-if (( relative == 0 )); then
+if (( relative == 0 && date_found == 0 )); then
# Date.
case $line in
# Look for YEAR[-/.]MONTH[-/.]DAY
@@ -468,7 +516,7 @@ if (( date_found || (time_ok && time_found) )); then
fi
line=${line[1,$date_start-1]}${line[$date_end+1,-1]}
fi
- if (( time_found )); then
+ if (( time_found == 1 )); then
if (( date_found )); then
# If we found a time, it must be associated with the date,
# or we can't use it. Since we removed the time from the
@@ -540,7 +588,7 @@ if (( date_found || (time_ok && time_found) )); then
"'$orig_line[time_start,time_end]'"
(( date_ok )) && print "Date string: $date_start,$date_end:" \
"'$orig_line[date_start,date_end]'"
- print "Remaining line: '$line'"
+ print "Remaining line: '$line$rest_line'"
fi
fi
fi
@@ -722,11 +770,11 @@ if (( relative )); then
(( reladd += (hour * 60 + minute) * 60 + second ))
typeset -g REPLY
(( REPLY = relative_start + reladd ))
- [[ -n $setvar ]] && typeset -g REPLY2="$line"
+ [[ -n $setvar ]] && typeset -g REPLY2="$line$rest_line"
return 0
fi
return 1
-elif (( ! date_found )); then
+elif (( date_found == 0 )); then
return 1
fi
@@ -748,6 +796,6 @@ fi
strftime -s REPLY -r $fmt $nums
-[[ -n $setvar ]] && typeset -g REPLY2="$line"
+[[ -n $setvar ]] && typeset -g REPLY2="$line$rest_line"
return 0