1 package davmail.exchange;
2 import java.util.regex.Pattern;
3 import org.apache.log4j.Logger;
4
5 /**
6 * Validator for iCalendar data according to RFC 5545 specifications.
7 * This implementation provides comprehensive validation and repair capabilities for iCalendar content,
8 * specifically focusing on character validation rather than XML structure.
9 *
 * This tool addresses synchronization issues between different calendar clients
11 * (OWA, Outlook, and Thunderbird via DavMail) where calendar entries containing invalid
12 * characters are handled differently across platforms. These problematic entries originate
13 * from the MS Exchange Server, where they were either stored with invalid characters or
14 * became corrupted during storage. While OWA and Outlook silently hide such entries without
15 * displaying them to users, Thunderbird logs XML parse errors in its error console. The
16 * validator provides detailed validation information about invalid characters and offers
17 * repair functionality to automatically remove problematic characters while preserving
18 * valid content.
19 *
20 * The implementation was developed to address a specific issue where calendar entries
21 * containing invalid string content are hidden in OWA and Outlook, making them inaccessible
22 * for manual deletion or repair. Since these entries are not visible in OWA and Outlook,
23 * users cannot remove or fix them before synchronization to Thunderbird, where they cause
 * XML parsing errors. This class provides a way to detect and repair these problematic
 * entries, so that DavMail can handle them during the synchronization process.
26 * The issue is documented in Bugzilla at https://bugzilla.mozilla.org/show_bug.cgi?id=1941840.
27 *
28 * @author ifrh (<a href="https://github.com/ifrh">GitHub</a>)
29 * @author ifrh (<a href="https://sourceforge.net/u/ifrh/profile/">SourceForge</a>)
30 * @since 2025-02-05 // yyyy-mm-dd
31 */
32
33 public class ICSCalendarValidator {
34 protected static final Logger LOGGER = Logger.getLogger(ICSCalendarValidator.class);
35 // Optimized pattern for validation, tab, CR and LF are allowed in icalendar content
36 private static final Pattern VALID_CHARS_PATTERN =
37 Pattern.compile("^[\r\n\t\\x20-\\x7E\u0080-\uFFFF]*$");
38
39 // Constants for better readability
40 private static final char NULL_BYTE = '\u0000';
41 private static final char SPACE = ' ';
42 private static final char DELETE = '\u007F';
43
44 /**
45 * Validates whether a string contains only valid characters for iCalendar content.
46 * Ensures the string contains no null bytes and only printable ASCII characters,
47 * while allowing properly encoded Unicode characters.
48 * @param content The string to validate
49 * @return true if all characters are valid
50 */
51 public static boolean isValidICSContent(String content) {
52 return content != null && VALID_CHARS_PATTERN.matcher(content).matches();
53 }
54
55 /**
56 * Returns detailed validation information about the content.
57 * @param content The string to validate
58 * @return ValidationResult object containing details
59 */
60 public static ValidationResult validateWithDetails(String content) {
61 if (content == null) {
62 return new ValidationResult(false, "Content is null");
63 }
64
65 // Efficient validation checking all conditions in one pass
66 StringBuilder issues = new StringBuilder();
67 int nullByteCount = 0;
68 StringBuilder invalidChars = new StringBuilder();
69
70 for (char c : content.toCharArray()) {
71 if (c == NULL_BYTE) {
72 nullByteCount++;
73 } else if (((c < 32 && c!='\r' && c!='\n' && c!='\t') || c == DELETE || (c >= 128 && c <= 159))) {
74 invalidChars.append(String.format("\\u%04x,", (int)c));
75 }
76 }
77
78 // Collect all found problems
79 if (nullByteCount > 0) {
80 issues.append(nullByteCount).append(" null byte(s) found");
81 }
82 if (invalidChars.length() > 0) {
83 if (issues.length() > 0) issues.append(", ");
84 issues.append("Invalid character(s): ").append(
85 invalidChars.substring(0, invalidChars.length() - 1));
86 }
87
88 return new ValidationResult(issues.length() == 0, issues.toString());
89 }
90
91 /**
92 * Repairs an iCalendar string by removing invalid characters.
93 * Replaces multiple consecutive invalid characters with a single space.
94 * @param content The string to repair
95 * @return The repaired string
96 */
97 public static String repairICSContent(String content) {
98 if (content == null) return null;
99 String message ="ICSCalendarValidator repair characters in ICS content:";
100
101 StringBuilder repaired = new StringBuilder();
102 boolean lastWasInvalid = false;
103
104 for (char c : content.toCharArray()) {
105 if (isValidChar(c)) {
106 repaired.append(c);
107 lastWasInvalid = false;
108 } else if (!lastWasInvalid) {
109 repaired.append(SPACE);
110 lastWasInvalid = true;
111 }
112 }
113 String fixed = repaired.toString().trim();
114 // just put output to debug logger, only if some invalid characters has been changed.
115 if (!content.equals(fixed)){
116 LOGGER.debug ( message + "\n[" + content + "]\n => [" + fixed + "]\n fix complete.");
117 }
118 return fixed ;
119 }
120
121 /**
122 * Checks if a single character is valid.
123 * A character is valid if it is:
124 * - Not a control character (ASCII 0-31)
125 * - Not a delete character (ASCII 127)
126 * - Not an invalid Unicode character (128-159)
127 * @param c The character to check
128 * @return true if the character is valid
129 */
130 static boolean isValidChar(char c) {
131 return c > 0 && !(c == DELETE || (c >= 128 && c <= 159));
132 }
133
134 /**
135 * Result structure for validation results.
136 */
137 public static class ValidationResult {
138 private final boolean isValid;
139 private final String reason;
140
141 public ValidationResult(boolean isValid, String reason) {
142 this.isValid = isValid;
143 this.reason = reason;
144 }
145
146 public boolean isValid() { return isValid; }
147 public String showReason() { return reason; }
148 }
149 }