package davmail.exchange;

import java.util.regex.Pattern;

import org.apache.log4j.Logger;

/**
 * Character-level validator and repair helper for iCalendar (RFC 5545) text.
 * It checks and repairs the characters of a string; it does not validate
 * iCalendar or XML structure.
 *
 * This tool addresses synchronization issues between different calendar clients
 * (OWA, Outlook, and Thunderbird via DavMail) where calendar entries containing invalid
 * characters are handled differently across platforms. These problematic entries originate
 * from the MS Exchange Server, where they were either stored with invalid characters or
 * became corrupted during storage. While OWA and Outlook silently hide such entries without
 * displaying them to users, Thunderbird logs XML parse errors in its error console. The
 * validator provides detailed validation information about invalid characters and offers
 * repair functionality to automatically remove problematic characters while preserving
 * valid content.
 *
 * The implementation was developed to address a specific issue where calendar entries
 * containing invalid string content are hidden in OWA and Outlook, making them inaccessible
 * for manual deletion or repair. Since these entries are not visible in OWA and Outlook,
 * users cannot remove or fix them before synchronization to Thunderbird, where they cause
 * XML parsing errors. The solution provides a way to detect and repair these problematic
 * entries, which could otherwise be handled by DavMail during the synchronization process.
 * The issue is documented in Bugzilla at https://bugzilla.mozilla.org/show_bug.cgi?id=1941840.
 *
 * All entry points share one definition of a valid character, implemented by
 * {@link #isValidChar(char)}: TAB, LF, CR or any character from U+0020 upwards,
 * excluding DEL (U+007F) and the C1 control range U+0080-U+009F.
 *
 * @author ifrh (<a href="https://github.com/ifrh">GitHub</a>)
 * @author ifrh (<a href="https://sourceforge.net/u/ifrh/profile/">SourceForge</a>)
 * @since 2025-02-05 // yyyy-mm-dd
 */
public class ICSCalendarValidator {
    protected static final Logger LOGGER = Logger.getLogger(ICSCalendarValidator.class);

    // Regex form of the rule implemented by isValidChar(char); the two must be kept in sync.
    // Matcher.matches() already anchors at both ends, so no ^...$ is needed.
    private static final Pattern VALID_CHARS_PATTERN =
            Pattern.compile("[\\t\\n\\r\\x20-\\x7E\\u00A0-\\uFFFF]*");

    // Named characters used by the validity rule
    private static final char NULL_BYTE = '\u0000';
    private static final char TAB = '\t';
    private static final char LINE_FEED = '\n';
    private static final char CARRIAGE_RETURN = '\r';
    private static final char SPACE = ' ';
    private static final char DELETE = '\u007F';
    private static final char C1_CONTROL_FIRST = '\u0080';
    private static final char C1_CONTROL_LAST = '\u009F';

    /**
     * Validates whether a string contains only valid characters for iCalendar content.
     * Uses the same rule as {@link #isValidChar(char)}, so a string accepted here is
     * left untouched (apart from trimming) by {@link #repairICSContent(String)}.
     *
     * @param content The string to validate
     * @return true if content is not null and all characters are valid
     */
    public static boolean isValidICSContent(String content) {
        return content != null && VALID_CHARS_PATTERN.matcher(content).matches();
    }

    /**
     * Returns detailed validation information about the content.
     * Null bytes are counted; every other invalid character is listed as a
     * {@code \\uXXXX} escape in order of appearance.
     *
     * @param content The string to validate
     * @return ValidationResult object containing details; invalid with reason
     *         "Content is null" when content is null
     */
    public static ValidationResult validateWithDetails(String content) {
        if (content == null) {
            return new ValidationResult(false, "Content is null");
        }

        int nullByteCount = 0;
        StringBuilder invalidChars = new StringBuilder();

        // Single pass: count null bytes separately, list every other invalid character
        for (int i = 0; i < content.length(); i++) {
            char c = content.charAt(i);
            if (c == NULL_BYTE) {
                nullByteCount++;
            } else if (!isValidChar(c)) {
                if (invalidChars.length() > 0) {
                    invalidChars.append(',');
                }
                invalidChars.append(String.format("\\u%04x", (int) c));
            }
        }

        // Collect all found problems
        StringBuilder issues = new StringBuilder();
        if (nullByteCount > 0) {
            issues.append(nullByteCount).append(" null byte(s) found");
        }
        if (invalidChars.length() > 0) {
            if (issues.length() > 0) {
                issues.append(", ");
            }
            issues.append("Invalid character(s): ").append(invalidChars);
        }

        return new ValidationResult(issues.length() == 0, issues.toString());
    }

    /**
     * Repairs an iCalendar string by replacing invalid characters.
     * Each run of consecutive invalid characters is replaced with a single space,
     * then the result is trimmed, so leading and trailing whitespace is removed
     * even when the content holds no invalid character.
     *
     * @param content The string to repair
     * @return The repaired string, or null if content is null
     */
    public static String repairICSContent(String content) {
        if (content == null) {
            return null;
        }

        StringBuilder repaired = new StringBuilder(content.length());
        boolean lastWasInvalid = false;

        for (int i = 0; i < content.length(); i++) {
            char c = content.charAt(i);
            if (isValidChar(c)) {
                repaired.append(c);
                lastWasInvalid = false;
            } else if (!lastWasInvalid) {
                // only the first character of an invalid run produces a space
                repaired.append(SPACE);
                lastWasInvalid = true;
            }
        }
        String fixed = repaired.toString().trim();

        // Log only when something changed; the guard avoids building the
        // (potentially large) message when debug logging is disabled.
        if (LOGGER.isDebugEnabled() && !content.equals(fixed)) {
            LOGGER.debug("ICSCalendarValidator repair characters in ICS content:"
                    + "\n[" + content + "]\n => [" + fixed + "]\n fix complete.");
        }
        return fixed;
    }

    /**
     * Checks if a single character is valid.
     * A character is valid if it is TAB, LF or CR, or if it is:
     * - Not a control character (ASCII 0-31)
     * - Not a delete character (ASCII 127)
     * - Not a C1 control character (128-159)
     * TAB, LF and CR are accepted because they are legal in XML 1.0 and are
     * needed for line breaks in calendar text.
     *
     * @param c The character to check
     * @return true if the character is valid
     */
    static boolean isValidChar(char c) {
        if (c == TAB || c == LINE_FEED || c == CARRIAGE_RETURN) {
            return true;
        }
        return c >= SPACE
                && c != DELETE
                && (c < C1_CONTROL_FIRST || c > C1_CONTROL_LAST);
    }

    /**
     * Result structure for validation results.
     */
    public static class ValidationResult {
        private final boolean isValid;
        private final String reason;

        /**
         * @param isValid true if the validated content had no issues
         * @param reason  description of the issues found; empty when valid
         */
        public ValidationResult(boolean isValid, String reason) {
            this.isValid = isValid;
            this.reason = reason;
        }

        /**
         * @return true if the validated content had no issues
         */
        public boolean isValid() {
            return isValid;
        }

        /**
         * @return the reason text passed at construction
         */
        public String showReason() {
            return reason;
        }
    }
}