001/* 002 * (C) Copyright 2006-2009 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Nuxeo - initial API and implementation 018 * 019 * $Id$ 020 */ 021 022package org.nuxeo.ecm.core.convert.plugins.text.extractors; 023 024import java.io.File; 025import java.io.FileInputStream; 026import java.io.FileOutputStream; 027import java.io.IOException; 028import java.io.InputStream; 029import java.io.OutputStream; 030import java.io.Serializable; 031import java.util.ArrayList; 032import java.util.List; 033import java.util.Map; 034 035import javax.mail.Address; 036import javax.mail.Message.RecipientType; 037import javax.mail.MessagingException; 038import javax.mail.Multipart; 039import javax.mail.Part; 040import javax.mail.Session; 041import javax.mail.internet.ContentType; 042import javax.mail.internet.MimeMessage; 043 044import org.apache.commons.logging.Log; 045import org.apache.commons.logging.LogFactory; 046import org.nuxeo.ecm.core.api.Blob; 047import org.nuxeo.ecm.core.api.Blobs; 048import org.nuxeo.ecm.core.api.blobholder.BlobHolder; 049import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder; 050import org.nuxeo.ecm.core.convert.api.ConversionException; 051import org.nuxeo.ecm.core.convert.api.ConversionService; 052import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder; 053import org.nuxeo.ecm.core.convert.extension.Converter; 054import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor; 055import org.nuxeo.runtime.api.Framework; 056 057public class RFC822ToTextConverter implements Converter { 058 059 private static final Log log = LogFactory.getLog(RFC822ToTextConverter.class); 060 061 private static final String MESSAGE_RFC822_MIMETYPE = "message/rfc822"; 062 063 private static final String TXT_MT = "text/plain"; 064 065 protected ConverterDescriptor descriptor; 066 067 protected Blob extractTextFromMessage(Blob blob) { 068 if (blob == null) { 069 return null; 070 } 071 File f = null; 072 OutputStream fo = null; 073 try { 074 MimeMessage msg = new MimeMessage((Session) null, blob.getStream()); 075 f = Framework.createTempFile("rfc822totext", ".txt"); 076 fo = new FileOutputStream(f); 077 List<Part> parts = getAttachmentParts(msg); 078 writeInfo(fo, msg.getSubject()); 079 writeInfo(fo, msg.getFrom()); 080 writeInfo(fo, msg.getRecipients(RecipientType.TO)); 081 writeInfo(fo, msg.getRecipients(RecipientType.CC)); 082 for (Part part : parts) { 083 writeInfo(fo, part.getFileName()); 084 writeInfo(fo, part.getDescription()); 085 byte[] extracted = extractTextFromMessagePart(part); 086 if (extracted != null) { 087 writeInfo(fo, extracted); 088 } 089 } 090 Blob outblob; 091 try (InputStream in = new FileInputStream(f)) { 092 outblob = Blobs.createBlob(in); 093 } 094 outblob.setMimeType(descriptor.getDestinationMimeType()); 095 return outblob; 096 } catch (IOException | MessagingException e) { 097 log.error(e); 098 } finally { 099 if (fo != null) { 100 try { 101 fo.close(); 102 } catch (IOException e) { 103 log.error(e); 104 } 105 } 106 if (f != null) { 107 f.delete(); 108 } 109 } 110 return null; 111 } 112 113 protected static void writeInfo(OutputStream stream, Address address) { 114 if (address != null) { 115 try { 116 stream.write(address.toString().getBytes()); 117 stream.write(" ".getBytes()); 118 } catch (IOException e) { 119 log.error(e, e); 120 } 121 } 122 } 123 124 protected static void writeInfo(OutputStream stream, Address[] addresses) { 125 if (addresses != null) { 126 for (Address address : addresses) { 127 writeInfo(stream, address); 128 } 129 } 130 } 131 132 protected static void writeInfo(OutputStream stream, String info) { 133 if (info != null) { 134 try { 135 stream.write(info.getBytes()); 136 stream.write(" ".getBytes()); 137 } catch (IOException e) { 138 log.error(e, e); 139 } 140 } 141 } 142 143 protected static void writeInfo(OutputStream stream, byte[] info) { 144 if (info != null) { 145 try { 146 stream.write(info); 147 stream.write(" ".getBytes()); 148 } catch (IOException e) { 149 log.error(e, e); 150 } 151 } 152 } 153 154 protected static byte[] extractTextFromMessagePart(Part p) throws MessagingException, IOException { 155 ContentType contentType = new ContentType(p.getContentType()); 156 String baseType = contentType.getBaseType(); 157 if (TXT_MT.equals(baseType)) { 158 Object content = p.getContent(); 159 if (content instanceof String) { 160 return ((String) content).getBytes(); 161 } else { 162 return null; 163 } 164 } 165 ConversionService cs = Framework.getLocalService(ConversionService.class); 166 167 String converterName = cs.getConverterName(baseType, TXT_MT); 168 if (converterName == null) { 169 return null; 170 } else { 171 Blob blob; 172 try (InputStream in = p.getInputStream()) { 173 blob = Blobs.createBlob(in); 174 } 175 BlobHolder result = cs.convert(converterName, new SimpleBlobHolder(blob), null); 176 return result.getBlob().getByteArray(); 177 } 178 } 179 180 protected static List<Part> getAttachmentParts(Part p) throws MessagingException, IOException { 181 List<Part> res = new ArrayList<Part>(); 182 if (p.isMimeType(MESSAGE_RFC822_MIMETYPE)) { 183 res.addAll(getAttachmentParts((Part) p.getContent())); 184 } else if (p.isMimeType("multipart/alternative")) { 185 // only return one of the text alternatives 186 Multipart mp = (Multipart) p.getContent(); 187 int count = mp.getCount(); 188 Part alternativePart = null; 189 for (int i = 0; i < count; i++) { 190 Part subPart = mp.getBodyPart(i); 191 if (subPart.isMimeType(TXT_MT)) { 192 alternativePart = subPart; 193 break; 194 } else if (subPart.isMimeType("text/*")) { 195 alternativePart = subPart; 196 } else { 197 res.addAll(getAttachmentParts(subPart)); 198 } 199 } 200 if (alternativePart != null) { 201 res.add(alternativePart); 202 } 203 } else if (p.isMimeType("multipart/*")) { 204 Multipart mp = (Multipart) p.getContent(); 205 int count = mp.getCount(); 206 for (int i = 0; i < count; i++) { 207 res.addAll(getAttachmentParts(mp.getBodyPart(i))); 208 } 209 } else { 210 res.add(p); 211 } 212 return res; 213 } 214 215 @Override 216 public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException { 217 Blob inputBlob = blobHolder.getBlob(); 218 Blob outputBlob = extractTextFromMessage(inputBlob); 219 return new SimpleCachableBlobHolder(outputBlob); 220 } 221 222 @Override 223 public void init(ConverterDescriptor descriptor) { 224 this.descriptor = descriptor; 225 } 226 227}