001/*
002 * (C) Copyright 2006-2009 Nuxeo SAS (http://nuxeo.com/) and contributors.
003 *
004 * All rights reserved. This program and the accompanying materials
005 * are made available under the terms of the GNU Lesser General Public License
006 * (LGPL) version 2.1 which accompanies this distribution, and is available at
007 * http://www.gnu.org/licenses/lgpl.html
008 *
009 * This library is distributed in the hope that it will be useful,
010 * but WITHOUT ANY WARRANTY; without even the implied warranty of
011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
012 * Lesser General Public License for more details.
013 *
014 * Contributors:
015 *     Nuxeo - initial API and implementation
016 *
017 * $Id$
018 */
019
020package org.nuxeo.ecm.core.convert.plugins.text.extractors;
021
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import javax.mail.Address;
import javax.mail.Message.RecipientType;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.Session;
import javax.mail.internet.ContentType;
import javax.mail.internet.MimeMessage;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder;
import org.nuxeo.ecm.core.convert.api.ConversionException;
import org.nuxeo.ecm.core.convert.api.ConversionService;
import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
import org.nuxeo.ecm.core.convert.extension.Converter;
import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
import org.nuxeo.runtime.api.Framework;
054
055public class RFC822ToTextConverter implements Converter {
056
057    private static final Log log = LogFactory.getLog(RFC822ToTextConverter.class);
058
059    private static final String MESSAGE_RFC822_MIMETYPE = "message/rfc822";
060
061    private static final String TXT_MT = "text/plain";
062
063    protected ConverterDescriptor descriptor;
064
065    protected Blob extractTextFromMessage(Blob blob) {
066        if (blob == null) {
067            return null;
068        }
069        File f = null;
070        OutputStream fo = null;
071        try {
072            MimeMessage msg = new MimeMessage((Session) null, blob.getStream());
073            f = File.createTempFile("rfc822totext", ".txt");
074            fo = new FileOutputStream(f);
075            List<Part> parts = getAttachmentParts(msg);
076            writeInfo(fo, msg.getSubject());
077            writeInfo(fo, msg.getFrom());
078            writeInfo(fo, msg.getRecipients(RecipientType.TO));
079            writeInfo(fo, msg.getRecipients(RecipientType.CC));
080            for (Part part : parts) {
081                writeInfo(fo, part.getFileName());
082                writeInfo(fo, part.getDescription());
083                byte[] extracted = extractTextFromMessagePart(part);
084                if (extracted != null) {
085                    writeInfo(fo, extracted);
086                }
087            }
088            Blob outblob;
089            try (InputStream in = new FileInputStream(f)) {
090                outblob = Blobs.createBlob(in);
091            }
092            outblob.setMimeType(descriptor.getDestinationMimeType());
093            return outblob;
094        } catch (IOException | MessagingException e) {
095            log.error(e);
096        } finally {
097            if (fo != null) {
098                try {
099                    fo.close();
100                } catch (IOException e) {
101                    log.error(e);
102                }
103            }
104            if (f != null) {
105                f.delete();
106            }
107        }
108        return null;
109    }
110
111    protected static void writeInfo(OutputStream stream, Address address) {
112        if (address != null) {
113            try {
114                stream.write(address.toString().getBytes());
115                stream.write(" ".getBytes());
116            } catch (IOException e) {
117                log.error(e, e);
118            }
119        }
120    }
121
122    protected static void writeInfo(OutputStream stream, Address[] addresses) {
123        if (addresses != null) {
124            for (Address address : addresses) {
125                writeInfo(stream, address);
126            }
127        }
128    }
129
130    protected static void writeInfo(OutputStream stream, String info) {
131        if (info != null) {
132            try {
133                stream.write(info.getBytes());
134                stream.write(" ".getBytes());
135            } catch (IOException e) {
136                log.error(e, e);
137            }
138        }
139    }
140
141    protected static void writeInfo(OutputStream stream, byte[] info) {
142        if (info != null) {
143            try {
144                stream.write(info);
145                stream.write(" ".getBytes());
146            } catch (IOException e) {
147                log.error(e, e);
148            }
149        }
150    }
151
152    protected static byte[] extractTextFromMessagePart(Part p) throws MessagingException, IOException {
153        ContentType contentType = new ContentType(p.getContentType());
154        String baseType = contentType.getBaseType();
155        if (TXT_MT.equals(baseType)) {
156            Object content = p.getContent();
157            if (content instanceof String) {
158                return ((String) content).getBytes();
159            } else {
160                return null;
161            }
162        }
163        ConversionService cs = Framework.getLocalService(ConversionService.class);
164
165        String converterName = cs.getConverterName(baseType, TXT_MT);
166        if (converterName == null) {
167            return null;
168        } else {
169            Blob blob;
170            try (InputStream in = p.getInputStream()) {
171                blob = Blobs.createBlob(in);
172            }
173            BlobHolder result = cs.convert(converterName, new SimpleBlobHolder(blob), null);
174            return result.getBlob().getByteArray();
175        }
176    }
177
178    protected static List<Part> getAttachmentParts(Part p) throws MessagingException, IOException {
179        List<Part> res = new ArrayList<Part>();
180        if (p.isMimeType(MESSAGE_RFC822_MIMETYPE)) {
181            res.addAll(getAttachmentParts((Part) p.getContent()));
182        } else if (p.isMimeType("multipart/alternative")) {
183            // only return one of the text alternatives
184            Multipart mp = (Multipart) p.getContent();
185            int count = mp.getCount();
186            Part alternativePart = null;
187            for (int i = 0; i < count; i++) {
188                Part subPart = mp.getBodyPart(i);
189                if (subPart.isMimeType(TXT_MT)) {
190                    alternativePart = subPart;
191                    break;
192                } else if (subPart.isMimeType("text/*")) {
193                    alternativePart = subPart;
194                } else {
195                    res.addAll(getAttachmentParts(subPart));
196                }
197            }
198            if (alternativePart != null) {
199                res.add(alternativePart);
200            }
201        } else if (p.isMimeType("multipart/*")) {
202            Multipart mp = (Multipart) p.getContent();
203            int count = mp.getCount();
204            for (int i = 0; i < count; i++) {
205                res.addAll(getAttachmentParts(mp.getBodyPart(i)));
206            }
207        } else {
208            res.add(p);
209        }
210        return res;
211    }
212
213    @Override
214    public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {
215        Blob inputBlob = blobHolder.getBlob();
216        Blob outputBlob = extractTextFromMessage(inputBlob);
217        return new SimpleCachableBlobHolder(outputBlob);
218    }
219
220    @Override
221    public void init(ConverterDescriptor descriptor) {
222        this.descriptor = descriptor;
223    }
224
225}