C#读取Word的内容

下了一个电子书,里面居然全部都是word格式的文件,一点都不便于阅读,连个目录都不好翻,只好把它读出来,弄成html格式的。下面是读取word内容的代码,比较简单了。

using System;
using System.IO;
using System.Reflection;
using Word;   

//----------------------------------------------------------------------
/// <summary>
/// Reads the full text content of a Word document through the Word COM
/// automation object model.
/// </summary>
/// <param name="path">Path of the Word document to read.</param>
/// <returns>
/// The text of the document's <c>Content</c> range, or
/// <see cref="string.Empty"/> when <paramref name="path"/> does not refer
/// to an existing file.
/// </returns>
static string WordReader( string path )
{
	// Check for the file BEFORE starting Word: launching the application
	// and then returning early would leave the Word process running.
	if( !File.Exists( path ) )
	{
		return string.Empty;
	}

	object fileName = path;
	object optional = Missing.Value;
	object visible = true;

	Word.Application app = new ApplicationClass();
	try
	{
		// Every argument other than the file name and `visible` is left
		// as Missing.Value so that Word applies its own defaults.
		Word.Document doc = app.Documents.Open(
		 ref fileName,
		 ref optional,
		 ref optional,
		 ref optional,
		 ref optional,
		 ref optional,
		 ref optional,
		 ref optional,
		 ref optional,
		 ref optional,
		 ref optional,
		 ref visible,
		 ref optional,
		 ref optional,
		 ref optional,
		 ref optional );

		return doc.Content.Text;
	}
	finally
	{
		// Always shut Word down, even when opening or reading the document
		// throws; the document is only read, so nothing is saved.
		object saveChanges = WdSaveOptions.wdDoNotSaveChanges;
		object originalFormat = Missing.Value;
		object routeDocument = Missing.Value;
		app.Quit( ref saveChanges, ref originalFormat, ref routeDocument );
	}
}

需要在工程中引用名为“Microsoft Word 11.0 Object Library”的 Microsoft COM 组件,上面的代码正是使用该组件提供的类和方法来读取 Word 文档的。