Add StringOutputBuffer class. This CL adds a new helper class used to store very large string output efficiently in memory, while allowing it to be written to a file, or compared with an existing file's content. This will be used in future CLs that will speed up and reduce the RAM usage of some operations like the ones handling --ide=json or -export-compile-command. + Add FileWriter class to encapsulate the Windows-specific creation logic required to work-around limitations of base::WriteFile() Change-Id: I87f1ba716e066769ca7c1dc28a46b1460022f1dd Reviewed-on: https://gn-review.googlesource.com/c/gn/+/7940 Commit-Queue: Scott Graham <scottmg@chromium.org> Reviewed-by: Scott Graham <scottmg@chromium.org>
diff --git a/build/gen.py b/build/gen.py index 44d32c8..582562c 100755 --- a/build/gen.py +++ b/build/gen.py
@@ -485,6 +485,7 @@ 'src/gn/escape.cc', 'src/gn/exec_process.cc', 'src/gn/filesystem_utils.cc', + 'src/gn/file_writer.cc', 'src/gn/frameworks_utils.cc', 'src/gn/function_exec_script.cc', 'src/gn/function_filter.cc', @@ -561,6 +562,7 @@ 'src/gn/source_file.cc', 'src/gn/standard_out.cc', 'src/gn/string_atom.cc', + 'src/gn/string_output_buffer.cc', 'src/gn/string_utils.cc', 'src/gn/substitution_list.cc', 'src/gn/substitution_pattern.cc', @@ -609,6 +611,7 @@ 'src/gn/escape_unittest.cc', 'src/gn/exec_process_unittest.cc', 'src/gn/filesystem_utils_unittest.cc', + 'src/gn/file_writer_unittest.cc', 'src/gn/frameworks_utils_unittest.cc', 'src/gn/function_filter_unittest.cc', 'src/gn/function_foreach_unittest.cc', @@ -661,6 +664,7 @@ 'src/gn/source_dir_unittest.cc', 'src/gn/source_file_unittest.cc', 'src/gn/string_atom_unittest.cc', + 'src/gn/string_output_buffer_unittest.cc', 'src/gn/string_utils_unittest.cc', 'src/gn/substitution_pattern_unittest.cc', 'src/gn/substitution_writer_unittest.cc',
diff --git a/src/gn/file_writer.cc b/src/gn/file_writer.cc new file mode 100644 index 0000000..a4bb5be --- /dev/null +++ b/src/gn/file_writer.cc
@@ -0,0 +1,108 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "gn/file_writer.h" + +#include "base/files/file_path.h" +#include "base/logging.h" +#include "gn/filesystem_utils.h" + +#if defined(OS_WIN) +#include <windows.h> +#include "base/strings/utf_string_conversions.h" +#else +#include <fcntl.h> +#include <unistd.h> +#include "base/posix/eintr_wrapper.h" +#endif + +FileWriter::~FileWriter() = default; + +#if defined(OS_WIN) + +bool FileWriter::Create(const base::FilePath& file_path) { + // On Windows, provide a custom implementation of base::WriteFile. Sometimes + // the base version fails, especially on the bots. The guess is that Windows + // Defender or other antivirus programs still have the file open (after + // checking for the read) when the write happens immediately after. This + // version opens with FILE_SHARE_READ (normally not what you want when + // replacing the entire contents of the file) which lets us continue even if + // another program has the file open for reading. See + // http://crbug.com/468437 + file_path_ = base::UTF16ToUTF8(file_path.value()); + file_ = base::win::ScopedHandle(::CreateFile( + reinterpret_cast<LPCWSTR>(file_path.value().c_str()), GENERIC_WRITE, + FILE_SHARE_READ, NULL, CREATE_ALWAYS, 0, NULL)); + + valid_ = file_.IsValid(); + if (!valid_) { + PLOG(ERROR) << "CreateFile failed for path " << file_path_; + } + return valid_; +} + +bool FileWriter::Write(std::string_view str) { + if (!valid_) + return false; + + DWORD written; + BOOL result = + ::WriteFile(file_.Get(), str.data(), str.size(), &written, nullptr); + if (!result) { + PLOG(ERROR) << "writing file " << file_path_ << " failed"; + valid_ = false; + return false; + } + if (static_cast<size_t>(written) != str.size()) { + PLOG(ERROR) << "wrote " << written << " bytes to " + << file_path_ << " expected " << str.size(); + valid_ = false; + return false; + } + return true; +} + +bool FileWriter::Close() { + // NOTE: file_.Close() is not used here because it cannot return an error. + HANDLE handle = file_.Take(); + if (handle && !::CloseHandle(handle)) + return false; + + return valid_; +} + +#else // !OS_WIN + +bool FileWriter::Create(const base::FilePath& file_path) { + fd_.reset(HANDLE_EINTR(::creat(file_path.value().c_str(), 0666))); + valid_ = fd_.is_valid(); + if (!valid_) { + PLOG(ERROR) << "creat() failed for path " << file_path.value(); + } + return valid_; +} + +bool FileWriter::Write(std::string_view str) { + if (!valid_) + return false; + + while (!str.empty()) { + ssize_t written = HANDLE_EINTR(::write(fd_.get(), str.data(), str.size())); + if (written <= 0) { + valid_ = false; + return false; + } + str.remove_prefix(static_cast<size_t>(written)); + } + return true; +} + +bool FileWriter::Close() { + // The ScopedFD reset() method will crash on EBADF and ignore other errors + // intentionally, so no need to check anything here. + fd_.reset(); + return valid_; +} + +#endif // !OS_WIN
diff --git a/src/gn/file_writer.h b/src/gn/file_writer.h new file mode 100644 index 0000000..0ae5061 --- /dev/null +++ b/src/gn/file_writer.h
@@ -0,0 +1,74 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TOOLS_GN_FILE_WRITER_H_ +#define TOOLS_GN_FILE_WRITER_H_ + +#include "util/build_config.h" + +#if defined(OS_WIN) +#include "base/win/scoped_handle.h" +#else +#include "base/files/scoped_file.h" +#endif + +#include <string> + +namespace base { +class FilePath; +} + +// Convenience class to write data to a file. This is used to work around two +// limitations of base::WriteFile, i.e.: +// +// - base::WriteFile() doesn't allow multiple writes to the target file. +// +// +// - Windows-specific issues created by anti-virus programs required opening +// the file differently (see http://crbug.com/468437). +// +// Usage is: +// 1) Create instance. +// 2) Call Create() to create the file. +// 3) Call Write() one or more times to write data to it. +// 4) Call Close(), or the destructor, to close the file. +// +// As soon as one method fails, all other calls will return false, this allows +// simplified error checking as in: +// +// FileWriter writer; +// writer.Create(<some_path>); +// writer.Write(<some_data>); +// writer.Write(<some_more_data>); +// if (!writer.Close()) { +// ... error happened in one of the above calls. +// } +// +class FileWriter { + public: + FileWriter() = default; + ~FileWriter(); + + // Create output file. Return true on success, false otherwise. + bool Create(const base::FilePath& file_path); + + // Append |data| to the output file. Return true on success, or false on + // failure or if any previous Create() or Write() call failed. + bool Write(std::string_view data); + + // Close the file. Return true on success, or false on failure or if + // any previous Create() or Write() call failed. + bool Close(); + + private: +#if defined(OS_WIN) + base::win::ScopedHandle file_; + std::string file_path_; +#else + base::ScopedFD fd_; +#endif + bool valid_ = true; +}; + +#endif // TOOLS_GN_FILE_WRITER_H_
diff --git a/src/gn/file_writer_unittest.cc b/src/gn/file_writer_unittest.cc new file mode 100644 index 0000000..49f533a --- /dev/null +++ b/src/gn/file_writer_unittest.cc
@@ -0,0 +1,44 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "gn/file_writer.h" + +#include "base/files/file_path.h" +#include "base/files/scoped_temp_dir.h" +#include "gn/filesystem_utils.h" + +#include "util/test/test.h" + +TEST(FileWriter, SingleWrite) { + base::ScopedTempDir temp_dir; + ASSERT_TRUE(temp_dir.CreateUniqueTempDir()); + + std::string data = "foo"; + + base::FilePath file_path = temp_dir.GetPath().AppendASCII("foo.txt"); + + FileWriter writer; + EXPECT_TRUE(writer.Create(file_path)); + EXPECT_TRUE(writer.Write(data)); + EXPECT_TRUE(writer.Close()); + + EXPECT_TRUE(ContentsEqual(file_path, data)); +} + +TEST(FileWriter, MultipleWrites) { + base::ScopedTempDir temp_dir; + ASSERT_TRUE(temp_dir.CreateUniqueTempDir()); + + std::string data = "Hello World!"; + + base::FilePath file_path = temp_dir.GetPath().AppendASCII("foo.txt"); + + FileWriter writer; + EXPECT_TRUE(writer.Create(file_path)); + EXPECT_TRUE(writer.Write("Hello ")); + EXPECT_TRUE(writer.Write("World!")); + EXPECT_TRUE(writer.Close()); + + EXPECT_TRUE(ContentsEqual(file_path, data)); +}
diff --git a/src/gn/filesystem_utils.cc b/src/gn/filesystem_utils.cc index fee2596..bc95dce 100644 --- a/src/gn/filesystem_utils.cc +++ b/src/gn/filesystem_utils.cc
@@ -7,9 +7,9 @@ #include <algorithm> #include "base/files/file_util.h" -#include "base/logging.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" +#include "gn/file_writer.h" #include "gn/location.h" #include "gn/settings.h" #include "gn/source_dir.h" @@ -969,44 +969,10 @@ return false; } - int size = static_cast<int>(data.size()); - bool write_success = false; - -#if defined(OS_WIN) - // On Windows, provide a custom implementation of base::WriteFile. Sometimes - // the base version fails, especially on the bots. The guess is that Windows - // Defender or other antivirus programs still have the file open (after - // checking for the read) when the write happens immediately after. This - // version opens with FILE_SHARE_READ (normally not what you want when - // replacing the entire contents of the file) which lets us continue even if - // another program has the file open for reading. See http://crbug.com/468437 - base::win::ScopedHandle file(::CreateFile( - reinterpret_cast<LPCWSTR>(file_path.value().c_str()), GENERIC_WRITE, - FILE_SHARE_READ, NULL, CREATE_ALWAYS, 0, NULL)); - if (file.IsValid()) { - DWORD written; - BOOL result = ::WriteFile(file.Get(), data.c_str(), size, &written, NULL); - if (result) { - if (static_cast<int>(written) == size) { - write_success = true; - } else { - // Didn't write all the bytes. - LOG(ERROR) << "wrote" << written << " bytes to " - << base::UTF16ToUTF8(file_path.value()) << " expected " - << size; - } - } else { - // WriteFile failed. - PLOG(ERROR) << "writing file " << base::UTF16ToUTF8(file_path.value()) - << " failed"; - } - } else { - PLOG(ERROR) << "CreateFile failed for path " - << base::UTF16ToUTF8(file_path.value()); - } -#else - write_success = base::WriteFile(file_path, data.c_str(), size) == size; -#endif + FileWriter writer; + writer.Create(file_path); + writer.Write(data); + bool write_success = writer.Close(); if (!write_success && err) { *err = Err(Location(), "Unable to write file.",
diff --git a/src/gn/string_output_buffer.cc b/src/gn/string_output_buffer.cc new file mode 100644 index 0000000..09913ce --- /dev/null +++ b/src/gn/string_output_buffer.cc
@@ -0,0 +1,117 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "gn/string_output_buffer.h" + +#include "base/files/file_path.h" +#include "base/files/file_util.h" +#include "gn/err.h" +#include "gn/file_writer.h" +#include "gn/filesystem_utils.h" + +#include <fstream> + +std::string StringOutputBuffer::str() const { + std::string result; + size_t data_size = size(); + result.reserve(data_size); + for (size_t nn = 0; nn < pages_.size(); ++nn) { + size_t wanted_size = std::min(kPageSize, data_size - nn * kPageSize); + result.append(pages_[nn]->data(), wanted_size); + } + return result; +} + +void StringOutputBuffer::Append(const char* str, size_t len) { + Append(std::string_view(str, len)); +} + +void StringOutputBuffer::Append(std::string_view str) { + while (str.size() > 0) { + if (page_free_size() == 0) { + // Allocate a new page. + pages_.push_back(std::make_unique<Page>()); + pos_ = 0; + } + size_t size = std::min(page_free_size(), str.size()); + memcpy(pages_.back()->data() + pos_, str.data(), size); + pos_ += size; + str.remove_prefix(size); + } +} + +void StringOutputBuffer::Append(char c) { + if (page_free_size() == 0) { + // Allocate a new page. + pages_.push_back(std::make_unique<Page>()); + pos_ = 0; + } + pages_.back()->data()[pos_] = c; + pos_ += 1; +} + +bool StringOutputBuffer::ContentsEqual(const base::FilePath& file_path) const { + // Compare file and stream sizes first. Quick and will save us some time if + // they are different sizes. + size_t data_size = size(); + int64_t file_size; + if (!base::GetFileSize(file_path, &file_size) || + static_cast<size_t>(file_size) != data_size) { + return false; + } + + // Open the file in binary mode. + std::ifstream file(file_path.As8Bit().c_str(), std::ios::binary); + if (!file.is_open()) + return false; + + size_t page_count = pages_.size(); + Page file_page; + for (size_t nn = 0; nn < page_count; ++nn) { + size_t wanted_size = std::min(data_size - nn * kPageSize, kPageSize); + file.read(file_page.data(), wanted_size); + if (!file.good()) + return false; + + if (memcmp(file_page.data(), pages_[nn]->data(), wanted_size) != 0) + return false; + } + return true; +} + +// Write the contents of this instance to a file at |file_path|. +bool StringOutputBuffer::WriteToFile(const base::FilePath& file_path, + Err* err) const { + // Create the directory if necessary. + if (!base::CreateDirectory(file_path.DirName())) { + if (err) { + *err = + Err(Location(), "Unable to create directory.", + "I was using \"" + FilePathToUTF8(file_path.DirName()) + "\"."); + } + return false; + } + + size_t data_size = size(); + size_t page_count = pages_.size(); + + FileWriter writer; + bool success = writer.Create(file_path); + if (success) { + for (size_t nn = 0; nn < page_count; ++nn) { + size_t wanted_size = std::min(data_size - nn * kPageSize, kPageSize); + success = writer.Write(std::string_view(pages_[nn]->data(), wanted_size)); + if (!success) + break; + } + } + if (!writer.Close()) + success = false; + + if (!success && err) { + *err = Err(Location(), "Unable to write file.", + "I was writing \"" + FilePathToUTF8(file_path) + "\"."); + } + return success; +}
diff --git a/src/gn/string_output_buffer.h b/src/gn/string_output_buffer.h new file mode 100644 index 0000000..b42b638 --- /dev/null +++ b/src/gn/string_output_buffer.h
@@ -0,0 +1,93 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TOOLS_GN_STRING_OUTPUT_BUFFER_H_ +#define TOOLS_GN_STRING_OUTPUT_BUFFER_H_ + +#include <array> +#include <streambuf> +#include <string> +#include <vector> + +namespace base { +class FilePath; +} // namespace base + +class Err; + +// An append-only very large storage area for string data. Useful for the parts +// of GN that need to generate huge output files (e.g. --ide=json will create +// a 139 MiB project.json file for the Fuchsia build). +// +// Usage is the following: +// +// 1) Create instance. +// +// 2) Use operator<<, or Append() to append data to the instance. +// +// 3) Alternatively, create an std::ostream that takes its address as +// argument, then use the output stream as usual to append data to it. +// +// StringOutputBuffer storage; +// std::ostream out(&storage); +// out << "Hello world!"; +// +// 4) Use ContentsEqual() to compare the instance's content with that of a +// given file. +// +// 5) Use WriteToFile() to write the content to a given file. +// +class StringOutputBuffer : public std::streambuf { + public: + StringOutputBuffer() = default; + + // Convert content to single std::string instance. Useful for unit-testing. + std::string str() const; + + // Return the number of characters stored in this instance. + size_t size() const { return (pages_.size() - 1u) * kPageSize + pos_; } + + // Append string to this instance. + void Append(const char* str, size_t len); + void Append(std::string_view str); + void Append(char c); + + StringOutputBuffer& operator<<(std::string_view str) { + Append(str); + return *this; + } + + // Compare the content of this instance with that of the file at |file_path|. + bool ContentsEqual(const base::FilePath& file_path) const; + + // Write the contents of this instance to a file at |file_path|. + bool WriteToFile(const base::FilePath& file_path, Err* err) const; + + static size_t GetPageSizeForTesting() { return kPageSize; } + + protected: + // Called by std::ostream to write |n| chars from |s|. + std::streamsize xsputn(const char* s, std::streamsize n) override { + Append(s, static_cast<size_t>(n)); + return n; + } + + // Called by std::ostream to write a single character. + int_type overflow(int_type ch) override { + Append(static_cast<char>(ch)); + return 1; + } + + private: + // Return the number of free bytes in the current page. + size_t page_free_size() const { return kPageSize - pos_; } + + static constexpr size_t kPageSize = 65536; + using Page = std::array<char, kPageSize>; + + size_t pos_ = kPageSize; + std::vector<std::unique_ptr<Page>> pages_; +}; + +#endif // TOOLS_GN_STRING_OUTPUT_BUFFER_H_
diff --git a/src/gn/string_output_buffer_unittest.cc b/src/gn/string_output_buffer_unittest.cc new file mode 100644 index 0000000..6dcb741 --- /dev/null +++ b/src/gn/string_output_buffer_unittest.cc
@@ -0,0 +1,134 @@ +// Copyright 2020 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "gn/string_output_buffer.h" + +#include "base/files/file_path.h" +#include "base/files/file_util.h" +#include "base/files/scoped_temp_dir.h" +#include "util/test/test.h" + +namespace { + +// Create a test string of |size| characters with pseudo-random ASCII content. +std::string CreateTestString(size_t size, size_t seed = 0) { + std::string result; + result.resize(size); + for (size_t n = 0; n < size; ++n) { + int offset = (size + seed + n * 1337); + char ch = ' ' + offset % (127 - 32); + result[n] = ch; + } + return result; +} + +} // namespace + +TEST(StringOutputBuffer, Append) { + const size_t data_size = 100000; + std::string data = CreateTestString(data_size); + + const size_t num_spans = 50; + const size_t span_size = data_size / num_spans; + + StringOutputBuffer buffer; + + for (size_t n = 0; n < num_spans; ++n) { + size_t start_offset = n * span_size; + size_t end_offset = std::min(start_offset + span_size, data.size()); + buffer.Append(&data[start_offset], end_offset - start_offset); + } + + EXPECT_EQ(data.size(), buffer.size()); + ASSERT_STREQ(data.c_str(), buffer.str().c_str()); +} + +TEST(StringOutputBuffer, AppendWithPageSizeMultiples) { + const size_t page_size = StringOutputBuffer::GetPageSizeForTesting(); + const size_t page_count = 100; + const size_t data_size = page_size * page_count; + std::string data = CreateTestString(data_size); + + StringOutputBuffer buffer; + + for (size_t n = 0; n < page_count; ++n) { + size_t start_offset = n * page_size; + buffer.Append(&data[start_offset], page_size); + } + + EXPECT_EQ(data.size(), buffer.size()); + ASSERT_STREQ(data.c_str(), buffer.str().c_str()); +} + +TEST(StringOutput, WrappedByStdOstream) { + const size_t data_size = 100000; + std::string data = CreateTestString(data_size); + + const size_t num_spans = 50; + const size_t span_size = data_size / num_spans; + + StringOutputBuffer buffer; + std::ostream out(&buffer); + + for (size_t n = 0; n < num_spans; ++n) { + size_t start_offset = n * span_size; + size_t end_offset = std::min(start_offset + span_size, data.size()); + out << std::string_view(&data[start_offset], end_offset - start_offset); + } + + EXPECT_EQ(data.size(), buffer.size()); + ASSERT_STREQ(data.c_str(), buffer.str().c_str()); +} + +TEST(StringOutputBuffer, ContentsEqual) { + base::ScopedTempDir temp_dir; + ASSERT_TRUE(temp_dir.CreateUniqueTempDir()); + + const size_t data_size = 100000; + std::string data = CreateTestString(data_size); + + base::FilePath file_path = temp_dir.GetPath().AppendASCII("foo.txt"); + base::WriteFile(file_path, data.c_str(), static_cast<int>(data.size())); + + { + StringOutputBuffer buffer; + buffer.Append(data); + + EXPECT_TRUE(buffer.ContentsEqual(file_path)); + + // Different length and contents. + buffer << "extra"; + EXPECT_FALSE(buffer.ContentsEqual(file_path)); + } + + // The same length, different contents. + { + StringOutputBuffer buffer; + buffer << CreateTestString(data_size, 1); + + EXPECT_FALSE(buffer.ContentsEqual(file_path)); + } +} + +TEST(StringOutputBuffer, WriteToFile) { + base::ScopedTempDir temp_dir; + ASSERT_TRUE(temp_dir.CreateUniqueTempDir()); + + const size_t data_size = 100000; + std::string data = CreateTestString(data_size); + + // Write if file doesn't exist. Also create directory. + base::FilePath file_path = + temp_dir.GetPath().AppendASCII("bar").AppendASCII("foo.txt"); + + StringOutputBuffer buffer; + buffer.Append(data); + + EXPECT_TRUE(buffer.WriteToFile(file_path, nullptr)); + + // Verify file was created and has same content. + base::File::Info file_info; + ASSERT_TRUE(base::GetFileInfo(file_path, &file_info)); + ASSERT_TRUE(buffer.ContentsEqual(file_path)); +}